diff --git a/.bazelrc b/.bazelrc
deleted file mode 100644
index 785a5d6..0000000
--- a/.bazelrc
+++ /dev/null
@@ -1,12 +0,0 @@
-# Import TensorFlow configuration.
-import %workspace%/tensorflow/.tf_configure.bazelrc
-
-# Coloring for error messages.
-common --color=yes
-
-# Always print test errors.
-test --test_output=errors
-
-# Other build flags.
-build --define=grpc_no_ares=true
-test --define=grpc_no_ares=true
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index c37242c..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,12 +0,0 @@
-.DS_Store
-.ipynb_checkpoints
-node_modules
-/.bazelrc
-/bazel-*
-/bazel_pip
-/pip_test
-/_python_build
-*.pyc
-__pycache__
-*.swp
-.vscode/
diff --git a/BUILD b/BUILD
deleted file mode 100644
index b5e7ca5..0000000
--- a/BUILD
+++ /dev/null
@@ -1,23 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-sh_binary(
- name = "pip_pkg",
- srcs = ["pip_pkg.sh"],
- data = [
- "MANIFEST.in",
- "setup.py",
- "//tensorflow_lattice",
- ],
-)
diff --git a/INSTALL.md b/INSTALL.md
deleted file mode 100644
index f8c7cb2..0000000
--- a/INSTALL.md
+++ /dev/null
@@ -1,202 +0,0 @@
-
-# TensorFlow Lattice installation
-
-TensorFlow Lattice runs on Ubuntu and Mac OS X, and requires TensorFlow.
-
-We highly recommend to read [TensorFlow installation
-instructions](https://www.tensorflow.org/install), especially [Installing
-TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux) to
-understand virtualenv and pip, and [Installing TensorFlow from
-Sources](https://www.tensorflow.org/install/install_sources).
-
-# Install the prebuilt pip package
-
-## Activate virtualenv
-If using virtualenv, activate your virtualenv for the rest of the installation,
-otherwise skip this step:
-
-``` shell
-~$ virtualenv --system-site-packages tensorflow-lattice # for Python 2.7
-~$ virtualenv --system-site-packages -p python3 tensorflow-lattice # for Python 3.n
-```
-
-Here you can change `tensorflow-lattice` to another target directory you want to
-use.
-
-```shell
-~$ source tensorflow-lattice/bin/activate # bash, sh, ksh, or zsh
-~$ source tensorflow-lattice/bin/activate.csh # csh or tcsh
-```
-
-## Install pip packages.
-You can use pip install to install tensorflow-lattice pip package.
-
-```shell
-(tensorflow-lattice)$ pip install --upgrade tensorflow-lattice # for Python 2.7
-(tensorflow-lattice)$ pip3 install --upgrade tensorflow-lattice # for Python 3.n
-(tensorflow-lattice)$ pip install --upgrade tensorflow-lattice-gpu # for Python 2.7 and GPU
-(tensorflow-lattice)$ pip3 install --upgrade tensorflow-lattice-gpu # for Python 3.n and GPU
-```
-Our custom operators do not have GPU kernels. The main difference
-between `tensorflow-lattice-gpu` and `tensorflow-lattice` pip package is that
-the former requires `tensorflow-gpu` pip package whereas the latter requires
-`tensorflow` pip package.
-
-## Test TensorFlow and TensorFlow Lattice
-
-Run the following python script to test TensorFlow Lattice.
-
-```python
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-x = tf.compat.v1.placeholder(tf.float32, shape=(None, 2))
-(y, _, _, _) = tfl.lattice_layer(x, lattice_sizes=(2, 2))
-
-with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- print(sess.run(y, feed_dict={x: [[0.0, 0.0]]}))
-```
-
-Now you are ready to use *TensorFlow Lattice*. Check out examples in the
-[examples](https://github.com/tensorflow/lattice/tree/master/examples) directory
-and run them if you need more examples to run.
-[Tutorial](g3doc/tutorial/index.md) contains detailed explanation on how to use
-TensorFlow Lattice.
-
-You can stop here unless you want to build TensorFlow Lattice from the source.
-
-# Build TensorFlow Lattice and TensorFlow pip package from the source.
-You can also build TensorFlow Lattice packages from the source.
-For this, you will need to compile all libraries using
-[Bazel](https://bazel.build) against TensorFlow headers.
-
-
-We will show how to build TensorFlow and TensorFlow Lattice pip package using
-Bazel, and install it to your virtualenv.
-
-## Activate virtualenv
-
-If using virtualenv, activate your virtualenv for the rest of the installation,
-otherwise skip this step:
-
-```shell
-~$ source $VIRTUALENV_PATH/bin/activate # bash, sh, ksh, or zsh
-~$ source $VIRTUALENV_PATH/bin/activate.csh # csh or tcsh
-```
-
-or if you are using virtualenv for the first time,
-
-```shell
-~$ sudo apt-get install python-virtualenv
-~$ virtualenv --system-site-packages tensorflow-lattice
-~$ source ~/tensorflow-lattice/bin/activate # bash, sh, ksh, or zsh
-~$ source ~/tensorflow-lattice/bin/activate.csh # csh or tcsh
-```
-## Prepare TensorFlow envirnoment for Linux.
-
-Please follow instructions in [Prepare environment for
-Linux](https://www.tensorflow.org/install/install_sources#prepare_environment_for_linux)
-to setup the environment for TensorFlow.
-
-## Clone the TensorFlow Lattice repository.
-
-Let us clone the TensorFlow Lattice repository, which contains TensorFlow as a
-submodule:
-
-```shell
-(tensorflow-lattice)~$ git clone --recursive https://github.com/tensorflow/lattice.git
-```
-
-## Configure TensorFlow and build TensorFlow pip package.
-
-### Configure TensorFlow
-
-We now need to configure TensorFlow options. See [Configure the
-installation](https://www.tensorflow.org/install/install_sources#configure_the_installation)
-for the details.
-
-```shell
-(tensorflow-lattice)~$ cd lattice
-(tensorflow-lattice)~/lattice$ cd tensorflow
-(tensorflow-lattice)~/lattice/tensorflow$ ./configure
-```
-
-### Build TensorFlow pip packaging script
-
-We are ready to build the TensorFlow pip package. See [Build the pip
-package](https://www.tensorflow.org/install/install_sources#build_the_pip_package)
-for the details.
-
-To build a pip package for TensorFlow with CPU-only support:
-
-```shell
-(tensorflow-lattice)~/lattice/tensorflow$ bazel build \
- --config=opt \
- tensorflow/tools/pip_package:build_pip_package
-```
-
-To build a pip package for TensorFlow with GPU support:
-
-```shell
-(tensorflow-lattice)~/lattice/tensorflow$ bazel build \
- --config=cuda \
- tensorflow/tools/pip_package:build_pip_package
-```
-
-### Install TensorFlow pip package
-
-```shell
-(tensorflow-lattice)~/lattice/tensorflow$ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
-(tensorflow-lattice)~/lattice/tensorflow$ pip install /tmp/tensorflow_pkg/*.whl
-```
-
-### Build TensorFlow Lattice pip packaging script
-
-To build a pip package for TensorFlow with CPU-only support:
-
-```shell
-(tensorflow-lattice)~/$ cd ~/lattice
-(tensorflow-lattice)~/lattice$ bazel build \
- --config=opt :pip_pkg
-```
-
-### Install TensorFlow Lattice pip package
-
-```shell
-(tensorflow-lattice)~/lattice$ bazel-bin/pip_pkg /tmp/tensorflow_lattice_pkg
-(tensorflow-lattice)~/lattice$ pip install /tmp/tensorflow_lattice_pkg/*.whl
-```
-
-### Test TensorFlow and TensorFlow Lattice
-```shell
-(tensorflow-lattice)~/lattice$ cd examples
-(tensorflow-lattice)~/lattice/examples$ python test.py
-```
-
-test.py is a simple python script.
-
-```python
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-x = tf.compat.v1.placeholder(tf.float32, shape=(None, 2))
-(y, _, _, _) = tfl.lattice_layer(x, lattice_sizes=(2, 2))
-
-with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- print(sess.run(y, feed_dict={x: [[0.0, 0.0]]}))
-```
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 3b46ccd..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,6 +0,0 @@
-include README.md LICENSE BUILD
-recursive-include tensorflow_lattice BUILD
-graft tensorflow_lattice/cc
-recursive-exclude tensorflow_lattice/cc *_test.cc *.so
-recursive-exclude tensorflow_lattice/cc/test_tools *
-
diff --git a/README.md b/README.md
index 230915f..71e848e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-
-
-
-
-
# TensorFlow Lattice
-This is an implementation of [Monotonic Calibrated Interpolated Look-Up Tables](http://jmlr.org/papers/v17/15-243.html) in [TensorFlow](https://www.tensorflow.org).
+TensorFlow Lattice is a library that implements constrained and interpretable
+lattice based models. It is an implementation of
+[Monotonic Calibrated Interpolated Look-Up Tables](http://jmlr.org/papers/v17/15-243.html)
+in [TensorFlow](https://www.tensorflow.org).
-These are fast-to-evaluate and interpretable lattice models, also known as
-interpolated look-up tables. This library also provides a rich and intuitive set
-of regularizations and monotonicity constraints configurable per feature.
+The library enables you to inject domain knowledge into
+the learning process through common-sense or policy-driven shape constraints.
+This is done using a collection of Keras layers that can satisfy constraints
+such as monotonicity, convexity and pairwise trust:
-It includes
-[__TensorFlow estimators__](https://www.tensorflow.org/extend/estimators) for
-regression and classification with the most common set ups for lattice models:
+* PWLCalibration: piecewise linear calibration of signals.
+* CategoricalCalibration: mapping of categorical inputs into real values.
+* Lattice: interpolated look-up table implementation.
+* Linear: linear function with monotonicity and norm constraints.
+
+The library also provides easy to setup canned estimators for common use cases:
* Calibrated Linear
* Calibrated Lattice
-* Random Tiny Lattices (_RTL_)
-* Embedded Tiny Lattices (_ETL_) (see [Deep Lattice Networks and Partial Monotonic Functions](https://research.google.com/pubs/pub46327.html))
-
-Additionally this library provides two types of __model components__
-(or __layers__) that can be combined with other types of models (including
-neural networks):
+* Random Tiny Lattices (RTL)
+* Crystals
-* Calibration: piecewise linear calibration of signals.
-* Lattice: interpolated look-up table implementation.
+With TF Lattice you can use domain knowledge to better extrapolate to the parts
+of the input space not covered by the training dataset. This helps avoid
+unexpected model behaviour when the serving distribution is different from the
+training distribution.
+
+
+
You can install our prebuilt pip package using
```bash
pip install tensorflow-lattice
```
-
-but please see the [install](INSTALL.md) section for more detailed instructions.
-
-This [tutorial](g3doc/tutorial/index.md) contains more detailed explanation
-about lattice models and usage in TensorFlow, and check out
-[API docs](g3doc/api_docs/python/index.md) for python APIs.
-
-__TensorFlow Lattice is not an official Google product.__
diff --git a/WORKSPACE b/WORKSPACE
index 0f4283f..06761c4 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -1,77 +1,16 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
+# Copyright 2018 The TensorFlow Lattice Authors.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
# ==============================================================================
-workspace(name = "tensorflow_lattice")
-
-local_repository(
- name = "org_tensorflow",
- path = "tensorflow",
-)
-
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
-
-# This rule is from TensorFlow's WORKSPACE.
-http_archive(
- name = "io_bazel_rules_closure",
- sha256 = "e0a111000aeed2051f29fcc7a3f83be3ad8c6c93c186e64beb1ad313f0c7f9f9",
- strip_prefix = "rules_closure-cf1e44edb908e9616030cc83d085989b8e6cd6df",
- urls = [
- "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz",
- "https://github.com/bazelbuild/rules_closure/archive/cf1e44edb908e9616030cc83d085989b8e6cd6df.tar.gz", # 2019-04-04
- ],
-)
-# Apple and Swift rules.
-http_archive(
- name = "build_bazel_rules_apple",
- sha256 = "23792cd999f97fc97284d1c44cb1324bfdd0bc54aa68ad513fa3705aca3b1f9e",
- urls = ["https://github.com/bazelbuild/rules_apple/releases/download/0.15.0/rules_apple.0.15.0.tar.gz"],
-) # https://github.com/bazelbuild/rules_apple/releases
-http_archive(
- name = "build_bazel_apple_support",
- sha256 = "7356dbd44dea71570a929d1d4731e870622151a5f27164d966dda97305f33471",
- urls = ["https://github.com/bazelbuild/apple_support/releases/download/0.6.0/apple_support.0.6.0.tar.gz"],
-) # https://github.com/bazelbuild/apple_support/releases
-http_archive(
- name = "bazel_skylib",
- sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e",
- urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.8.0/bazel-skylib.0.8.0.tar.gz"],
-) # https://github.com/bazelbuild/bazel-skylib/releases
-http_archive(
- name = "build_bazel_rules_swift",
- sha256 = "9efe9699e9765e6b4a5e063e4a08f6b163cccaf0443f775d935baf5c3cd6ed0e",
- urls = ["https://github.com/bazelbuild/rules_swift/releases/download/0.9.0/rules_swift.0.9.0.tar.gz"],
-) # https://github.com/bazelbuild/rules_swift/releases
-http_archive(
- name = "com_github_apple_swift_swift_protobuf",
- type = "zip",
- strip_prefix = "swift-protobuf-1.5.0/",
- urls = ["https://github.com/apple/swift-protobuf/archive/1.5.0.zip"],
-) # https://github.com/apple/swift-protobuf/releases
-http_file(
- name = "xctestrunner",
- executable = 1,
- urls = ["https://github.com/google/xctestrunner/releases/download/0.2.7/ios_test_runner.par"],
-) # https://github.com/google/xctestrunner/releases
-# Use `swift_rules_dependencies` to fetch the toolchains. With the
-# `git_repository` rules above, the following call will skip redefining them.
-load("@build_bazel_rules_swift//swift:repositories.bzl", "swift_rules_dependencies")
-swift_rules_dependencies()
-
-load("@org_tensorflow//tensorflow:workspace.bzl", "tf_workspace")
-
-tf_workspace(
- path_prefix = "",
- tf_repo_name = "org_tensorflow",
-)
+workspace(name = "tensorflow_lattice")
diff --git a/build_docs.py b/build_docs.py
new file mode 100644
index 0000000..74c317b
--- /dev/null
+++ b/build_docs.py
@@ -0,0 +1,85 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generate docs API for TF Lattice.
+
+Example run:
+
+```
+python build_docs.py --output_dir=/path/to/output
+```
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+
+from absl import app
+from absl import flags
+
+from tensorflow_docs.api_generator import generate_lib
+from tensorflow_docs.api_generator import public_api
+
+import tensorflow_lattice as tfl
+
+flags.DEFINE_string('output_dir', '/tmp/tfl_api/',
+ 'The path to output the files to')
+
+flags.DEFINE_string(
+ 'code_url_prefix',
+ 'https://github.com/tensorflow/lattice/blob/master/tensorflow_lattice',
+ 'The url prefix for links to code.')
+
+flags.DEFINE_bool('search_hints', True,
+ 'Include metadata search hints in the generated files')
+
+flags.DEFINE_string('site_path', 'lattice/api_docs/python',
+ 'Path prefix in the _toc.yaml')
+
+FLAGS = flags.FLAGS
+
+
+def local_definitions_filter(path, parent, children):
+ """Filters local imports, except for the tfl.layers module."""
+ if path == ('tfl', 'layers'):
+ return children
+ return public_api.local_definitions_filter(path, parent, children)
+
+
+def main(_):
+ private_map = {
+ 'tfl': ['python'],
+ 'tfl.categorical_calibration_layer': ['CategoricalCalibration'],
+ 'tfl.lattice_layer': ['Lattice'],
+ 'tfl.linear_layer': ['Linear'],
+ 'tfl.pwl_calibration_layer': ['PWLCalibration'],
+ 'tfl.parallel_combination_layer': ['ParallelCombination']
+ }
+ doc_generator = generate_lib.DocGenerator(
+ root_title='TensorFlow Lattice 2.0',
+ py_modules=[('tfl', tfl)],
+ base_dir=os.path.dirname(tfl.__file__),
+ code_url_prefix=FLAGS.code_url_prefix,
+ search_hints=FLAGS.search_hints,
+ site_path=FLAGS.site_path,
+ private_map=private_map,
+ callbacks=[local_definitions_filter])
+
+ sys.exit(doc_generator.build(output_dir=FLAGS.output_dir))
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/build_tools/ci_build/ci_common.sh b/build_tools/ci_build/ci_common.sh
deleted file mode 100644
index d368c68..0000000
--- a/build_tools/ci_build/ci_common.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# Run tensorflow lattice bazel tests.
-function tensorflow_lattice_test {
- # Cleaning up bazel workspace
- bazel clean
-
- if [[ "${IS_MAC}" == true ]]; then
- N_JOBS=$(sysctl -n hw.ncpu)
- else
- N_JOBS=$(grep -c ^processor /proc/cpuinfo)
- fi
-
- echo ""
- echo "Bazel will use ${N_JOBS} concurrent job(s)."
- echo ""
-
- bazel test --config=opt --test_tag_filters=-gpu -k \
- --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only \
- --test_output=errors -- \
- //tensorflow_lattice/...
-}
diff --git a/build_tools/ci_build/macosx/py2.sh b/build_tools/ci_build/macosx/py2.sh
deleted file mode 100755
index b36846b..0000000
--- a/build_tools/ci_build/macosx/py2.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# This script will run the bash function tensorflow_lattice_test under a python2
-# environment.
-
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export IS_MAC=true
-export TFL_PY="py2"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common ci scripts.
-source "build_tools/ci_build/ci_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Running all tests."
-tensorflow_lattice_test
-echo "Done with testing."
-
-deactivate
diff --git a/build_tools/ci_build/macosx/py3.sh b/build_tools/ci_build/macosx/py3.sh
deleted file mode 100755
index 14bccc8..0000000
--- a/build_tools/ci_build/macosx/py3.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# This script will run the bash function tensorflow_lattice_test under a python3
-# environment.
-
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export IS_MAC=true
-export TFL_PY="py3"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common ci scripts.
-source "build_tools/ci_build/ci_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Running all tests."
-tensorflow_lattice_test
-echo "Done with testing."
-
-deactivate
diff --git a/build_tools/ci_build/ubuntu/py2.sh b/build_tools/ci_build/ubuntu/py2.sh
deleted file mode 100755
index bd014b1..0000000
--- a/build_tools/ci_build/ubuntu/py2.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# This script will run the bash function tensorflow_lattice_test under a python2
-# environment.
-
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export IS_MAC=false
-export TFL_PY="py2"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common ci scripts.
-source "build_tools/ci_build/ci_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Running all tests."
-tensorflow_lattice_test
-echo "Done with testing."
-
-deactivate
diff --git a/build_tools/ci_build/ubuntu/py3.sh b/build_tools/ci_build/ubuntu/py3.sh
deleted file mode 100755
index 9e223ef..0000000
--- a/build_tools/ci_build/ubuntu/py3.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# This script will run the bash function tensorflow_lattice_test under a python3
-# environment.
-
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export IS_MAC=false
-export TFL_PY="py3"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common ci scripts.
-source "build_tools/ci_build/ci_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Running all tests."
-tensorflow_lattice_test
-echo "Done with testing."
-
-deactivate
diff --git a/build_tools/common.sh b/build_tools/common.sh
deleted file mode 100755
index e512180..0000000
--- a/build_tools/common.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-# Git initialization
-function git_init {
- # Run configure.
- export TF_NEED_GCP=0
- export TF_NEED_HDFS=0
- export PYTHON_BIN_PATH=$(which python)
-
- # Initialize git.
- git init
-
- if [ -d "tensorflow" ]; then
- echo "TensorFlow submodule exist. Checkout r1.14"
- cd tensorflow
- git checkout r1.14
- cd -
- else
- echo "Add TensorFlow r1.14 submodule."
- git submodule add -b r1.14 https://github.com/tensorflow/tensorflow.git
- fi
-
- # Fetch all submodules.
- git submodule update --init --recursive
-
- # Configure tensorflow.
- cd tensorflow
- git show --oneline -s
- yes "" | ./configure
-
- cd -
- echo "Applying visibility change."
- sed -i.bak -e 's/:internal/\/\/visibility:public/g' -- "tensorflow/tensorflow/BUILD"
-}
-
-# Create virtualenv.
-function create_virtualenv {
- if [ "${TFL_PY}" = "py3" ]; then
- echo "Setting up python 3 virtualenv"
- export TFL_ENV_PATH=${TFL_ROOT}/tensorflow-lattice-env-py3
- virtualenv --system-site-packages -p python3 ${TFL_ENV_PATH}
- else
- echo "Setting up python 2 virtualenv"
- export TFL_ENV_PATH=${TFL_ROOT}/tensorflow-lattice-env-py2
- virtualenv --system-site-packages -p python2.7 ${TFL_ENV_PATH}
- fi
- source ${TFL_ENV_PATH}/bin/activate
- python -V
- pip install --upgrade pip
- pip install --upgrade six numpy wheel enum34 protobuf keras_applications keras_preprocessing tensorflow_estimator
- deactivate
-}
-
-# Prepare all necessary environment for bazel build & testing.
-function prepare_build {
- # modify default gcc on linux
- if [ "$(uname)" == "Linux" ]; then
- sudo update-alternatives --set gcc /usr/bin/gcc-4.8
- fi
-
- # If TFL_ROOT does not exist, create one in here.
- if [ -z "${TFL_ROOT}" ]; then
- echo "TFL_ROOT is empty, so set to /tmp/tfl_root."
- export TFL_ROOT="/tmp/tfl_root"
- fi
-
- # Create virtualenv.
- create_virtualenv
-
- # Activate virtualenv.
- source ${TFL_ENV_PATH}/bin/activate
-
- if [ "${TFL_USE_GPU}" = true ]; then
- echo "GPU build -- Enable CUDA"
- export TF_NEED_CUDA=1
- else
- echo "CPU build -- No CUDA"
- export TF_NEED_CUDA=0
- fi
-
- echo "Initialize git repo."
- git_init
- echo "Initialization is done."
-
- deactivate
-}
diff --git a/build_tools/release_build/py2.sh b/build_tools/release_build/py2.sh
deleted file mode 100755
index 74de1dc..0000000
--- a/build_tools/release_build/py2.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export TFL_PY="py2"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common release scripts.
-source "build_tools/release_build/release_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Build pip package."
-build_pip_pkg
-echo "Done."
-
-echo "Install pip package and test."
-install_pip_and_test
-echo "Done."
-
-deactivate
diff --git a/build_tools/release_build/py2_gpu.sh b/build_tools/release_build/py2_gpu.sh
deleted file mode 100755
index 2731300..0000000
--- a/build_tools/release_build/py2_gpu.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export TFL_PY="py2"
-export TFL_USE_GPU=true
-
-# Prepare build.
-prepare_build
-
-# Source common release scripts.
-source "build_tools/release_build/release_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Build pip package."
-build_pip_pkg
-echo "Done."
-
-echo "Install pip package and test."
-install_pip_and_test
-echo "Done."
-
-deactivate
diff --git a/build_tools/release_build/py3.sh b/build_tools/release_build/py3.sh
deleted file mode 100755
index 89a2f04..0000000
--- a/build_tools/release_build/py3.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export TFL_PY="py3"
-export TFL_USE_GPU=false
-
-# Prepare build.
-prepare_build
-
-# Source common release scripts.
-source "build_tools/release_build/release_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Build pip package."
-build_pip_pkg
-echo "Done."
-
-echo "Install pip package and test."
-install_pip_and_test
-echo "Done."
-
-deactivate
diff --git a/build_tools/release_build/py3_gpu.sh b/build_tools/release_build/py3_gpu.sh
deleted file mode 100755
index a84defc..0000000
--- a/build_tools/release_build/py3_gpu.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-set -e
-set -x
-
-# Source common scripts.
-source "build_tools/common.sh"
-
-export TFL_PY="py3"
-export TFL_USE_GPU=true
-
-# Prepare build.
-prepare_build
-
-# Source common release scripts.
-source "build_tools/release_build/release_common.sh"
-
-# Activate virtualenv.
-source ${TFL_ENV_PATH}/bin/activate
-
-echo "Build pip package."
-build_pip_pkg
-echo "Done."
-
-echo "Install pip package and test."
-install_pip_and_test
-echo "Done."
-
-deactivate
diff --git a/build_tools/release_build/release_common.sh b/build_tools/release_build/release_common.sh
deleted file mode 100644
index c27b783..0000000
--- a/build_tools/release_build/release_common.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-function build_pip_pkg {
- # Clean up bazel workspace
- bazel clean
-
- if [ "${TFL_NATIVE}" = true ]; then
- # Build pip install package.
- bazel build \
- --define framework_shared_object=true \
- --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
- --compilation_mode=opt \
- --distinct_host_configuration=false \
- :pip_pkg
- else
- bazel build \
- --define framework_shared_object=true \
- --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
- --compilation_mode=opt \
- --cpu=k8 \
- --distinct_host_configuration=false \
- :pip_pkg
- fi
-
- if [ -z "${TFL_ARTIFACTS_DIR}" ]; then
- echo "TFL_ARTIFACTS_DIR is empty, so set tp /tmp/tfl_artifacts"
- export TFL_ARTIFACTS_DIR="/tmp/tfl_artifacts"
- fi
-
- # Create wheel to artifacts dir.
- if [ "${TFL_USE_GPU}" = true ]; then
- echo 'Building pip package for gpu'
- ./bazel-bin/pip_pkg ${TFL_ARTIFACTS_DIR} --gpu
- else
- echo 'Building pip package for cpu'
- ./bazel-bin/pip_pkg ${TFL_ARTIFACTS_DIR}
- fi
-}
-
-function install_pip_and_test {
- # Check python version.
- python -V
-
- # Install TensorFlow Lattice
- pip install --upgrade ${TFL_ARTIFACTS_DIR}/*.whl
-
- # Run the example script to check whether it works or not.
- cd examples
-
- # Check TensorFlow version
- python -c 'import tensorflow as tf; print(tf.__version__)'
-
- echo 'running lattice example'
- python lattice_test.py
- echo 'running coffee example'
- python coffee_test.py
- echo 'running estimator example'
- python estimator_test.py
-}
diff --git a/docs/_book.yaml b/docs/_book.yaml
new file mode 100644
index 0000000..09c62f4
--- /dev/null
+++ b/docs/_book.yaml
@@ -0,0 +1,35 @@
+upper_tabs:
+# Tabs left of dropdown menu
+- include: /_upper_tabs_left.yaml
+- include: /api_docs/_upper_tabs_api.yaml
+# Dropdown menu
+- name: Resources
+ path: /resources
+ is_default: true
+ menu:
+ - include: /resources/_menu_toc.yaml
+ lower_tabs:
+ # Subsite tabs
+ other:
+ - name: Guide & Tutorials
+ contents:
+ - title: Overview
+ path: /lattice/overview
+ - title: Install
+ path: /lattice/install
+ - heading: TensorFlow Lattice Tutorials
+ - title: Shape Constraints Tutorial
+ path: /lattice/tutorials/shape_constraints
+ - title: Keras Layers Tutorial
+ path: /lattice/tutorials/keras_layers
+ - title: Canned Estimators Tutorial
+ path: /lattice/tutorials/canned_estimators
+ - title: Custom Estimators Tutorial
+ path: /lattice/tutorials/custom_estimators
+
+ - name: API
+ skip_translation: true
+ contents:
+ - include: /lattice/api_docs/python/_toc.yaml
+
+- include: /_upper_tabs_right.yaml
diff --git a/docs/_index.yaml b/docs/_index.yaml
new file mode 100644
index 0000000..e2ff8e9
--- /dev/null
+++ b/docs/_index.yaml
@@ -0,0 +1,81 @@
+book_path: /lattice/_book.yaml
+project_path: /lattice/_project.yaml
+description:
+landing_page:
+ custom_css_path: /site-assets/css/style.css
+ rows:
+ - heading: Flexible, controlled and interpretable ML with lattice based models
+ items:
+ - classname: devsite-landing-row-50
+ description: >
+
+        TensorFlow Lattice is a library that implements constrained and interpretable lattice
+ based models. The library enables you to inject domain knowledge into the learning process
+ through common-sense or policy-driven
+ shape constraints. This is done using a
+ collection of Keras layers that can satisfy
+ constraints such as monotonicity, convexity and how features interact. The library also
+ provides easy to setup canned estimators.
+
+        With TF Lattice you can use domain knowledge to better extrapolate to the parts of the
+ input space not covered by the training dataset. This helps avoid unexpected model behaviour
+ when the serving distribution is different from the training distribution.
+
+ - classname: devsite-landing-row-cards
+ items:
+ - heading: "TensorFlow Lattice: Flexibility empowered by prior knowledge"
+ image_path: /resources/images/tf-logo-card-16x9.png
+ path: https://ai.googleblog.com/2017/10/tensorflow-lattice-flexibility.html
+ buttons:
+ - label: "Read on the Google AI blog"
+ path: https://ai.googleblog.com/2017/10/tensorflow-lattice-flexibility.html
+ - heading: "TensorFlow Lattice: Control your ML with monotonicity"
+ youtube_id: ABBnNjbjv2Q
+ buttons:
+ - label: Watch the video
+ path: https://www.youtube.com/watch?v=ABBnNjbjv2Q
+ - heading: "TF Lattice on GitHub"
+ image_path: /resources/images/github-card-16x9.png
+ path: https://github.com/tensorflow/lattice
+ buttons:
+ - label: "View on GitHub"
+ path: https://github.com/tensorflow/lattice
diff --git a/g3doc/images/2d_lattice.png b/docs/images/2d_lattice.png
similarity index 100%
rename from g3doc/images/2d_lattice.png
rename to docs/images/2d_lattice.png
diff --git a/docs/images/data_dist.png b/docs/images/data_dist.png
new file mode 100644
index 0000000..63eceab
Binary files /dev/null and b/docs/images/data_dist.png differ
diff --git a/docs/images/favicon.ico b/docs/images/favicon.ico
new file mode 100644
index 0000000..41c37e4
Binary files /dev/null and b/docs/images/favicon.ico differ
diff --git a/docs/images/flexible_fit.png b/docs/images/flexible_fit.png
new file mode 100644
index 0000000..1957eb5
Binary files /dev/null and b/docs/images/flexible_fit.png differ
diff --git a/docs/images/linear_fit.png b/docs/images/linear_fit.png
new file mode 100644
index 0000000..ad032f0
Binary files /dev/null and b/docs/images/linear_fit.png differ
diff --git a/docs/images/model_comparison.png b/docs/images/model_comparison.png
new file mode 100644
index 0000000..5213b4b
Binary files /dev/null and b/docs/images/model_comparison.png differ
diff --git a/docs/images/monotonic_fit.png b/docs/images/monotonic_fit.png
new file mode 100644
index 0000000..8e62a76
Binary files /dev/null and b/docs/images/monotonic_fit.png differ
diff --git a/docs/images/pwl_calibration_distance.png b/docs/images/pwl_calibration_distance.png
new file mode 100644
index 0000000..63108f2
Binary files /dev/null and b/docs/images/pwl_calibration_distance.png differ
diff --git a/docs/images/pwl_calibration_price.png b/docs/images/pwl_calibration_price.png
new file mode 100644
index 0000000..1de26c2
Binary files /dev/null and b/docs/images/pwl_calibration_price.png differ
diff --git a/docs/images/regularized_fit.png b/docs/images/regularized_fit.png
new file mode 100644
index 0000000..96f4355
Binary files /dev/null and b/docs/images/regularized_fit.png differ
diff --git a/g3doc/images/tensorflow_lattice.png b/docs/images/tensorflow_lattice.png
similarity index 100%
rename from g3doc/images/tensorflow_lattice.png
rename to docs/images/tensorflow_lattice.png
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000..9937349
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,39 @@
+# Install TensorFlow Lattice
+
+There are several ways to set up your environment to use TensorFlow Lattice
+(TFL).
+
+* The easiest way to learn and use TFL requires no installation: run any of
+  the tutorials (e.g.
+  [canned estimators tutorial](tutorials/canned_estimators.ipynb)).
+* To use TFL on a local machine, install the `tensorflow-lattice` pip package.
+* If you have a unique machine configuration, you can build the package from
+ source.
+
+## Install TensorFlow Lattice using pip
+
+Install using pip.
+
+```shell
+pip install --upgrade tensorflow-lattice
+```
+
+## Build from source
+
+Clone the github repo:
+
+```shell
+git clone https://github.com/tensorflow/lattice.git
+```
+
+Build pip package from source:
+
+```shell
+python setup.py sdist bdist_wheel --universal --release
+```
+
+Install the package:
+
+```shell
+pip install --user --upgrade /path/to/pkg.whl
+```
diff --git a/docs/overview.md b/docs/overview.md
new file mode 100644
index 0000000..ba1d9e0
--- /dev/null
+++ b/docs/overview.md
@@ -0,0 +1,200 @@
+# TensorFlow Lattice (TFL)
+
+TensorFlow Lattice is a library that implements flexible, controlled and
+interpretable lattice based models. The library enables you to inject domain
+knowledge into the learning process through common-sense or policy-driven
+[shape constraints](tutorials/shape_constraints.ipynb). This is done using a
+collection of [Keras layers](tutorials/keras_layers.ipynb) that can satisfy
+constraints such as monotonicity, convexity and pairwise trust. The library also
+provides easy to setup [canned estimators](tutorials/canned_estimators.ipynb).
+
+## Concepts
+
+This section is a simplified version of the description in
+[Monotonic Calibrated Interpolated Look-Up Tables](http://jmlr.org/papers/v17/15-243.html)
+, JMLR 2016.
+
+### Lattices
+
+A *lattice* is an interpolated look-up table that can approximate arbitrary
+input-output relationships in your data. It overlaps a regular grid onto your
+input space and learns values for the output in the vertices of the grid. For a
+test point $x$, $f(x)$ is linearly interpolated from the lattice values
+surrounding $x$.
+
+
+
+The simple example above is a function with 2 input features and 4 parameters:
+$\theta=[0, 0.2, 0.4, 1]$, which are the function's values at the corners of the
+input space; the rest of the function is interpolated from these parameters.
+
+The function $f(x)$ can capture non-linear interactions between features. You
+can think of the lattice parameters as the height of poles set in the ground on
+a regular grid, and the resulting function is like cloth pulled tight against
+the four poles.
+
+With $D$ features and 2 vertices along each dimension, a regular lattice will
+have $2^D$ parameters. To fit a more flexible function, you can specify a
+finer-grained lattice over the feature space with more vertices along each
+dimension. Lattice regression functions are continuous and piecewise infinitely
+differentiable.
+
+### Calibration
+
+Let's say the preceding sample lattice represents a learned *user happiness*
+with a suggested local coffee shop calculated using features:
+
+* coffee price, in range 0 to 20 dollars
+* distance to the user, in range 0 to 30 kilometers
+
+We want our model to learn user happiness with a local coffee shop suggestion.
+TensorFlow Lattice models can use *piecewise linear functions* (with
+`tfl.layers.PWLCalibration`) to calibrate and normalize the input features to
+the range accepted by the lattice: 0.0 to 1.0 in the example lattice above. The
+following shows examples of such calibration functions with 10 keypoints:
+
+![distance calibration](images/pwl_calibration_distance.png)
+![price calibration](images/pwl_calibration_price.png)
+
+It is often a good idea to use the quantiles of the features as input keypoints.
+TensorFlow Lattice [canned estimators](tutorials/canned_estimators.ipynb) can
+automatically set the input keypoints to the feature quantiles.
+
+For categorical features, TensorFlow Lattice provides categorical calibration
+(with `tfl.layers.CategoricalCalibration`) with similar output bounding to feed
+into a lattice.
+
+### Ensembles
+
+The number of parameters of a lattice layer increases exponentially with the
+number of input features, hence not scaling well to very high dimensions. To
+overcome this limitation, TensorFlow Lattice offers ensembles of lattices that
+combine (average) several *tiny* lattices, which enables the model to grow
+linearly in the number of features.
+
+The library provides two variations of these ensembles:
+
+* **Random Tiny Lattices** (RTL): Each submodel uses a random subset of
+ features (with replacement).
+
+* **Crystals** : The Crystals algorithm first trains a *prefitting* model that
+ estimates pairwise feature interactions. It then arranges the final ensemble
+ such that features with more non-linear interactions are in the same
+ lattices.
+
+## Why TensorFlow Lattice?
+
+You can find a brief introduction to TensorFlow Lattice in
+[Google AI's Blog post](https://research.googleblog.com/).
+
+### Interpretability
+
+Since the parameters of each layer are the output of that layer, it is easy to
+analyze, understand and debug each part of the model.
+
+### Accurate and Flexible Models
+
+Using fine-grained lattices, you can get *arbitrarily complex* functions with a
+single lattice layer. Using multiple layers of calibrators and lattices often
+work nicely in practice and can match or outperform DNN models of similar sizes.
+
+### Common-Sense Shape Constraints
+
+Real world training data is often a somewhat biased representation of where the
+model will be applied.
+
+
+
+Unconstrained and flexible ML solutions such as DNNs or decision trees often act
+unexpectedly in parts of the input space not covered by the training data. Even
+though common forms of regularization can reduce nonsensical extrapolation, it
+is hardly enough to guarantee reasonable model behaviour across the entire input
+space.
+
+TensorFlow Lattice provides several types of *semantic regularization* through
+[shape constraints](tutorials/shape_constraints.ipynb):
+
+* **Monotonicity**: You can specify that the output should only
+ increase/decrease with respect to an input. In our example, you may want to
+ specify that increased distance to a coffee shop should only decrease the
+ predicted user preference.
+
+![linear fit](images/linear_fit.png) ![flexible fit](images/flexible_fit.png)
+![regularized fit](images/regularized_fit.png)
+![monotonic fit](images/monotonic_fit.png)
+
+* **Convexity/Concavity**: You can specify that the function shape can be
+ convex or concave. Mixed with monotonicity, this can force the function to
+ represent diminishing returns with respect to a given feature.
+
+* **Unimodality**: You can specify that the function should have a unique peak
+    or unique valley. This lets you represent functions that have a *sweet spot*
+ with respect to a feature.
+
+* **Pairwise trust**: This constraint works on a pair of features and suggests
+ that one input feature semantically reflects trust in another feature. For
+ example, higher number of reviews makes you more confident in the average
+ star rating of a restaurant. The model will be more sensitive with respect
+ to the star rating (i.e. will have a larger slope with respect to the
+ rating) when the number of reviews is higher.
+
+### Controlled Flexibility with Regularizers
+
+In addition to shape constraints, TensorFlow lattice provides a number of
+regularizers to control the flexibility and smoothness of the function for each
+layer.
+
+* **Laplacian Regularizer**: Outputs of the lattice/calibration
+ vertices/keypoints are regularized towards the values of their respective
+ neighbors. This results in a *flatter* function.
+
+* **Hessian Regularizer**: This penalizes the first derivative of the PWL
+ calibration layer to make the function *more linear*.
+
+* **Wrinkle Regularizer**: This penalizes the second derivative of the PWL
+ calibration layer to avoid sudden changes in the curvature. It makes the
+ function smoother.
+
+* **Torsion**: Outputs of the lattice will be regularized towards preventing
+ torsion among the features. In other words, the model will be regularized
+ towards independence between the contributions of the features.
+
+### Mix and match with other Keras layers
+
+You can use TF Lattice layers in combination with other Keras layers to
+construct partially constrained or regularized models. For example, lattice or
+PWL calibration layers can be used at the last layer of deeper networks that
+include embeddings or other Keras layers.
+
+## Papers
+
+* [Shape Constraints for Set Functions](http://proceedings.mlr.press/v97/cotter19a.html),
+ Andrew Cotter, Maya Gupta, H. Jiang, Erez Louidor, Jim Muller, Taman
+ Narayan, Serena Wang, Tao Zhu. International Conference on Machine Learning
+ (ICML), 2019
+* [Diminishing Returns Shape Constraints for Interpretability and
+ Regularization](https://papers.nips.cc/paper/7916-diminishing-returns-shape-constraints-for-interpretability-and-regularization),
+ Maya Gupta, Dara Bahri, Andrew Cotter, Kevin Canini, Advances in Neural
+ Information Processing Systems (NeurIPS), 2018
+* [Deep Lattice Networks and Partial Monotonic Functions](https://research.google.com/pubs/pub46327.html),
+ Seungil You, Kevin Canini, David Ding, Jan Pfeifer, Maya R. Gupta, Advances
+ in Neural Information Processing Systems (NeurIPS), 2017
+* [Fast and Flexible Monotonic Functions with Ensembles of Lattices](https://papers.nips.cc/paper/6377-fast-and-flexible-monotonic-functions-with-ensembles-of-lattices),
+ Mahdi Milani Fard, Kevin Canini, Andrew Cotter, Jan Pfeifer, Maya Gupta,
+ Advances in Neural Information Processing Systems (NeurIPS), 2016
+* [Monotonic Calibrated Interpolated Look-Up Tables](http://jmlr.org/papers/v17/15-243.html),
+ Maya Gupta, Andrew Cotter, Jan Pfeifer, Konstantin Voevodski, Kevin Canini,
+ Alexander Mangylov, Wojciech Moczydlowski, Alexander van Esbroeck, Journal
+ of Machine Learning Research (JMLR), 2016
+* [Optimized Regression for Efficient Function Evaluation](http://ieeexplore.ieee.org/document/6203580/),
+ Eric Garcia, Raman Arora, Maya R. Gupta, IEEE Transactions on Image
+ Processing, 2012
+* [Lattice Regression](https://papers.nips.cc/paper/3694-lattice-regression),
+ Eric Garcia, Maya Gupta, Advances in Neural Information Processing Systems
+ (NeurIPS), 2009
+
+## Tutorials and API docs
+
+You can use [Canned Estimators](tutorials/canned_estimators.ipynb) or
+[Keras Layers](tutorials/keras_layers.ipynb). Check out
+[full API docs](api_docs/python/tfl.ipynb) for details.
diff --git a/docs/tutorials/canned_estimators.ipynb b/docs/tutorials/canned_estimators.ipynb
new file mode 100644
index 0000000..afd4bcf
--- /dev/null
+++ b/docs/tutorials/canned_estimators.ipynb
@@ -0,0 +1,722 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7765UFHoyGx6"
+ },
+ "source": [
+ "##### Copyright 2020 The TensorFlow Authors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "form",
+ "colab": {},
+ "colab_type": "code",
+ "id": "KsOkK8O69PyT"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ZS8z-_KeywY9"
+ },
+ "source": [
+ "# TF Lattice Canned Estimators"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "r61fkA2i9Y3_"
+ },
+ "source": [
+ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/lattice/tutorials/canned_estimators\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/lattice/blob/master/docs/tutorials/canned_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/lattice/blob/master/docs/tutorials/canned_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/docs/site/en/lattice/tutorials/canned_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ "\u003c/table\u003e"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "WCpl-9WDVq9d"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "Canned estimators are quick and easy ways to train TFL models for typical use cases. This guide outlines the steps needed to create a TFL canned estimator."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "x769lI12IZXB"
+ },
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "fbBVAR6UeRN5"
+ },
+ "source": [
+ "Installing TF Lattice package:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "bpXjJKpSd3j4"
+ },
+ "outputs": [],
+ "source": [
+ "#@test {\"skip\": true}\n",
+ "!pip install tensorflow-lattice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "jSVl9SHTeSGX"
+ },
+ "source": [
+ "Importing required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "FbZDk8bIx8ig"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "\n",
+ "try:\n",
+ " # %tensorflow_version only exists in Colab.\n",
+ " %tensorflow_version 2.x\n",
+ "except Exception:\n",
+ " pass\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import tensorflow_lattice as tfl\n",
+ "from tensorflow import feature_column as fc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "svPuM6QNxlrH"
+ },
+ "source": [
+ "Downloading the UCI Statlog (Heart) dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "j-k1qTR_yvBl"
+ },
+ "outputs": [],
+ "source": [
+ "csv_file = tf.keras.utils.get_file(\n",
+ " 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')\n",
+ "df = pd.read_csv(csv_file)\n",
+ "target = df.pop('target')\n",
+ "train_size = int(len(df) * 0.8)\n",
+ "train_x = df[:train_size]\n",
+ "train_y = target[:train_size]\n",
+ "test_x = df[train_size:]\n",
+ "test_y = target[train_size:]\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "nKkAw12SxvGG"
+ },
+ "source": [
+ "Setting the default values used for training in this guide:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "1T6GFI9F6mcG"
+ },
+ "outputs": [],
+ "source": [
+ "LEARNING_RATE = 0.01\n",
+ "BATCH_SIZE = 128\n",
+ "NUM_EPOCHS = 500\n",
+ "PREFITTING_NUM_EPOCHS = 10"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "0TGfzhPHzpix"
+ },
+ "source": [
+ "## Feature Columns\n",
+ "\n",
+ "As for any other TF estimator, data needs to be passed to the estimator, which is typically via an input_fn and parsed using [FeatureColumns](https://www.tensorflow.org/guide/feature_columns)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DCIUz8apzs0l"
+ },
+ "outputs": [],
+ "source": [
+ "# Feature columns.\n",
+ "# - age\n",
+ "# - sex\n",
+ "# - cp chest pain type (4 values)\n",
+ "# - trestbps resting blood pressure\n",
+ "# - chol serum cholestoral in mg/dl\n",
+ "# - fbs fasting blood sugar \u003e 120 mg/dl\n",
+ "# - restecg resting electrocardiographic results (values 0,1,2)\n",
+ "# - thalach maximum heart rate achieved\n",
+ "# - exang exercise induced angina\n",
+ "# - oldpeak ST depression induced by exercise relative to rest\n",
+ "# - slope the slope of the peak exercise ST segment\n",
+ "# - ca number of major vessels (0-3) colored by flourosopy\n",
+ "# - thal 3 = normal; 6 = fixed defect; 7 = reversable defect\n",
+ "feature_columns = [\n",
+ " fc.numeric_column('age', default_value=-1),\n",
+ " fc.categorical_column_with_vocabulary_list('sex', [0, 1]),\n",
+ " fc.numeric_column('cp'),\n",
+ " fc.numeric_column('trestbps', default_value=-1),\n",
+ " fc.numeric_column('chol'),\n",
+ " fc.categorical_column_with_vocabulary_list('fbs', [0, 1]),\n",
+ " fc.categorical_column_with_vocabulary_list('restecg', [0, 1, 2]),\n",
+ " fc.numeric_column('thalach'),\n",
+ " fc.categorical_column_with_vocabulary_list('exang', [0, 1]),\n",
+ " fc.numeric_column('oldpeak'),\n",
+ " fc.categorical_column_with_vocabulary_list('slope', [0, 1, 2]),\n",
+ " fc.numeric_column('ca'),\n",
+ " fc.categorical_column_with_vocabulary_list(\n",
+ " 'thal', ['normal', 'fixed', 'reversible']),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "hEZstmtT2CA3"
+ },
+ "source": [
+ "TFL canned estimators use the type of the feature column to decide what type of calibration layer to use. We use a `tfl.layers.PWLCalibration` layer for numeric feature columns and a `tfl.layers.CategoricalCalibration` layer for categorical feature columns.\n",
+ "\n",
+ "Note that categorical feature columns are not wrapped by an embedding feature column. They are directly fed into the estimator."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "H_LoW_9m5OFL"
+ },
+ "source": [
+ "## Creating input_fn\n",
+ "\n",
+ "As for any other estimator, you can use an input_fn to feed data to the model for training and evaluation. TFL estimators can automatically calculate quantiles of the features and use them as input keypoints for the PWL calibration layer. To do so, they require passing a `feature_analysis_input_fn`, which is similar to the training input_fn but with a single epoch or a subsample of the data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "lFVy1Efy5NKD"
+ },
+ "outputs": [],
+ "source": [
+ "train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=train_x,\n",
+ " y=train_y,\n",
+ " shuffle=False,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=NUM_EPOCHS,\n",
+ " num_threads=1)\n",
+ "\n",
+ "# feature_analysis_input_fn is used to collect statistics about the input.\n",
+ "feature_analysis_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=train_x,\n",
+ " y=train_y,\n",
+ " shuffle=False,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " # Note that we only need one pass over the data.\n",
+ " num_epochs=1,\n",
+ " num_threads=1)\n",
+ "\n",
+ "test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=test_x,\n",
+ " y=test_y,\n",
+ " shuffle=False,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=1,\n",
+ " num_threads=1)\n",
+ "\n",
+ "# Serving input fn is used to create saved models.\n",
+ "serving_input_fn = (\n",
+ " tf.estimator.export.build_parsing_serving_input_receiver_fn(\n",
+ " feature_spec=fc.make_parse_example_spec(feature_columns)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "uQlzREcm2Wbj"
+ },
+ "source": [
+ "## Feature Configs\n",
+ "\n",
+ "Feature calibration and per-feature configurations are set using `tfl.configs.FeatureConfig`. Feature configurations include monotonicity constraints, per-feature regularization (see `tfl.configs.RegularizerConfig`), and lattice sizes for lattice models.\n",
+ "\n",
+ "If no configuration is defined for an input feature, the default configuration in `tfl.config.FeatureConfig` is used."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "vD0tNpiO3p9c"
+ },
+ "outputs": [],
+ "source": [
+ "# Feature configs are used to specify how each feature is calibrated and used.\n",
+ "feature_configs = [\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='age',\n",
+ " lattice_size=3,\n",
+ " # By default, input keypoints of pwl are quantiles of the feature.\n",
+ " pwl_calibration_num_keypoints=5,\n",
+ " monotonicity='increasing',\n",
+ " pwl_calibration_clip_max=100,\n",
+ " # Per feature regularization.\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name='calib_wrinkle', l2=0.1),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='cp',\n",
+ " pwl_calibration_num_keypoints=4,\n",
+ " # Keypoints can be uniformly spaced.\n",
+ " pwl_calibration_input_keypoints='uniform',\n",
+ " monotonicity='increasing',\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='chol',\n",
+ " # Explicit input keypoint initialization.\n",
+ " pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],\n",
+ " monotonicity='increasing',\n",
+ " pwl_calibration_clip_min=130,\n",
+ " # Calibration can be forced to span the full output range by clamping.\n",
+ " pwl_calibration_clamp_min=True,\n",
+ " pwl_calibration_clamp_max=True,\n",
+ " # Per feature regularization.\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name='calib_hessian', l2=1e-4),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='fbs',\n",
+ " # Partial monotonicity: output(0) \u003c= output(1)\n",
+ " monotonicity=[(0, 1)],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='trestbps',\n",
+ " pwl_calibration_num_keypoints=5,\n",
+ " monotonicity='decreasing',\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='thalach',\n",
+ " pwl_calibration_num_keypoints=5,\n",
+ " monotonicity='decreasing',\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='restecg',\n",
+ " # Partial monotonicity: output(0) \u003c= output(1), output(0) \u003c= output(2)\n",
+ " monotonicity=[(0, 1), (0, 2)],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='exang',\n",
+ " # Partial monotonicity: output(0) \u003c= output(1)\n",
+ " monotonicity=[(0, 1)],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='oldpeak',\n",
+ " pwl_calibration_num_keypoints=5,\n",
+ " monotonicity='increasing',\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='slope',\n",
+ " # Partial monotonicity: output(0) \u003c= output(1), output(0) \u003c= output(2)\n",
+ " monotonicity=[(0, 1), (1, 2)],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='ca',\n",
+ " pwl_calibration_num_keypoints=4,\n",
+ " monotonicity='increasing',\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name='thal',\n",
+ " # Partial monotonicity:\n",
+ " # output(normal) \u003c= output(fixed)\n",
+ " # output(normal) \u003c= output(reversible) \n",
+ " monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "LKBULveZ4mr3"
+ },
+ "source": [
+ "## Calibrated Linear Model\n",
+ "\n",
+ "To construct a TFL canned estimator, construct a model configuration from `tfl.configs`. A calibrated linear model is constructed using `tfl.configs.CalibratedLinearConfig`. It applies piecewise-linear and categorical calibration on the input features, followed by a linear combination and an optional output piecewise-linear calibration. When using output calibration or when output bounds are specified, the linear layer will apply weighted averaging on calibrated inputs.\n",
+ "\n",
+ "This example creates a calibrated linear model on the first 5 features. We use\n",
+ "`tfl.visualization` to plot the model graph with the calibrator plots."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "diRRozio4sAL"
+ },
+ "outputs": [],
+ "source": [
+    "# Model config defines the model structure for the estimator.\n",
+ "model_config = tfl.configs.CalibratedLinearConfig(\n",
+ " feature_configs=feature_configs,\n",
+ " use_bias=True,\n",
+ " output_calibration=True,\n",
+ " regularizer_configs=[\n",
+ " # Regularizer for the output calibrator.\n",
+ " tfl.configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),\n",
+ " ])\n",
+ "# A CannedClassifier is constructed from the given model config.\n",
+ "estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns[:5],\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42))\n",
+ "estimator.train(input_fn=train_input_fn)\n",
+ "results = estimator.evaluate(input_fn=test_input_fn)\n",
+ "print('Calibrated linear test AUC: {}'.format(results['auc']))\n",
+ "saved_model_path = estimator.export_saved_model(estimator.model_dir,\n",
+ " serving_input_fn)\n",
+ "model_graph = tfl.estimators.get_model_graph(saved_model_path)\n",
+ "tfl.visualization.draw_model_graph(model_graph)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "zWzPM2_p977t"
+ },
+ "source": [
+ "## Calibrated Lattice Model\n",
+ "\n",
+ "A calibrated lattice model is constructed using `tfl.configs.CalibratedLatticeConfig`. A calibrated lattice model applies piecewise-linear and categorical calibration on the input features, followed by a lattice model and an optional output piecewise-linear calibration.\n",
+ "\n",
+ "This example creates a calibrated lattice model on the first 5 features.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "C6EvVpKW4BbC"
+ },
+ "outputs": [],
+ "source": [
+ "# This is calibrated lattice model: Inputs are calibrated, then combined\n",
+ "# non-linearly using a lattice layer.\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=feature_configs,\n",
+ " regularizer_configs=[\n",
+ " # Torsion regularizer applied to the lattice to make it more linear.\n",
+ " tfl.configs.RegularizerConfig(name='torsion', l2=1e-4),\n",
+ " # Globally defined calibration regularizer is applied to all features.\n",
+ " tfl.configs.RegularizerConfig(name='calib_hessian', l2=1e-4),\n",
+ " ])\n",
+ "# A CannedClassifier is constructed from the given model config.\n",
+ "estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns[:5],\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42))\n",
+ "estimator.train(input_fn=train_input_fn)\n",
+ "results = estimator.evaluate(input_fn=test_input_fn)\n",
+ "print('Calibrated lattice test AUC: {}'.format(results['auc']))\n",
+ "saved_model_path = estimator.export_saved_model(estimator.model_dir,\n",
+ " serving_input_fn)\n",
+ "model_graph = tfl.estimators.get_model_graph(saved_model_path)\n",
+ "tfl.visualization.draw_model_graph(model_graph)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "9494K_ZBKFcm"
+ },
+ "source": [
+ "## Calibrated Lattice Ensemble\n",
+ "\n",
+    "When the number of features is large, you can use an ensemble model, which creates multiple smaller lattices for subsets of the features and averages their output instead of creating just a single huge lattice. Ensemble lattice models are constructed using `tfl.configs.CalibratedLatticeEnsembleConfig`. A calibrated lattice ensemble model applies piecewise-linear and categorical calibration on the input features, followed by an ensemble of lattice models and an optional output piecewise-linear calibration.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "KjrzziMFKuCB"
+ },
+ "source": [
+ "### Random Lattice Ensemble\n",
+ "\n",
+ "The following model config uses a random subset of features for each lattice."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "YBSS7dLjKExq"
+ },
+ "outputs": [],
+ "source": [
+ "# This is random lattice ensemble model with separate calibration:\n",
+    "# model output is the average output of separately calibrated lattices.\n",
+ "model_config = tfl.configs.CalibratedLatticeEnsembleConfig(\n",
+ " feature_configs=feature_configs,\n",
+ " num_lattices=5,\n",
+ " lattice_rank=3)\n",
+ "# A CannedClassifier is constructed from the given model config.\n",
+ "estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42))\n",
+ "estimator.train(input_fn=train_input_fn)\n",
+ "results = estimator.evaluate(input_fn=test_input_fn)\n",
+ "print('Random ensemble test AUC: {}'.format(results['auc']))\n",
+ "saved_model_path = estimator.export_saved_model(estimator.model_dir,\n",
+ " serving_input_fn)\n",
+ "model_graph = tfl.estimators.get_model_graph(saved_model_path)\n",
+ "tfl.visualization.draw_model_graph(model_graph, calibrator_dpi=15)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "LSXEaYAULRvf"
+ },
+ "source": [
+ "### Crystals Lattice Ensemble\n",
+ "\n",
+ "TFL also provides a heuristic feature arrangement algorithm, called *Crystals*. The Crystals algorithm first trains a *prefitting model* that estimates pairwise feature interactions. It then arranges the final ensemble such that features with more non-linear interactions are in the same lattices.\n",
+ "\n",
+ "For Crystals models, you will also need to provide a `prefitting_input_fn` that is used to train the prefitting model, as described above. The prefitting model does not need to be fully trained, so a few epochs should be enough.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "FjQKh9saMaFu"
+ },
+ "outputs": [],
+ "source": [
+ "prefitting_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=train_x,\n",
+ " y=train_y,\n",
+ " shuffle=False,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=PREFITTING_NUM_EPOCHS,\n",
+ " num_threads=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "fVnZpwX8MtPi"
+ },
+ "source": [
+    "You can then create a Crystal model by setting `lattices='crystals'` in the model config."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "f4awRMDe-eMv"
+ },
+ "outputs": [],
+ "source": [
+ "# This is Crystals ensemble model with separate calibration: model output is\n",
+    "# the average output of separately calibrated lattices.\n",
+ "model_config = tfl.configs.CalibratedLatticeEnsembleConfig(\n",
+ " feature_configs=feature_configs,\n",
+ " lattices='crystals',\n",
+ " num_lattices=5,\n",
+ " lattice_rank=3)\n",
+ "# A CannedClassifier is constructed from the given model config.\n",
+ "estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " # prefitting_input_fn is required to train the prefitting model.\n",
+ " prefitting_input_fn=prefitting_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),\n",
+ " prefitting_optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42))\n",
+ "estimator.train(input_fn=train_input_fn)\n",
+ "results = estimator.evaluate(input_fn=test_input_fn)\n",
+ "print('Crystals ensemble test AUC: {}'.format(results['auc']))\n",
+ "saved_model_path = estimator.export_saved_model(estimator.model_dir,\n",
+ " serving_input_fn)\n",
+ "model_graph = tfl.estimators.get_model_graph(saved_model_path)\n",
+ "tfl.visualization.draw_model_graph(model_graph, calibrator_dpi=15)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Isb2vyLAVBM1"
+ },
+ "source": [
+ "You can plot feature calibrators with more details using the `tfl.visualization` module."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DJPaREuWS2sg"
+ },
+ "outputs": [],
+ "source": [
+ "_ = tfl.visualization.plot_feature_calibrator(model_graph, \"age\")\n",
+ "_ = tfl.visualization.plot_feature_calibrator(model_graph, \"restecg\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "tfl_canned_estimators.ipynb",
+ "private_outputs": true,
+ "provenance": [
+ {
+ "file_id": "1gw3igUWesgUCASoPM-xRZk6bGg3E1qOX",
+ "timestamp": 1579554854035
+ }
+ ],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/tutorials/custom_estimators.ipynb b/docs/tutorials/custom_estimators.ipynb
new file mode 100644
index 0000000..3850d5a
--- /dev/null
+++ b/docs/tutorials/custom_estimators.ipynb
@@ -0,0 +1,443 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7765UFHoyGx6"
+ },
+ "source": [
+ "##### Copyright 2020 The TensorFlow Authors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "form",
+ "colab": {},
+ "colab_type": "code",
+ "id": "KsOkK8O69PyT"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ZS8z-_KeywY9"
+ },
+ "source": [
+ "# TF Lattice Custom Estimators"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "r61fkA2i9Y3_"
+ },
+ "source": [
+ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/lattice/tutorials/custom_estimators\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/lattice/blob/master/docs/tutorials/custom_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/lattice/blob/master/docs/tutorials/custom_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/docs/site/en/lattice/tutorials/custom_estimators.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ "\u003c/table\u003e"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Ur6yCw7YVvr8"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "You can use custom estimators to create arbitrarily monotonic models using TFL layers. This guide outlines the steps needed to create such estimators."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "x769lI12IZXB"
+ },
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "fbBVAR6UeRN5"
+ },
+ "source": [
+ "Installing TF Lattice package:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "bpXjJKpSd3j4"
+ },
+ "outputs": [],
+ "source": [
+ "#@test {\"skip\": true}\n",
+ "!pip install tensorflow-lattice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "jSVl9SHTeSGX"
+ },
+ "source": [
+ "Importing required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "P9rMpg1-ASY3"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "!pip install tensorflow-lattice\n",
+ "\n",
+ "try:\n",
+ " # %tensorflow_version only exists in Colab.\n",
+ " %tensorflow_version 2.x\n",
+ "except Exception:\n",
+ " pass\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import tensorflow_lattice as tfl\n",
+ "from tensorflow import feature_column as fc\n",
+ "\n",
+ "from tensorflow_estimator.python.estimator.canned import optimizers\n",
+ "from tensorflow_estimator.python.estimator.head import binary_class_head"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "svPuM6QNxlrH"
+ },
+ "source": [
+ "Downloading the UCI Statlog (Heart) dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "M0CmH1gPASZF"
+ },
+ "outputs": [],
+ "source": [
+ "csv_file = tf.keras.utils.get_file(\n",
+ " 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')\n",
+ "df = pd.read_csv(csv_file)\n",
+ "target = df.pop('target')\n",
+ "train_size = int(len(df) * 0.8)\n",
+ "train_x = df[:train_size]\n",
+ "train_y = target[:train_size]\n",
+ "test_x = df[train_size:]\n",
+ "test_y = target[train_size:]\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "nKkAw12SxvGG"
+ },
+ "source": [
+ "Setting the default values used for training in this guide:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "1T6GFI9F6mcG"
+ },
+ "outputs": [],
+ "source": [
+ "LEARNING_RATE = 0.1\n",
+ "BATCH_SIZE = 128\n",
+ "NUM_EPOCHS = 1000"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "0TGfzhPHzpix"
+ },
+ "source": [
+ "## Feature Columns\n",
+ "\n",
+ "As for any other TF estimator, data needs to be passed to the estimator, which is typically via an input_fn and parsed using [FeatureColumns](https://www.tensorflow.org/guide/feature_columns)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DCIUz8apzs0l"
+ },
+ "outputs": [],
+ "source": [
+ "# Feature columns.\n",
+ "# - age\n",
+ "# - sex\n",
+ "# - ca number of major vessels (0-3) colored by flourosopy\n",
+ "# - thal 3 = normal; 6 = fixed defect; 7 = reversable defect\n",
+ "feature_columns = [\n",
+ " fc.numeric_column('age', default_value=-1),\n",
+ " fc.categorical_column_with_vocabulary_list('sex', [0, 1]),\n",
+ " fc.numeric_column('ca'),\n",
+ " fc.categorical_column_with_vocabulary_list(\n",
+ " 'thal', ['normal', 'fixed', 'reversible']),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "hEZstmtT2CA3"
+ },
+ "source": [
+    "Note that categorical features do not need to be wrapped by a dense feature column, since `tfl.layers.CategoricalCalibration` layer can directly consume category indices."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "H_LoW_9m5OFL"
+ },
+ "source": [
+ "## Creating input_fn\n",
+ "\n",
+ "As for any other estimator, you can use input_fn to feed data to the model for training and evaluation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "lFVy1Efy5NKD"
+ },
+ "outputs": [],
+ "source": [
+ "train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=train_x,\n",
+ " y=train_y,\n",
+ " shuffle=True,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=NUM_EPOCHS,\n",
+ " num_threads=1)\n",
+ "\n",
+ "test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=test_x,\n",
+ " y=test_y,\n",
+ " shuffle=False,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=1,\n",
+ " num_threads=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "kbrgSr9KaRg0"
+ },
+ "source": [
+ "## Creating model_fn\n",
+ "\n",
+ "There are several ways to create a custom estimator. Here we will construct a `model_fn` that calls a Keras model on the parsed input tensors. To parse the input features, you can use `tf.feature_column.input_layer`, `tf.keras.layers.DenseFeatures`, or `tfl.estimators.transform_features`. If you use the latter, you will not need to wrap categorical features with dense feature columns, and the resulting tensors will not be concatenated, which makes it easier to use the features in the calibration layers.\n",
+ "\n",
+    "To construct a model, you can mix and match TFL layers or any other Keras layers. Here we create a calibrated lattice Keras model out of TFL layers and impose several monotonicity constraints. We then use the Keras model to create the custom estimator.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "n2Zrv6OPaQO2"
+ },
+ "outputs": [],
+ "source": [
+ "def model_fn(features, labels, mode, config):\n",
+ " \"\"\"model_fn for the custom estimator.\"\"\"\n",
+ " del config\n",
+ " input_tensors = tfl.estimators.transform_features(features, feature_columns)\n",
+ " inputs = {\n",
+ " key: tf.keras.layers.Input(shape=(1,), name=key) for key in input_tensors\n",
+ " }\n",
+ "\n",
+ " lattice_sizes = [3, 2, 2, 2]\n",
+ " lattice_monotonicities = ['increasing', 'none', 'increasing', 'increasing']\n",
+ " lattice_input = tf.keras.layers.Concatenate(axis=1)([\n",
+ " tfl.layers.PWLCalibration(\n",
+ " input_keypoints=np.linspace(10, 100, num=8, dtype=np.float32),\n",
+ " # The output range of the calibrator should be the input range of\n",
+ " # the following lattice dimension.\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[0] - 1.0,\n",
+ " monotonicity='increasing',\n",
+ " )(inputs['age']),\n",
+ " tfl.layers.CategoricalCalibration(\n",
+ " # Number of categories including any missing/default category.\n",
+ " num_buckets=2,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[1] - 1.0,\n",
+ " )(inputs['sex']),\n",
+ " tfl.layers.PWLCalibration(\n",
+ " input_keypoints=[0.0, 1.0, 2.0, 3.0],\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[0] - 1.0,\n",
+ " # You can specify TFL regularizers as tuple\n",
+ " # ('regularizer name', l1, l2).\n",
+ " kernel_regularizer=('hessian', 0.0, 1e-4),\n",
+ " monotonicity='increasing',\n",
+ " )(inputs['ca']),\n",
+ " tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=3,\n",
+ " output_min=0.0,\n",
+    "          output_max=lattice_sizes[3] - 1.0,\n",
+ " # Categorical monotonicity can be partial order.\n",
+ " # (i, j) indicates that we must have output(i) \u003c= output(j).\n",
+ " # Make sure to set the lattice monotonicity to 'increasing' for this\n",
+ " # dimension.\n",
+ " monotonicities=[(0, 1), (0, 2)],\n",
+ " )(inputs['thal']),\n",
+ " ])\n",
+ " output = tfl.layers.Lattice(\n",
+ " lattice_sizes=lattice_sizes, monotonicities=lattice_monotonicities)(\n",
+ " lattice_input)\n",
+ "\n",
+ " training = (mode == tf.estimator.ModeKeys.TRAIN)\n",
+ " model = tf.keras.Model(inputs=inputs, outputs=output)\n",
+ " logits = model(input_tensors, training=training)\n",
+ "\n",
+ " if training:\n",
+ " optimizer = optimizers.get_optimizer_instance_v2('Adagrad', LEARNING_RATE)\n",
+ " else:\n",
+ " optimizer = None\n",
+ "\n",
+ " head = binary_class_head.BinaryClassHead()\n",
+ " return head.create_estimator_spec(\n",
+ " features=features,\n",
+ " mode=mode,\n",
+ " labels=labels,\n",
+ " optimizer=optimizer,\n",
+ " logits=logits,\n",
+ " trainable_variables=model.trainable_variables,\n",
+ " update_ops=model.updates)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "mng-VtsSbVtQ"
+ },
+ "source": [
+ "## Training and Estimator\n",
+ "\n",
+ "Using the `model_fn` we can create and train the estimator."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "j38GaEbKbZju"
+ },
+ "outputs": [],
+ "source": [
+ "estimator = tf.estimator.Estimator(model_fn=model_fn)\n",
+ "estimator.train(input_fn=train_input_fn)\n",
+ "results = estimator.evaluate(input_fn=test_input_fn)\n",
+ "print('AUC: {}'.format(results['auc']))"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "tfl_custom_estimators.ipynb",
+ "private_outputs": true,
+ "provenance": [
+ {
+ "file_id": "1YQhpyfKAW4Gz49gDFMJtVSpAM-Zi12h9",
+ "timestamp": 1579559437099
+ }
+ ],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/tutorials/keras_layers.ipynb b/docs/tutorials/keras_layers.ipynb
new file mode 100644
index 0000000..6d96515
--- /dev/null
+++ b/docs/tutorials/keras_layers.ipynb
@@ -0,0 +1,838 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7765UFHoyGx6"
+ },
+ "source": [
+ "##### Copyright 2020 The TensorFlow Authors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "form",
+ "colab": {},
+ "colab_type": "code",
+ "id": "KsOkK8O69PyT"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ZS8z-_KeywY9"
+ },
+ "source": [
+ "# Creating Keras Models with TFL Layers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "r61fkA2i9Y3_"
+ },
+ "source": [
+ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/lattice/tutorials/keras_layers\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/lattice/blob/master/docs/tutorials/keras_layers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/lattice/blob/master/docs/tutorials/keras_layers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/docs/site/en/lattice/tutorials/keras_layers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ "\u003c/table\u003e"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ecLbJCvJSSCd"
+ },
+ "source": [
+    "## Overview\n",
+ "\n",
+ "You can use TFL Keras layers to construct Keras models with monotonicity and other shape constraints. This example builds and trains a calibrated lattice model for the UCI heart dataset using TFL layers.\n",
+ "\n",
+ "In a calibrated lattice model, each feature is transformed by a `tfl.layers.PWLCalibration` or a `tfl.layers.CategoricalCalibration` layer and the results are nonlinearly fused using a `tfl.layers.Lattice`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "x769lI12IZXB"
+ },
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "fbBVAR6UeRN5"
+ },
+ "source": [
+ "Installing TF Lattice package:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "bpXjJKpSd3j4"
+ },
+ "outputs": [],
+ "source": [
+ "#@test {\"skip\": true}\n",
+ "!pip install tensorflow-lattice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "jSVl9SHTeSGX"
+ },
+ "source": [
+ "Importing required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "pm0LD8iyIZXF"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "\n",
+ "try:\n",
+ " # %tensorflow_version only exists in Colab.\n",
+ " %tensorflow_version 2.x\n",
+ "except Exception:\n",
+ " pass\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import tensorflow_lattice as tfl\n",
+ "from tensorflow import feature_column as fc"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "svPuM6QNxlrH"
+ },
+ "source": [
+ "Downloading the UCI Statlog (Heart) dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "PG3pFtK-IZXM"
+ },
+ "outputs": [],
+ "source": [
+ "# UCI Statlog (Heart) dataset.\n",
+ "csv_file = tf.keras.utils.get_file(\n",
+ " 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')\n",
+ "training_data_df = pd.read_csv(csv_file).sample(\n",
+ " frac=1.0, random_state=41).reset_index(drop=True)\n",
+ "training_data_df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "nKkAw12SxvGG"
+ },
+ "source": [
+ "Setting the default values used for training in this guide:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "krAJBE-yIZXR"
+ },
+ "outputs": [],
+ "source": [
+ "LEARNING_RATE = 0.1\n",
+ "BATCH_SIZE = 128\n",
+ "NUM_EPOCHS = 100"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "0TGfzhPHzpix"
+ },
+ "source": [
+ "## Sequential Keras Model\n",
+ "\n",
+ "This example creates a Sequential Keras model and only uses TFL layers.\n",
+ "\n",
+ "Lattice layers expect `input[i]` to be within `[0, lattice_sizes[i] - 1.0]`, so we need to define the lattice sizes ahead of the calibration layers so we can properly specify output range of the calibration layers.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "nOQWqPAbQS3o"
+ },
+ "outputs": [],
+ "source": [
+ "# Lattice layer expects input[i] to be within [0, lattice_sizes[i] - 1.0], so\n",
+ "lattice_sizes = [3, 2, 2, 2, 2, 2, 2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "W3DnEKWvQYXm"
+ },
+ "source": [
+    "We use a `tfl.layers.ParallelCombination` layer to group together calibration layers which have to be executed in parallel in order to be able to create a Sequential model.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "o_hyk5GkQfl8"
+ },
+ "outputs": [],
+ "source": [
+ "combined_calibrators = tfl.layers.ParallelCombination()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "BPZsSUZiQiwc"
+ },
+ "source": [
+ "We create a calibration layer for each feature and add it to the parallel combination layer. For numeric features we use `tfl.layers.PWLCalibration` and for categorical features we use `tfl.layers.CategoricalCalibration`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DXPc6rSGxzFZ"
+ },
+ "outputs": [],
+ "source": [
+ "# ############### age ###############\n",
+ "calibrator = tfl.layers.PWLCalibration(\n",
+ " # Every PWLCalibration layer must have keypoints of piecewise linear\n",
+ " # function specified. Easiest way to specify them is to uniformly cover\n",
+ " # entire input range by using numpy.linspace().\n",
+ " input_keypoints=np.linspace(\n",
+ " training_data_df['age'].min(), training_data_df['age'].max(), num=5),\n",
+ " # You need to ensure that input keypoints have same dtype as layer input.\n",
+ " # You can do it by setting dtype here or by providing keypoints in such\n",
+ " # format which will be converted to desired tf.dtype by default.\n",
+ " dtype=tf.float32,\n",
+ " # Output range must correspond to expected lattice input range.\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[0] - 1.0,\n",
+ ")\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### sex ###############\n",
+ "# For boolean features simply specify CategoricalCalibration layer with 2\n",
+ "# buckets.\n",
+ "calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=2,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[1] - 1.0,\n",
+ " # Initializes all outputs to (output_min + output_max) / 2.0.\n",
+ " kernel_initializer='constant')\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### cp ###############\n",
+ "calibrator = tfl.layers.PWLCalibration(\n",
+ " # Here instead of specifying dtype of layer we convert keypoints into\n",
+ " # np.float32.\n",
+ " input_keypoints=np.linspace(1, 4, num=4, dtype=np.float32),\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[2] - 1.0,\n",
+ " monotonicity='increasing',\n",
+ " # You can specify TFL regularizers as a tuple ('regularizer name', l1, l2).\n",
+ " kernel_regularizer=('hessian', 0.0, 1e-4))\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### trestbps ###############\n",
+ "calibrator = tfl.layers.PWLCalibration(\n",
+ " # Alternatively, you might want to use quantiles as keypoints instead of\n",
+ " # uniform keypoints\n",
+ " input_keypoints=np.quantile(training_data_df['trestbps'],\n",
+ " np.linspace(0.0, 1.0, num=5)),\n",
+ " dtype=tf.float32,\n",
+ " # Together with quantile keypoints you might want to initialize piecewise\n",
+ " # linear function to have 'equal_slopes' in order for output of layer\n",
+ " # after initialization to preserve original distribution.\n",
+ " kernel_initializer='equal_slopes',\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[3] - 1.0,\n",
+ " # You might consider clamping extreme inputs of the calibrator to output\n",
+ " # bounds.\n",
+ " clamp_min=True,\n",
+ " clamp_max=True,\n",
+ " monotonicity='increasing')\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### chol ###############\n",
+ "calibrator = tfl.layers.PWLCalibration(\n",
+ " # Explicit input keypoint initialization.\n",
+ " input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],\n",
+ " dtype=tf.float32,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[4] - 1.0,\n",
+ " # Monotonicity of calibrator can be decreasing. Note that corresponding\n",
+ " # lattice dimension must have INCREASING monotonicity regardless of\n",
+ " # monotonicity direction of calibrator.\n",
+ " monotonicity='decreasing',\n",
+ " # Convexity together with decreasing monotonicity result in diminishing\n",
+ " # return constraint.\n",
+ " convexity='convex',\n",
+ " # You can specify list of regularizers. You are not limited to TFL\n",
+ " # regularizers. Feel free to use any :)\n",
+ " kernel_regularizer=[('laplacian', 0.0, 1e-4),\n",
+ " tf.keras.regularizers.l1_l2(l1=0.001)])\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### fbs ###############\n",
+ "calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=2,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[5] - 1.0,\n",
+ " # For categorical calibration layer monotonicity is specified for pairs\n",
+ " # of indices of categories. Output for first category in pair will be\n",
+ " # smaller than output for second category.\n",
+ " #\n",
+ " # Don't forget to set monotonicity of corresponding dimension of Lattice\n",
+ " # layer to '1'.\n",
+ " monotonicities=[(0, 1)],\n",
+ " # This initializer is identical to default one ('uniform'), but has fixed\n",
+ " # seed in order to simplify experimentation.\n",
+ " kernel_initializer=tf.keras.initializers.RandomUniform(\n",
+ " minval=0.0, maxval=lattice_sizes[5] - 1.0, seed=1))\n",
+ "combined_calibrators.append(calibrator)\n",
+ "\n",
+ "# ############### restecg ###############\n",
+ "calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=3,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[6] - 1.0,\n",
+ " # Categorical monotonicity can be partial order.\n",
+ " monotonicities=[(0, 1), (0, 2)],\n",
+ " # Categorical calibration layer supports standard Keras regularizers.\n",
+ " kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.001),\n",
+ " kernel_initializer='constant')\n",
+ "combined_calibrators.append(calibrator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "inyNlSBeQyp7"
+ },
+ "source": [
+ "We then create a lattice layer to nonlinearly fuse the outputs of the calibrators.\n",
+ "\n",
+ "Note that we need to specify the monotonicity of the lattice to be increasing for required dimensions. The composition with the direction of the monotonicity in the calibration will result in the correct end-to-end direction of monotonicity. This includes partial monotonicity of CategoricalCalibration layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DNCc9oBTRo6w"
+ },
+ "outputs": [],
+ "source": [
+ "lattice = tfl.layers.Lattice(\n",
+ " lattice_sizes=lattice_sizes,\n",
+ " monotonicities=[\n",
+ " 'increasing', 'none', 'increasing', 'increasing', 'increasing',\n",
+ " 'increasing', 'increasing'\n",
+ " ],\n",
+ " output_min=0.0,\n",
+ " output_max=1.0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "T5q2InayRpDr"
+ },
+ "source": [
+ "We can then create a sequential model using the combined calibrators and lattice layers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "xX6lroYZQy3L"
+ },
+ "outputs": [],
+ "source": [
+ "model = tf.keras.models.Sequential()\n",
+ "model.add(combined_calibrators)\n",
+ "model.add(lattice)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "W3UFxD3fRzIC"
+ },
+ "source": [
+ "Training works the same as any other keras model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "2jz4JvI-RzSj"
+ },
+ "outputs": [],
+ "source": [
+ "features = training_data_df[[\n",
+ " 'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg'\n",
+ "]].values.astype(np.float32)\n",
+ "target = training_data_df[['target']].values.astype(np.float32)\n",
+ "\n",
+ "model.compile(\n",
+ " loss=tf.keras.losses.mean_squared_error,\n",
+ " optimizer=tf.keras.optimizers.Adagrad(learning_rate=LEARNING_RATE))\n",
+ "model.fit(\n",
+ " features,\n",
+ " target,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " epochs=NUM_EPOCHS,\n",
+ " validation_split=0.2,\n",
+ " shuffle=False,\n",
+ " verbose=0)\n",
+ "\n",
+ "model.evaluate(features, target)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "RTHoW_5lxwT5"
+ },
+ "source": [
+ "## Functional Keras Model\n",
+ "\n",
+ "This example uses a functional API for Keras model construction.\n",
+ "\n",
+ "As mentioned in the previous section, lattice layers expect `input[i]` to be within `[0, lattice_sizes[i] - 1.0]`, so we need to define the lattice sizes ahead of the calibration layers so we can properly specify output range of the calibration layers."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "gJjUYvBuW1qE"
+ },
+ "outputs": [],
+ "source": [
+ "# We are going to have 2-d embedding as one of lattice inputs.\n",
+ "lattice_sizes = [3, 2, 2, 3, 3, 2, 2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Z03qY5MYW1yT"
+ },
+ "source": [
+ "For each feature, we need to create an input layer followed by a calibration layer. For numeric features we use `tfl.layers.PWLCalibration` and for categorical features we use `tfl.layers.CategoricalCalibration`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DCIUz8apzs0l"
+ },
+ "outputs": [],
+ "source": [
+ "model_inputs = []\n",
+ "lattice_inputs = []\n",
+ "# ############### age ###############\n",
+ "age_input = tf.keras.layers.Input(shape=[1], name='age')\n",
+ "model_inputs.append(age_input)\n",
+ "age_calibrator = tfl.layers.PWLCalibration(\n",
+ " # Every PWLCalibration layer must have keypoints of piecewise linear\n",
+ " # function specified. Easiest way to specify them is to uniformly cover\n",
+ " # entire input range by using numpy.linspace().\n",
+ " input_keypoints=np.linspace(\n",
+ " training_data_df['age'].min(), training_data_df['age'].max(), num=5),\n",
+ " # You need to ensure that input keypoints have same dtype as layer input.\n",
+ " # You can do it by setting dtype here or by providing keypoints in such\n",
+ " # format which will be converted to desired tf.dtype by default.\n",
+ " dtype=tf.float32,\n",
+ " # Output range must correspond to expected lattice input range.\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[0] - 1.0,\n",
+ " monotonicity='increasing',\n",
+ " name='age_calib',\n",
+ ")(\n",
+ " age_input)\n",
+ "lattice_inputs.append(age_calibrator)\n",
+ "\n",
+ "# ############### sex ###############\n",
+ "# For boolean features simply specify CategoricalCalibration layer with 2\n",
+ "# buckets.\n",
+ "sex_input = tf.keras.layers.Input(shape=[1], name='sex')\n",
+ "model_inputs.append(sex_input)\n",
+ "sex_calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=2,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[1] - 1.0,\n",
+ " # Initializes all outputs to (output_min + output_max) / 2.0.\n",
+ " kernel_initializer='constant',\n",
+ " name='sex_calib',\n",
+ ")(\n",
+ " sex_input)\n",
+ "lattice_inputs.append(sex_calibrator)\n",
+ "\n",
+ "# ############### cp ###############\n",
+ "cp_input = tf.keras.layers.Input(shape=[1], name='cp')\n",
+ "model_inputs.append(cp_input)\n",
+ "cp_calibrator = tfl.layers.PWLCalibration(\n",
+ " # Here instead of specifying dtype of layer we convert keypoints into\n",
+ " # np.float32.\n",
+ " input_keypoints=np.linspace(1, 4, num=4, dtype=np.float32),\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[2] - 1.0,\n",
+ " monotonicity='increasing',\n",
+ " # You can specify TFL regularizers as a tuple ('regularizer name', l1, l2).\n",
+ " kernel_regularizer=('hessian', 0.0, 1e-4),\n",
+ " name='cp_calib',\n",
+ ")(\n",
+ " cp_input)\n",
+ "lattice_inputs.append(cp_calibrator)\n",
+ "\n",
+ "# ############### trestbps ###############\n",
+ "trestbps_input = tf.keras.layers.Input(shape=[1], name='trestbps')\n",
+ "model_inputs.append(trestbps_input)\n",
+ "trestbps_calibrator = tfl.layers.PWLCalibration(\n",
+ " # Alternatively, you might want to use quantiles as keypoints instead of\n",
+ " # uniform keypoints\n",
+ " input_keypoints=np.quantile(training_data_df['trestbps'],\n",
+ " np.linspace(0.0, 1.0, num=5)),\n",
+ " dtype=tf.float32,\n",
+ " # Together with quantile keypoints you might want to initialize piecewise\n",
+ " # linear function to have 'equal_slopes' in order for output of layer\n",
+ " # after initialization to preserve original distribution.\n",
+ " kernel_initializer='equal_slopes',\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[3] - 1.0,\n",
+ " # You might consider clamping extreme inputs of the calibrator to output\n",
+ " # bounds.\n",
+ " clamp_min=True,\n",
+ " clamp_max=True,\n",
+ " monotonicity='increasing',\n",
+ " name='trestbps_calib',\n",
+ ")(\n",
+ " trestbps_input)\n",
+ "lattice_inputs.append(trestbps_calibrator)\n",
+ "\n",
+ "# ############### chol ###############\n",
+ "chol_input = tf.keras.layers.Input(shape=[1], name='chol')\n",
+ "model_inputs.append(chol_input)\n",
+ "chol_calibrator = tfl.layers.PWLCalibration(\n",
+ " # Explicit input keypoint initialization.\n",
+ " input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[4] - 1.0,\n",
+ " # Monotonicity of calibrator can be decreasing. Note that corresponding\n",
+ " # lattice dimension must have INCREASING monotonicity regardless of\n",
+ " # monotonicity direction of calibrator.\n",
+ " monotonicity='decreasing',\n",
+ " # Convexity together with decreasing monotonicity result in diminishing\n",
+ " # return constraint.\n",
+ " convexity='convex',\n",
+ " # You can specify list of regularizers. You are not limited to TFL\n",
+ " # regularizers. Feel free to use any :)\n",
+ " kernel_regularizer=[('laplacian', 0.0, 1e-4),\n",
+ " tf.keras.regularizers.l1_l2(l1=0.001)],\n",
+ " name='chol_calib',\n",
+ ")(\n",
+ " chol_input)\n",
+ "lattice_inputs.append(chol_calibrator)\n",
+ "\n",
+ "# ############### fbs ###############\n",
+ "fbs_input = tf.keras.layers.Input(shape=[1], name='fbs')\n",
+ "model_inputs.append(fbs_input)\n",
+ "fbs_calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=2,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[5] - 1.0,\n",
+ " # For categorical calibration layer monotonicity is specified for pairs\n",
+ " # of indices of categories. Output for first category in pair will be\n",
+ " # smaller than output for second category.\n",
+ " #\n",
+ " # Don't forget to set monotonicity of corresponding dimension of Lattice\n",
+ " # layer to '1'.\n",
+ " monotonicities=[(0, 1)],\n",
+ " # This initializer is identical to default one ('uniform'), but has fixed\n",
+ " # seed in order to simplify experimentation.\n",
+ " kernel_initializer=tf.keras.initializers.RandomUniform(\n",
+ " minval=0.0, maxval=lattice_sizes[5] - 1.0, seed=1),\n",
+ " name='fbs_calib',\n",
+ ")(\n",
+ " fbs_input)\n",
+ "lattice_inputs.append(fbs_calibrator)\n",
+ "\n",
+ "# ############### restecg ###############\n",
+ "restecg_input = tf.keras.layers.Input(shape=[1], name='restecg')\n",
+ "model_inputs.append(restecg_input)\n",
+ "restecg_calibrator = tfl.layers.CategoricalCalibration(\n",
+ " num_buckets=3,\n",
+ " output_min=0.0,\n",
+ " output_max=lattice_sizes[6] - 1.0,\n",
+ " # Categorical monotonicity can be partial order.\n",
+ " monotonicities=[(0, 1), (0, 2)],\n",
+ " # Categorical calibration layer supports standard Keras regularizers.\n",
+ " kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.001),\n",
+ " kernel_initializer='constant',\n",
+ " name='restecg_calib',\n",
+ ")(\n",
+ " restecg_input)\n",
+ "lattice_inputs.append(restecg_calibrator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Fr0k8La_YgQG"
+ },
+ "source": [
+ "We then create a lattice layer to nonlinearly fuse the outputs of the calibrators.\n",
+ "\n",
+ "Note that we need to specify the monotonicity of the lattice to be increasing for required dimensions. The composition with the direction of the monotonicity in the calibration will result in the correct end-to-end direction of monotonicity. This includes partial monotonicity of `tfl.layers.CategoricalCalibration` layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "X15RE0NybNbU"
+ },
+ "outputs": [],
+ "source": [
+ "lattice = tfl.layers.Lattice(\n",
+ " lattice_sizes=lattice_sizes,\n",
+ " monotonicities=[\n",
+ " 'increasing', 'none', 'increasing', 'increasing', 'increasing',\n",
+ " 'increasing', 'increasing'\n",
+ " ],\n",
+ " output_min=0.0,\n",
+ " output_max=1.0,\n",
+ " name='lattice',\n",
+ ")(\n",
+ " lattice_inputs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "31VzsnMCA9dh"
+ },
+ "source": [
+ "To add more flexibility to the model, we add an output calibration layer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "efCP3Yx2A9n7"
+ },
+ "outputs": [],
+ "source": [
+ "model_output = tfl.layers.PWLCalibration(\n",
+ " input_keypoints=np.linspace(0.0, 1.0, 5),\n",
+ " name='output_calib',\n",
+ ")(\n",
+ " lattice)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "1SURnNl8bNgw"
+ },
+ "source": [
+ "We can now create a model using the inputs and outputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "7gY-VXuYbZLa"
+ },
+ "outputs": [],
+ "source": [
+ "model = tf.keras.models.Model(\n",
+ " inputs=model_inputs,\n",
+ " outputs=model_output)\n",
+ "tf.keras.utils.plot_model(model, rankdir='LR')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "tvFJTs94bZXK"
+ },
+ "source": [
+ "Training works the same as any other keras model. Note that, with our setup, input features are passed as separate tensors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "vMQTGbFAYgYS"
+ },
+ "outputs": [],
+ "source": [
+ "feature_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg']\n",
+ "features = np.split(\n",
+ " training_data_df[feature_names].values.astype(np.float32),\n",
+ " indices_or_sections=len(feature_names),\n",
+ " axis=1)\n",
+ "target = training_data_df[['target']].values.astype(np.float32)\n",
+ "\n",
+ "model.compile(\n",
+ " loss=tf.keras.losses.mean_squared_error,\n",
+ " optimizer=tf.keras.optimizers.Adagrad(LEARNING_RATE))\n",
+ "model.fit(\n",
+ " features,\n",
+ " target,\n",
+ " batch_size=BATCH_SIZE,\n",
+ " epochs=NUM_EPOCHS,\n",
+ " validation_split=0.2,\n",
+ " shuffle=False,\n",
+ " verbose=0)\n",
+ "\n",
+ "model.evaluate(features, target)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "tfl_keras_layers.ipynb",
+ "private_outputs": true,
+ "provenance": [
+ {
+ "file_id": "1ov3qXThgltj77os4ULx7nI63f3oM0vsc",
+ "timestamp": 1579561232062
+ },
+ {
+ "file_id": "1YQhpyfKAW4Gz49gDFMJtVSpAM-Zi12h9",
+ "timestamp": 1579117071304
+ }
+ ],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/docs/tutorials/shape_constraints.ipynb b/docs/tutorials/shape_constraints.ipynb
new file mode 100644
index 0000000..0065400
--- /dev/null
+++ b/docs/tutorials/shape_constraints.ipynb
@@ -0,0 +1,1262 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7765UFHoyGx6"
+ },
+ "source": [
+ "##### Copyright 2020 The TensorFlow Authors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "form",
+ "colab": {},
+ "colab_type": "code",
+ "id": "KsOkK8O69PyT"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "RKQpW0JqQQmY"
+ },
+ "source": [
+ "# Shape Constraints with TensorFlow Lattice\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "r61fkA2i9Y3_"
+ },
+ "source": [
+ "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/lattice/tutorials/shape_constraints\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/lattice/blob/master/docs/tutorials/shape_constraints.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/lattice/blob/master/docs/tutorials/shape_constraints.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ " \u003ctd\u003e\n",
+ " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/docs/site/en/lattice/tutorials/shape_constraints.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
+ " \u003c/td\u003e\n",
+ "\u003c/table\u003e"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "2plcL3iTVjsp"
+ },
+ "source": [
+ "## Overview\n",
+ "\n",
+ "In this colab we will fit a TensorFlow Lattice (TFL) canned classifier on a handcrafted restaurant review rating dataset and experiment with various TFL regularizers and shape constraints. Before proceeding, make sure your runtime has all required packages installed (as imported in the code cells below)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "x769lI12IZXB"
+ },
+ "source": [
+ "## Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "fbBVAR6UeRN5"
+ },
+ "source": [
+ "Installing TF Lattice package:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "bpXjJKpSd3j4"
+ },
+ "outputs": [],
+ "source": [
+ "#@test {\"skip\": true}\n",
+ "!pip install tensorflow-lattice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "jSVl9SHTeSGX"
+ },
+ "source": [
+ "Importing required packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "iY6awAl058TV"
+ },
+ "outputs": [],
+ "source": [
+ "from __future__ import absolute_import, division, print_function, unicode_literals\n",
+ "!pip install tensorflow-lattice\n",
+ "\n",
+ "try:\n",
+ " # %tensorflow_version only exists in Colab.\n",
+ " %tensorflow_version 2.x\n",
+ "except Exception:\n",
+ " pass\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "from IPython.core.pylabtools import figsize\n",
+ "import itertools\n",
+ "import matplotlib\n",
+ "from matplotlib import pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import tensorflow_lattice as tfl"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7TmBk_IGgJF0"
+ },
+ "source": [
+ "Default values used in this guide:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "kQHPyPsPUF92"
+ },
+ "outputs": [],
+ "source": [
+ "NUM_EPOCHS = 500\n",
+ "BATCH_SIZE = 64\n",
+ "LEARNING_RATE=0.001"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "FjR7D8Ag3z0d"
+ },
+ "source": [
+ "## Training Dataset for Ranking Restaurants"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "a1YetzbdFOij"
+ },
+ "source": [
+ "Imagine a simplified scenario where we want to determine whether or not users will click on a restaurant search. The task is to predict the clickthrough rate (CTR) given input features:\n",
+ "- Average rating (`avg_rating`): a numeric (float) feature with values in the range [1,5].\n",
+ "- Number of reviews (`num_reviews`): a positive numeric (integer) feature with values capped at 200, and used as a measure of trendiness.\n",
+ "- Dollar rating (`dollar_rating`): a categorical feature with string values in the set {\"D\", \"DD\", \"DDD\", \"DDDD\"}.\n",
+ "\n",
+ "Here we create a synthetic dataset where the true CTR is given by the formula:\n",
+ "$$\n",
+ "CTR = \\frac{1}{1 + exp\\{\\mbox{b(dollar_rating)}-\\mbox{avg_rating}\\times log(\\mbox{num_reviews}) /4 \\}}, \n",
+ "$$\n",
+ "where $b(\\cdot)$ translates each `dollar_rating` to a baseline value:\n",
+ "$$\n",
+ "\\mbox{D}\\to 3,\\ \\mbox{DD}\\to 2,\\ \\mbox{DDD}\\to 4,\\ \\mbox{DDDD}\\to 4.5. \n",
+ "$$\n",
+ "\n",
+ "This formula reflects typical user patterns. e.g. given everything else fixed, \"\\\\$\\\\$\" restaurants will receive more clicks than \"\\\\$\", followed by \"\\\\$\\\\$\\\\$\" and \"\\\\$\\\\$\\\\$\\\\$\". "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "mKovnyv1jATw"
+ },
+ "outputs": [],
+ "source": [
+ "def click_through_rate(avg_ratings, num_reviews, dollar_ratings):\n",
+ " dollar_rating_baseline = {\"D\": 3, \"DD\": 2, \"DDD\": 4, \"DDDD\": 4.5}\n",
+ " return 1 / (1 + np.exp(\n",
+ " np.array([dollar_rating_baseline[d] for d in dollar_ratings]) -\n",
+ " avg_ratings * np.log1p(num_reviews) / 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "BPlgRdt6jAbP"
+ },
+ "source": [
+ "Let's take a look at the contour plots of this CTR function."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "KC5qX_XKmc7g"
+ },
+ "outputs": [],
+ "source": [
+ "def color_bar():\n",
+ " bar = matplotlib.cm.ScalarMappable(\n",
+ " norm=matplotlib.colors.Normalize(0, 1, True),\n",
+ " cmap=\"viridis\",\n",
+ " )\n",
+ " bar.set_array([0, 1])\n",
+ " return bar\n",
+ "\n",
+ "\n",
+ "def plot_fns(fns, split_by_dollar=False, res=25):\n",
+ " \"\"\"Generates contour plots for a list of (name, fn) functions.\"\"\"\n",
+ " num_reviews, avg_ratings = np.meshgrid(\n",
+ " np.linspace(0, 200, num=res),\n",
+ " np.linspace(1, 5, num=res),\n",
+ " )\n",
+ " if split_by_dollar:\n",
+ " dollar_rating_splits = [\"D\", \"DD\", \"DDD\", \"DDDD\"]\n",
+ " else:\n",
+ " dollar_rating_splits = [None]\n",
+ " if len(fns) == 1:\n",
+ " fig, axes = plt.subplots(2, 2, sharey=True, tight_layout=False)\n",
+ " else:\n",
+ " fig, axes = plt.subplots(\n",
+ " len(dollar_rating_splits), len(fns), sharey=True, tight_layout=False)\n",
+ " axes = axes.flatten()\n",
+ " axes_index = 0\n",
+ " for dollar_rating_split in dollar_rating_splits:\n",
+ " for title, fn in fns:\n",
+ " if dollar_rating_split is not None:\n",
+ " dollar_ratings = np.repeat(dollar_rating_split, res**2)\n",
+ " values = fn(avg_ratings.flatten(), num_reviews.flatten(),\n",
+ " dollar_ratings)\n",
+ " title = \"{}: dollar_rating={}\".format(title, dollar_rating_split)\n",
+ " else:\n",
+ " values = fn(avg_ratings.flatten(), num_reviews.flatten())\n",
+ " subplot = axes[axes_index]\n",
+ " axes_index += 1\n",
+ " subplot.contourf(\n",
+ " avg_ratings,\n",
+ " num_reviews,\n",
+ " np.reshape(values, (res, res)),\n",
+ " vmin=0,\n",
+ " vmax=1)\n",
+ " subplot.title.set_text(title)\n",
+ " subplot.set(xlabel=\"Average Rating\")\n",
+ " subplot.set(ylabel=\"Number of Reviews\")\n",
+ " subplot.set(xlim=(1, 5))\n",
+ "\n",
+ " _ = fig.colorbar(color_bar(), cax=fig.add_axes([0.95, 0.2, 0.01, 0.6]))\n",
+ "\n",
+ "\n",
+ "figsize(11, 11)\n",
+ "plot_fns([(\"CTR\", click_through_rate)], split_by_dollar=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Ol91olp3muNN"
+ },
+ "source": [
+ "### Preparing Data\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "H8BOshZS9xwn"
+ },
+ "source": [
+ "We start by generating a simulated dataset of restaurants and their features."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "MhqcOPdTT_wj"
+ },
+ "outputs": [],
+ "source": [
+ "def sample_restaurants(n):\n",
+ " avg_ratings = np.random.uniform(1.0, 5.0, n)\n",
+ " num_reviews = np.round(np.exp(np.random.uniform(0.0, np.log(200), n)))\n",
+ " dollar_ratings = np.random.choice([\"D\", \"DD\", \"DDD\", \"DDDD\"], n)\n",
+ " ctr_labels = click_through_rate(avg_ratings, num_reviews, dollar_ratings)\n",
+ " return avg_ratings, num_reviews, dollar_ratings, ctr_labels\n",
+ "\n",
+ "\n",
+ "np.random.seed(42)\n",
+ "avg_ratings, num_reviews, dollar_ratings, ctr_labels = sample_restaurants(2000)\n",
+ "\n",
+ "figsize(5, 5)\n",
+ "fig, axs = plt.subplots(1, 1, sharey=False, tight_layout=False)\n",
+ "for rating, marker in [(\"D\", \"o\"), (\"DD\", \"^\"), (\"DDD\", \"+\"), (\"DDDD\", \"x\")]:\n",
+ " plt.scatter(\n",
+ " x=avg_ratings[np.where(dollar_ratings == rating)],\n",
+ " y=num_reviews[np.where(dollar_ratings == rating)],\n",
+ " c=ctr_labels[np.where(dollar_ratings == rating)],\n",
+ " vmin=0,\n",
+ " vmax=1,\n",
+ " marker=marker,\n",
+ " label=rating)\n",
+ "plt.xlabel(\"Average Rating\")\n",
+ "plt.ylabel(\"Number of Reviews\")\n",
+ "plt.legend()\n",
+ "plt.xlim((1, 5))\n",
+ "plt.title(\"Distribution of restaurants\")\n",
+ "_ = fig.colorbar(color_bar(), cax=fig.add_axes([0.95, 0.2, 0.01, 0.6]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "tRetsfLv_JSR"
+ },
+ "source": [
+ "Let's produce the training, validation and testing datasets. When a restaurant is viewed in the search results, we can record user's engagement (click or no click) as a sample point. \n",
+ "\n",
+ "In practice, users often do not go through all search results. This means that users will likely only see restaurants already considered \"good\" by the current ranking model in use. As a result, \"good\" restaurants are more frequently impressed and over-represented in the training datasets.\n",
+ "\n",
+ "When the model is used for ranking, it is often evaluated on all relevant results with a more uniform distribution. As a result, it may act unexpectedly at evaluation time for cases that are under-represented in the training dataset. When using more features, the training dataset can have large gaps in \"bad\" parts of the feature space.\n",
+ "\n",
+ "A flexible and complicated model might fail in this case due to overfitting the over-represented data points and thus lacking generalizability. We handle this issue by applying domain knowledge to add *shape constraints* that guide the trained model to make reasonable predictions *when it cannot pick them up from the training dataset*.\n",
+ "\n",
+ "In this example, for the testing dataset we intentionally ignore the over-representation to simulate the online setting previously discussed."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "jS6WOtXQ8jwX"
+ },
+ "outputs": [],
+ "source": [
+ "def sample_dataset(n, testing_set):\n",
+ " (avg_ratings, num_reviews, dollar_ratings, ctr_labels) = sample_restaurants(n)\n",
+ " if testing_set:\n",
+ " # Testing has a more uniform distribution over all restaurants.\n",
+ " num_views = np.random.poisson(lam=3, size=n)\n",
+ " else:\n",
+ " # Training/validation datasets have more views on popular restaurants.\n",
+ " num_views = np.random.poisson(lam=ctr_labels * num_reviews / 50.0, size=n)\n",
+ "\n",
+ " return pd.DataFrame({\n",
+ " \"avg_rating\": np.repeat(avg_ratings, num_views),\n",
+ " \"num_reviews\": np.repeat(num_reviews, num_views),\n",
+ " \"dollar_rating\": np.repeat(dollar_ratings, num_views),\n",
+ " \"clicked\": np.random.binomial(n=1, p=np.repeat(ctr_labels, num_views))\n",
+ " })\n",
+ "\n",
+ "\n",
+ "# Generate datasets.\n",
+ "np.random.seed(42)\n",
+ "data_train = sample_dataset(2000, testing_set=False)\n",
+ "data_val = sample_dataset(1000, testing_set=False)\n",
+ "data_test = sample_dataset(1000, testing_set=True)\n",
+ "\n",
+ "# Plotting dataset densities.\n",
+ "figsize(12, 5)\n",
+ "fig, axs = plt.subplots(1, 2, sharey=False, tight_layout=False)\n",
+ "for ax, data, title in [(axs[0], data_train, \"training\"),\n",
+ " (axs[1], data_test, \"testing\")]:\n",
+ " _, _, _, density = ax.hist2d(\n",
+ " x=data[\"avg_rating\"],\n",
+ " y=data[\"num_reviews\"],\n",
+ " bins=(np.linspace(1, 5, num=21), np.linspace(0, 200, num=21)),\n",
+ " normed=True,\n",
+ " cmap=\"Blues\",\n",
+ " )\n",
+ " ax.set(xlim=(1, 5))\n",
+ " ax.set(ylim=(0, 200))\n",
+ " ax.set(xlabel=\"Average Rating\")\n",
+ " ax.set(ylabel=\"Number of Reviews\")\n",
+ " ax.title.set_text(\"Density of {} examples\".format(title))\n",
+ " _ = fig.colorbar(density, ax=ax)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "4fVyLgpCT1nW"
+ },
+ "source": [
+ "Defining input_fns used for training and evaluation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DYzRTRR2GKoS"
+ },
+ "outputs": [],
+ "source": [
+ "train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=data_train,\n",
+ " y=data_train[\"clicked\"],\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=NUM_EPOCHS,\n",
+ " shuffle=False,\n",
+ ")\n",
+ "\n",
+ "# feature_analysis_input_fn is used for TF Lattice estimators.\n",
+ "feature_analysis_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=data_train,\n",
+ " y=data_train[\"clicked\"],\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=1,\n",
+ " shuffle=False,\n",
+ ")\n",
+ "\n",
+ "val_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=data_val,\n",
+ " y=data_val[\"clicked\"],\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=1,\n",
+ " shuffle=False,\n",
+ ")\n",
+ "\n",
+ "test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=data_test,\n",
+ " y=data_test[\"clicked\"],\n",
+ " batch_size=BATCH_SIZE,\n",
+ " num_epochs=1,\n",
+ " shuffle=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "qoTrw3FZqvPK"
+ },
+ "source": [
+ "## Fitting Gradient Boosted Trees"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ZklNowexE3wB"
+ },
+ "source": [
+ "Let's start off with only two features: `avg_rating` and `num_reviews`.\n",
+ "\n",
+ "We create a few auxiliary functions for plotting and calculating validation and test metrics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "SX6rARJWURWl"
+ },
+ "outputs": [],
+ "source": [
+ "def analyze_two_d_estimator(estimator, name):\n",
+ " # Extract validation metrics.\n",
+ " metric = estimator.evaluate(input_fn=val_input_fn)\n",
+ " print(\"Validation AUC: {}\".format(metric[\"auc\"]))\n",
+ " metric = estimator.evaluate(input_fn=test_input_fn)\n",
+ " print(\"Testing AUC: {}\".format(metric[\"auc\"]))\n",
+ "\n",
+ " def two_d_pred(avg_ratings, num_reviews):\n",
+ " results = estimator.predict(\n",
+ " tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=pd.DataFrame({\n",
+ " \"avg_rating\": avg_ratings,\n",
+ " \"num_reviews\": num_reviews,\n",
+ " }),\n",
+ " shuffle=False,\n",
+ " ))\n",
+ " return [x[\"logistic\"][0] for x in results]\n",
+ "\n",
+ " def two_d_click_through_rate(avg_ratings, num_reviews):\n",
+ " return np.mean([\n",
+ " click_through_rate(avg_ratings, num_reviews,\n",
+ " np.repeat(d, len(avg_ratings)))\n",
+ " for d in [\"D\", \"DD\", \"DDD\", \"DDDD\"]\n",
+ " ],\n",
+ " axis=0)\n",
+ "\n",
+ " figsize(11, 5)\n",
+ " plot_fns([(\"{} Estimated CTR\".format(name), two_d_pred),\n",
+ " (\"CTR\", two_d_click_through_rate)],\n",
+ " split_by_dollar=False)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "JVef4f8yUUbs"
+ },
+ "source": [
+ "We can fit TensorFlow gradient boosted decision trees on the dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "DnPYlRAo2mnQ"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "gbt_estimator = tf.estimator.BoostedTreesClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " # Hyper-params optimized on validation set.\n",
+ " n_batches_per_layer=100,\n",
+ " max_depth=2,\n",
+ " n_trees=100,\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "gbt_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(gbt_estimator, \"GBT\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "nYZtd6YvsNdn"
+ },
+ "source": [
+ "Even though the model has captured the general shape of the true CTR, it produces a counter-intuitive contour plot of the estimated (predicted) CTR: certain spots on the contour surface can see an increased estimated CTR if we move either downwards or leftwards, meaning that one would be more likely to click on the restaurants at those spots if\n",
+ "- their review numbers decreased, i.e. they were less trendy, or\n",
+ "- their average ratings dropped, i.e. they served worse food.\n",
+ "\n",
+ "A few reasons may account for this:\n",
+ "- We are missing `dollar_rating`, an important feature.\n",
+ "- The training dataset has \"holes\": areas where there are not enough sample points.\n",
+ "- The noise-to-signal ratio is high.\n",
+ "- The applied ML model (boosted trees) is too flexible and prone to overfitting.\n",
+ "\n",
+ "The remedy could be simple: we enforce the shape constraint that the model must estimate CTR values monotonically increasing with respect to both the average rating and the number of reviews. We will later see how to implement this in TFL.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "Uf7WqGooFiEp"
+ },
+ "source": [
+ "## Fitting a DNN"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "_s2aT3x0E_tF"
+ },
+ "source": [
+ "We can repeat the same steps with a DNN classifier. Similar patterns can also be observed (you can rerun the cell if not so): not enough sample points are in the area where `num_reviews` is small, and the DNN extrapolation in this area works poorly. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "gFUeG6kLDNhO"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "dnn_estimator = tf.estimator.DNNClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " # Hyper-params optimized on validation set.\n",
+ " hidden_units=[16, 8, 8],\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "dnn_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(dnn_estimator, \"DNN\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "0Avkw-okw7JL"
+ },
+ "source": [
+ "## Shape Constraints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "3ExyethCFBrP"
+ },
+ "source": [
+ "TensorFlow Lattice (TFL) is laser-focused on installing shape constraints to safeguard model behavior. These shape constraints are fulfilled by two key concepts of TFL: \n",
+ "- *calibrator*: a piece-wise linear function, and\n",
+ "- *lattice*: a multi-dimensional lookup table. \n",
+ "\n",
+ "Their details can be found in [our JMLR paper](http://jmlr.org/papers/volume17/15-243/15-243.pdf). \n",
+ "\n",
+ "The most straightforward way to use TFL is through the premade TFL canned estimators. In this colab we will configure a TFL canned classifier. Similar to a TensorFlow estimator, training a TFL canned estimator requires several components:\n",
+ "- feature columns: definition of model features.\n",
+ "- feature configs: definition of TFL specific feature specs and shape constraints.\n",
+ "- model config: configuration of TFL canned estimator specs.\n",
+ "- feature analysis input fn: a TF input fn passing data for TFL initialization.\n",
+ "- (train) input fn: a TF input fn passing data for model training."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "anyCM4sCpOSo"
+ },
+ "source": [
+ "### Monotonicity\n",
+ "We first address our monotonicity concerns by adding monotonicity shape constraints to both features involved. \n",
+ "\n",
+ "To instruct TFL to install shape constraints, we decide which features are involved and specify the constraints for any feature in its feature config. The following code shows how we can require the output to be monotonically increasing with respect to both `num_reviews` and `avg_rating`.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "FCm1lOjmwur_"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " )\n",
+ " ])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(tfl_estimator, \"TF Lattice\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "ubNRBCWW5wQ9"
+ },
+ "source": [
+ "This canned classifier we built behaves in the following manner:\n",
+ "- It first applies a *calibrator* (a piece-wise linear function) to each feature to map the feature values onto [0,1].\n",
+ "- It then joins the calibrated feature values using a *lattice* and outputs the prediction.\n",
+ "\n",
+ "We can use `tfl.visualization` to visualize model behavior. In particular, the following plot shows the two trained calibrators included in the canned classifier. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "C0py9Q6OBRBE"
+ },
+ "outputs": [],
+ "source": [
+ "def save_and_visualize_lattice(tfl_estimator):\n",
+ " saved_model_path = tfl_estimator.export_saved_model(\n",
+ " \"/tmp/TensorFlow_Lattice_101/\",\n",
+ " tf.estimator.export.build_parsing_serving_input_receiver_fn(\n",
+ " feature_spec=tf.feature_column.make_parse_example_spec(\n",
+ " feature_columns)))\n",
+ " model_graph = tfl.estimators.get_model_graph(saved_model_path)\n",
+ " figsize(8, 8)\n",
+ " tfl.visualization.draw_model_graph(model_graph)\n",
+ " return model_graph\n",
+ "\n",
+ "_ = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "7vZ5fShXs504"
+ },
+ "source": [
+ "Now the contour plot gets cleaner and is showing what makes sense: estimated CTR will go up as long as the average rating increases or the number of reviews increases. Notice that the calibrators are monotonic."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "RfniRZCHIvfK"
+ },
+ "source": [
+ "### Diminishing Returns\n",
+ "[Diminishing returns](https://en.wikipedia.org/wiki/Diminishing_returns) means that the marginal gain of increasing a certain feature value will decrease as we increase the value. In our case we expect that the `num_reviews` feature follows this pattern, so we can configure its calibrator accordingly. Notice that we can decompose diminishing returns into two sufficient conditions:\n",
+ "\n",
+ "- the calibrator is monotonically increasing, and\n",
+ "- the calibrator is concave.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "XQrM9BskY-wx"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_convexity=\"concave\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " )\n",
+ " ])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(tfl_estimator, \"TF Lattice\")\n",
+ "_ = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "J6CP2Ovapiu3"
+ },
+ "source": [
+ "### 2D Shape Constraint: Trust\n",
+ "A 5-star rating for a restaurant with only one or two reviews is likely an unreliable rating (the restaurant might not actually be good), whereas a 4-star rating for a restaurant with hundreds of reviews is much more reliable (the restaurant is likely good in this case). We can see that the number of reviews of a restaurant affects how much trust we place in its average rating. \n",
+ "\n",
+ "We can exercise TFL trust constraints to inform the model that the larger (or smaller) value of one feature indicates more reliance on another feature. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "OA14j0erm6TJ"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_convexity=\"concave\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " # Larger num_reviews indicating more trust in avg_rating.\n",
+ " reflects_trust_in=[\n",
+ " tfl.configs.TrustConfig(\n",
+ " feature_name=\"avg_rating\", trust_type=\"edgeworth\"),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " )\n",
+ " ])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(tfl_estimator, \"TF Lattice\")\n",
+ "model_graph = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "puvP9X8XxyRV"
+ },
+ "source": [
+ "The following plot presents the trained lattice lookup result. Due to the trust constraint, we would expect that larger values of calibrated `num_reviews` would enable wider ranges for calibrated `avg_rating` to more significantly move the lattice output. This is confirmed by the plot."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "cellView": "both",
+ "colab": {},
+ "colab_type": "code",
+ "id": "RounEQebxxnA"
+ },
+ "outputs": [],
+ "source": [
+ "lat_mesh_n = 12\n",
+ "lat_mesh_x, lat_mesh_y = tfl.test_utils.two_dim_mesh_grid(\n",
+ " lat_mesh_n**2, 0, 0, 1, 1)\n",
+ "lat_mesh_fn = tfl.test_utils.get_hypercube_interpolation_fn(\n",
+ " model_graph.output_node.weights.flatten())\n",
+ "lat_mesh_z = [\n",
+ " lat_mesh_fn([lat_mesh_x.flatten()[i],\n",
+ " lat_mesh_y.flatten()[i]]) for i in range(lat_mesh_n**2)\n",
+ "]\n",
+ "trust_plt = tfl.visualization.plot_outputs(\n",
+ " (lat_mesh_x, lat_mesh_y),\n",
+ " {\"Lattice Lookup\": lat_mesh_z},\n",
+ " figsize=(6, 6),\n",
+ ")\n",
+ "trust_plt.title(\"Trust\")\n",
+ "trust_plt.xlabel(\"Calibrated avg_rating\")\n",
+ "trust_plt.ylabel(\"Calibrated num_reviews\")\n",
+ "trust_plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "SKe3UHX6pUjw"
+ },
+ "source": [
+ "### Smoothing Calibrators\n",
+ "Let's now take a look at the calibrator of `avg_rating`. Though it is monotonically increasing, the changes in its slopes are somewhat random and hard to interpret. That suggests we might want to consider smoothing this calibrator.\n",
+ "\n",
+ "Here we apply a `wrinkle` regularizer to reduce changes in the curvature. There are also the `laplacian` regularizer to flatten the calibrator and the `hessian` regularizer to make it more linear. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "qxFHH3hSpWfq"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_convexity=\"concave\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " reflects_trust_in=[\n",
+ " tfl.configs.TrustConfig(\n",
+ " feature_name=\"avg_rating\", trust_type=\"edgeworth\"),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " )\n",
+ " ])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_two_d_estimator(tfl_estimator, \"TF Lattice\")\n",
+ "_ = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "pSUd6aFlpYz4"
+ },
+ "source": [
+ "### Partial Monotonicity for Categorical Calibration\n",
+ "So far we have been using only two of the numeric features in the model. Here we will add a third feature using a categorical calibration layer. Again we start by setting up helper functions for plotting and metric calculation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "5tLDKwTmjrLw"
+ },
+ "outputs": [],
+ "source": [
+ "def analyze_three_d_estimator(estimator, name):\n",
+ " # Extract validation metrics.\n",
+ " metric = estimator.evaluate(input_fn=val_input_fn)\n",
+ " print(\"Validation AUC: {}\".format(metric[\"auc\"]))\n",
+ " metric = estimator.evaluate(input_fn=test_input_fn)\n",
+ " print(\"Testing AUC: {}\".format(metric[\"auc\"]))\n",
+ "\n",
+ " def three_d_pred(avg_ratings, num_reviews, dollar_rating):\n",
+ " results = estimator.predict(\n",
+ " tf.compat.v1.estimator.inputs.pandas_input_fn(\n",
+ " x=pd.DataFrame({\n",
+ " \"avg_rating\": avg_ratings,\n",
+ " \"num_reviews\": num_reviews,\n",
+ " \"dollar_rating\": dollar_rating,\n",
+ " }),\n",
+ " shuffle=False,\n",
+ " ))\n",
+ " return [x[\"logistic\"][0] for x in results]\n",
+ "\n",
+ " figsize(11, 22)\n",
+ " plot_fns([(\"{} Estimated CTR\".format(name), three_d_pred),\n",
+ " (\"CTR\", click_through_rate)],\n",
+ " split_by_dollar=True)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "CnPiqf4rq6kJ"
+ },
+ "source": [
+ "To involve the third feature `dollar_rating`, we should recall that categorical features require a slightly different treatment in TFL: both as a feature column and as a feature config. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "m-w7iGEEpgGt"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ " tf.feature_column.categorical_column_with_vocabulary_list(\n",
+ " \"dollar_rating\",\n",
+ " vocabulary_list=[\"D\", \"DD\", \"DDD\", \"DDDD\"],\n",
+ " dtype=tf.string,\n",
+ " default_value=0),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_convexity=\"concave\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " reflects_trust_in=[\n",
+ " tfl.configs.TrustConfig(\n",
+ " feature_name=\"avg_rating\", trust_type=\"edgeworth\"),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"dollar_rating\",\n",
+ " lattice_size=2,\n",
+ " pwl_calibration_num_keypoints=4,\n",
+ " # Here we only specify one monotonicity:\n",
+ "  # `D` restaurants have a smaller value than `DD` restaurants\n",
+ " monotonicity=[(\"D\", \"DD\")],\n",
+ " ),\n",
+ " ])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_three_d_estimator(tfl_estimator, \"TF Lattice\")\n",
+ "_ = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "gdIzhYL79_Pp"
+ },
+ "source": [
+ "This categorical calibrator shows the preference of the model output: DD \u003e D \u003e DDD \u003e DDDD, which is consistent with our setup. Notice there is also a column for missing values. Though there is no missing feature in our training and testing data, the model provides us with the best way to treat the missing value should it happen during downstream model serving.\n",
+ "\n",
+ "Here we also plot the predicted CTR of this model conditioned on `dollar_rating`. Notice that all the constraints we required are fulfilled in each of the slices."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "rh0H2b6l_rwZ"
+ },
+ "source": [
+ "### Output Calibration"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "colab_type": "text",
+ "id": "KPb2ri4e7HXF"
+ },
+ "source": [
+ "For all the TFL models we have trained so far, the lattice layer (indicated as \"Lattice\" in the model graph) directly outputs the model prediction. Sometimes we are not sure whether the lattice output should be rescaled to emit model outputs:\n",
+ "- the features are $log$ counts while the labels are counts.\n",
+ "- the lattice is configured to have very few vertices but the label distribution is relatively complicated.\n",
+ "\n",
+ "In those cases we can add another calibrator between the lattice output and the model output to increase model flexibility. Here let's add a calibrator layer with 5 keypoints to the model we just built. We also add a regularizer for the output calibrator to keep the function smooth.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 0,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "k5Sg_gUj_0i4"
+ },
+ "outputs": [],
+ "source": [
+ "feature_columns = [\n",
+ " tf.feature_column.numeric_column(\"num_reviews\"),\n",
+ " tf.feature_column.numeric_column(\"avg_rating\"),\n",
+ " tf.feature_column.categorical_column_with_vocabulary_list(\n",
+ " \"dollar_rating\",\n",
+ " vocabulary_list=[\"D\", \"DD\", \"DDD\", \"DDDD\"],\n",
+ " dtype=tf.string,\n",
+ " default_value=0),\n",
+ "]\n",
+ "model_config = tfl.configs.CalibratedLatticeConfig(\n",
+ " output_calibration=True,\n",
+ "    output_calibration_num_keypoints=5,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"output_calib_wrinkle\", l2=0.1),\n",
+ " ],\n",
+ " feature_configs=[\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"num_reviews\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_convexity=\"concave\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " reflects_trust_in=[\n",
+ " tfl.configs.TrustConfig(\n",
+ " feature_name=\"avg_rating\", trust_type=\"edgeworth\"),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"avg_rating\",\n",
+ " lattice_size=2,\n",
+ " monotonicity=\"increasing\",\n",
+ " pwl_calibration_num_keypoints=20,\n",
+ " regularizer_configs=[\n",
+ " tfl.configs.RegularizerConfig(name=\"calib_wrinkle\", l2=1.0),\n",
+ " ],\n",
+ " ),\n",
+ " tfl.configs.FeatureConfig(\n",
+ " name=\"dollar_rating\",\n",
+ " lattice_size=2,\n",
+ " pwl_calibration_num_keypoints=4,\n",
+ " # Here we only specify one monotonicity:\n",
+ "  # `D` restaurants have a smaller value than `DD` restaurants\n",
+ " monotonicity=[(\"D\", \"DD\")],\n",
+ " ),\n",
+ "])\n",
+ "tfl_estimator = tfl.estimators.CannedClassifier(\n",
+ " feature_columns=feature_columns,\n",
+ " model_config=model_config,\n",
+ " feature_analysis_input_fn=feature_analysis_input_fn,\n",
+ " optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),\n",
+ " config=tf.estimator.RunConfig(tf_random_seed=42),\n",
+ ")\n",
+ "tfl_estimator.train(input_fn=train_input_fn)\n",
+ "analyze_three_d_estimator(tfl_estimator, \"TF Lattice\")\n",
+ "_ = save_and_visualize_lattice(tfl_estimator)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "collapsed_sections": [],
+ "name": "tfl_shape_constraints.ipynb",
+ "private_outputs": true,
+ "provenance": [
+ {
+ "file_id": "1NYk-Kehpe0V3JgdRAYZmR9-kUdKcSxys",
+ "timestamp": 1579632224365
+ }
+ ],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/BUILD b/examples/BUILD
new file mode 100644
index 0000000..ec47d8d
--- /dev/null
+++ b/examples/BUILD
@@ -0,0 +1,62 @@
+# Copyright 2019 The TensorFlow Lattice Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+licenses(["notice"])
+
+package(
+ default_visibility = [
+ "//tensorflow_lattice:__subpackages__",
+ ],
+)
+
+py_binary(
+ name = "canned_estimators_uci_heart",
+ srcs = ["canned_estimators_uci_heart.py"],
+ python_version = "PY3",
+ deps = [
+ # tensorflow dep,
+ "//tensorflow_lattice",
+ ],
+)
+
+py_binary(
+ name = "keras_sequential_uci_heart",
+ srcs = ["keras_sequential_uci_heart.py"],
+ python_version = "PY3",
+ deps = [
+ # tensorflow dep,
+ "//tensorflow_lattice",
+ ],
+)
+
+py_binary(
+ name = "keras_functional_uci_heart",
+ srcs = ["keras_functional_uci_heart.py"],
+ python_version = "PY3",
+ deps = [
+ # tensorflow dep,
+ "//tensorflow_lattice",
+ ],
+)
+
+py_binary(
+ name = "custom_estimators_uci_heart",
+ srcs = ["custom_estimators_uci_heart.py"],
+ python_version = "PY3",
+ deps = [
+ # tensorflow dep,
+ "//tensorflow_lattice",
+ ],
+)
diff --git a/examples/canned_estimators_uci_heart.py b/examples/canned_estimators_uci_heart.py
new file mode 100644
index 0000000..51597c3
--- /dev/null
+++ b/examples/canned_estimators_uci_heart.py
@@ -0,0 +1,325 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Example usage of TFL canned estimators.
+
+This example trains several TFL canned estimators on the UCI heart dataset.
+
+Example usage:
+canned_estimators_uci_heart --config_updates=feature__age__lattice_size=4
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import re
+from absl import app
+from absl import flags
+import pandas as pd
+import tensorflow as tf
+from tensorflow import feature_column as fc
+from tensorflow_lattice import configs
+from tensorflow_lattice import estimators
+
+FLAGS = flags.FLAGS
+flags.DEFINE_float('learning_rate', 0.1, 'Learning rate.')
+flags.DEFINE_integer('batch_size', 100, 'Batch size.')
+flags.DEFINE_integer('num_epochs', 50, 'Number of training epoch.')
+flags.DEFINE_integer('prefitting_num_epochs', 10, 'Prefitting epochs.')
+flags.DEFINE_list(
+ 'config_updates', '',
+ 'Comma separated list of updates to model configs in name=value format.'
+ 'See tfl.configs.apply_updates().')
+
+
+def main(_):
+ # Parse configs updates from command line flags.
+ config_updates = []
+ for update in FLAGS.config_updates:
+ config_updates.extend(re.findall(r'(\S*)\s*=\s*(\S*)', update))
+
+ # UCI Statlog (Heart) dataset.
+ csv_file = tf.keras.utils.get_file(
+ 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
+ df = pd.read_csv(csv_file)
+ target = df.pop('target')
+ train_size = int(len(df) * 0.8)
+ train_x = df[:train_size]
+ train_y = target[:train_size]
+ test_x = df[train_size:]
+ test_y = target[train_size:]
+
+ # feature_analysis_input_fn is used to collect statistics about the input
+ # features, thus requiring only one loop of the dataset.
+ #
+ # feature_analysis_input_fn is required if you have at least one FeatureConfig
+ # with "pwl_calibration_input_keypoints='quantiles'". Note that 'quantiles' is
+ # default keypoints configuration so most likely you'll need it.
+ feature_analysis_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=train_x,
+ y=train_y,
+ shuffle=False,
+ batch_size=FLAGS.batch_size,
+ num_epochs=1,
+ num_threads=1)
+
+ # prefitting_input_fn is used to prefit an initial ensemble that is used to
+ # estimate feature interactions. This prefitting step does not need to fully
+ # converge and thus requiring fewer epochs than the main training.
+ #
+ # prefitting_input_fn is only required if your model_config is
+ # CalibratedLatticeEnsembleConfig with "lattices='crystals'"
+ prefitting_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=train_x,
+ y=train_y,
+ shuffle=True,
+ batch_size=FLAGS.batch_size,
+ num_epochs=FLAGS.prefitting_num_epochs,
+ num_threads=1)
+
+ train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=train_x,
+ y=train_y,
+ shuffle=True,
+ batch_size=FLAGS.batch_size,
+ num_epochs=FLAGS.num_epochs,
+ num_threads=1)
+
+ test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=test_x,
+ y=test_y,
+ shuffle=False,
+ batch_size=FLAGS.batch_size,
+ num_epochs=FLAGS.num_epochs,
+ num_threads=1)
+
+ # Feature columns.
+ # - age
+ # - sex
+ # - cp chest pain type (4 values)
+ # - trestbps resting blood pressure
+ # - chol serum cholesterol in mg/dl
+ # - fbs fasting blood sugar > 120 mg/dl
+ # - restecg resting electrocardiographic results (values 0,1,2)
+ # - thalach maximum heart rate achieved
+ # - exang exercise induced angina
+ # - oldpeak ST depression induced by exercise relative to rest
+ # - slope the slope of the peak exercise ST segment
+ # - ca number of major vessels (0-3) colored by fluoroscopy
+ # - thal 3 = normal; 6 = fixed defect; 7 = reversible defect
+ feature_columns = [
+ fc.numeric_column('age', default_value=-1),
+ fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
+ fc.numeric_column('cp'),
+ fc.numeric_column('trestbps', default_value=-1),
+ fc.numeric_column('chol'),
+ fc.categorical_column_with_vocabulary_list('fbs', [0, 1]),
+ fc.categorical_column_with_vocabulary_list('restecg', [0, 1, 2]),
+ fc.numeric_column('thalach'),
+ fc.categorical_column_with_vocabulary_list('exang', [0, 1]),
+ fc.numeric_column('oldpeak'),
+ fc.categorical_column_with_vocabulary_list('slope', [0, 1, 2]),
+ fc.numeric_column('ca'),
+ fc.categorical_column_with_vocabulary_list(
+ 'thal', ['normal', 'fixed', 'reversible']),
+ ]
+
+ # Feature configs are used to specify how each feature is calibrated and used.
+ feature_configs = [
+ configs.FeatureConfig(
+ name='age',
+ lattice_size=3,
+ # By default, input keypoints of pwl are quantiles of the feature.
+ pwl_calibration_num_keypoints=5,
+ monotonicity='increasing',
+ pwl_calibration_clip_max=100,
+ ),
+ configs.FeatureConfig(
+ name='cp',
+ pwl_calibration_num_keypoints=4,
+ # Keypoints can be uniformly spaced.
+ pwl_calibration_input_keypoints='uniform',
+ monotonicity='increasing',
+ ),
+ configs.FeatureConfig(
+ name='chol',
+ # Explicit input keypoint initialization.
+ pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
+ monotonicity='increasing',
+ pwl_calibration_clip_min=130,
+ # Calibration can be forced to span the full output range by clamping.
+ pwl_calibration_clamp_min=True,
+ pwl_calibration_clamp_max=True,
+ # Per feature regularization.
+ regularizer_configs=[
+ configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
+ ],
+ ),
+ configs.FeatureConfig(
+ name='fbs',
+ # Monotonicity: output for 1 should be larger than output for 0.
+ monotonicity=[(0, 1)],
+ ),
+ configs.FeatureConfig(
+ name='trestbps',
+ pwl_calibration_num_keypoints=5,
+ monotonicity='decreasing',
+ ),
+ configs.FeatureConfig(
+ name='thalach',
+ pwl_calibration_num_keypoints=5,
+ monotonicity='decreasing',
+ ),
+ configs.FeatureConfig(
+ name='restecg',
+ # Categorical monotonicity can be partial order.
+ monotonicity=[(0, 1), (0, 2)],
+ ),
+ configs.FeatureConfig(
+ name='exang',
+ monotonicity=[(0, 1)],
+ ),
+ configs.FeatureConfig(
+ name='oldpeak',
+ pwl_calibration_num_keypoints=5,
+ monotonicity='increasing',
+ ),
+ configs.FeatureConfig(
+ name='slope',
+ monotonicity=[(0, 1), (1, 2)],
+ ),
+ configs.FeatureConfig(
+ name='ca',
+ pwl_calibration_num_keypoints=4,
+ monotonicity='increasing',
+ ),
+ configs.FeatureConfig(
+ name='thal',
+ monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],
+ ),
+ ]
+
+ # Serving input fn is used to create saved models.
+ serving_input_fn = (
+ tf.estimator.export.build_parsing_serving_input_receiver_fn(
+ feature_spec=fc.make_parse_example_spec(feature_columns)))
+
+ # Model config defines the model structure for the estimator.
+ # This is calibrated linear model with output calibration: Inputs are
+ # calibrated, linearly combined and the output of the linear layer is
+ # calibrated again using a PWL function.
+ model_config = configs.CalibratedLinearConfig(
+ feature_configs=feature_configs,
+ use_bias=True,
+ output_calibration=True,
+ regularizer_configs=[
+ # Regularizer for the output calibrator.
+ configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
+ ])
+ # Update model configuration.
+ # See tfl.configs.apply_updates for details.
+ configs.apply_updates(model_config, config_updates)
+ estimator = estimators.CannedClassifier(
+ feature_columns=feature_columns,
+ model_config=model_config,
+ feature_analysis_input_fn=feature_analysis_input_fn,
+ optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
+ estimator.train(input_fn=train_input_fn)
+ results = estimator.evaluate(input_fn=test_input_fn)
+ print('Calibrated linear results: {}'.format(results))
+ print('Calibrated linear model exported to {}'.format(
+ estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
+
+ # This is calibrated lattice model: Inputs are calibrated, then combined
+ # non-linearly using a lattice layer.
+ model_config = configs.CalibratedLatticeConfig(
+ feature_configs=feature_configs,
+ regularizer_configs=[
+ # Torsion regularizer applied to the lattice to make it more linear.
+ configs.RegularizerConfig(name='torsion', l2=1e-4),
+ # Globally defined calibration regularizer is applied to all features.
+ configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
+ ])
+ estimator = estimators.CannedClassifier(
+ feature_columns=feature_columns,
+ model_config=model_config,
+ feature_analysis_input_fn=feature_analysis_input_fn,
+ optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
+ estimator.train(input_fn=train_input_fn)
+ results = estimator.evaluate(input_fn=test_input_fn)
+ print('Calibrated lattice results: {}'.format(results))
+ print('Calibrated lattice model exported to {}'.format(
+ estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
+
+ # This is random lattice ensemble model with separate calibration:
+ # model output is the average output of separately calibrated lattices.
+ model_config = configs.CalibratedLatticeEnsembleConfig(
+ feature_configs=feature_configs,
+ num_lattices=6,
+ lattice_rank=5,
+ separate_calibrators=True,
+ regularizer_configs=[
+ # Torsion regularizer applied to the lattice to make it more linear.
+ configs.RegularizerConfig(name='torsion', l2=1e-4),
+ # Globally defined calibration regularizer is applied to all features.
+ configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
+ ])
+ configs.apply_updates(model_config, config_updates)
+ estimator = estimators.CannedClassifier(
+ feature_columns=feature_columns,
+ model_config=model_config,
+ feature_analysis_input_fn=feature_analysis_input_fn,
+ optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
+ estimator.train(input_fn=train_input_fn)
+ results = estimator.evaluate(input_fn=test_input_fn)
+ print('Random ensemble results: {}'.format(results))
+ print('Random ensemble model exported to {}'.format(
+ estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
+
+ # This is Crystals ensemble model with separate calibration: model output is
+ # the average output of separately calibrated lattices.
+ # Crystals algorithm first trains a prefitting model and uses the interactions
+ # between features to form the final lattice ensemble.
+ model_config = configs.CalibratedLatticeEnsembleConfig(
+ feature_configs=feature_configs,
+ # Using Crystals algorithm.
+ lattices='crystals',
+ num_lattices=6,
+ lattice_rank=5,
+ separate_calibrators=True,
+ regularizer_configs=[
+ # Torsion regularizer applied to the lattice to make it more linear.
+ configs.RegularizerConfig(name='torsion', l2=1e-4),
+ # Globally defined calibration regularizer is applied to all features.
+ configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
+ ])
+ configs.apply_updates(model_config, config_updates)
+ estimator = estimators.CannedClassifier(
+ feature_columns=feature_columns,
+ model_config=model_config,
+ feature_analysis_input_fn=feature_analysis_input_fn,
+ # prefitting_input_fn is required to train the prefitting model.
+ prefitting_input_fn=prefitting_input_fn,
+ optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
+ estimator.train(input_fn=train_input_fn)
+ results = estimator.evaluate(input_fn=test_input_fn)
+ print('Crystals ensemble results: {}'.format(results))
+ print('Crystals ensemble model exported to {}'.format(
+ estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/examples/coffee_test.py b/examples/coffee_test.py
deleted file mode 100644
index c465fb3..0000000
--- a/examples/coffee_test.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for lattice estimators."""
-import numpy as np
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-# Example training and testing data.
-train_features = {
- 'distance': np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
- 'quality': np.array([2.0, 5.0, 1.0, 2.0, 5.0]),
-}
-train_labels = np.array([0.2, 1.0, 0.0, 0.0, 1.0])
-
-# Same quality but different distance.
-test_features = {
- 'distance': np.array([5.0, 10.0]),
- 'quality': np.array([3.0, 3.0]),
-}
-
-# Feature definition.
-feature_columns = [
- tf.feature_column.numeric_column('distance'),
- tf.feature_column.numeric_column('quality'),
-]
-
-# Hyperparameters.
-num_keypoints = 10
-hparams = tfl.CalibratedLatticeHParams(
- feature_names=['distance', 'quality'],
- num_keypoints=num_keypoints,
- learning_rate=0.1,
-)
-
-# Set feature monotonicity.
-hparams.set_feature_param('distance', 'monotonicity', -1)
-hparams.set_feature_param('quality', 'monotonicity', +1)
-
-# Define keypoint init.
-keypoints_init_fns = {
- 'distance': lambda: tfl.uniform_keypoints_for_signal(num_keypoints,
- input_min=0.0,
- input_max=10.0,
- output_min=0.0,
- output_max=1.0),
- 'quality': lambda: tfl.uniform_keypoints_for_signal(num_keypoints,
- input_min=0.0,
- input_max=5.0,
- output_min=0.0,
- output_max=1.0),
-}
-
-lattice_estimator = tfl.calibrated_lattice_regressor(
- feature_columns=feature_columns,
- hparams=hparams,
- keypoints_initializers_fn=keypoints_init_fns)
-
-# Train!
-train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x=train_features,
- y=train_labels,
- batch_size=1,
- num_epochs=100,
- shuffle=False)
-
-lattice_estimator.train(input_fn=train_input_fn)
-
-# Test.
-test_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x=test_features, y=None, batch_size=1, num_epochs=1, shuffle=False)
-
-print(list(lattice_estimator.predict(input_fn=test_input_fn)))
diff --git a/examples/custom_estimators_uci_heart.py b/examples/custom_estimators_uci_heart.py
new file mode 100644
index 0000000..ac3e5e3
--- /dev/null
+++ b/examples/custom_estimators_uci_heart.py
@@ -0,0 +1,170 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Example usage of TFL layers in custom estimators.
+
+This example trains a TFL custom estimator on the UCI heart dataset.
+
+Example usage:
+custom_estimators_uci_heart --num_epochs=40
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from tensorflow import feature_column as fc
+import tensorflow_lattice as tfl
+from tensorflow_estimator.python.estimator.canned import optimizers
+from tensorflow_estimator.python.estimator.head import binary_class_head
+
+FLAGS = flags.FLAGS
+flags.DEFINE_float('learning_rate', 0.01, 'Learning rate.')
+flags.DEFINE_integer('batch_size', 100, 'Batch size.')
+flags.DEFINE_integer('num_epochs', 200, 'Number of training epoch.')
+
+
+def main(_):
+ # UCI Statlog (Heart) dataset.
+ csv_file = tf.keras.utils.get_file(
+ 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
+ df = pd.read_csv(csv_file)
+ target = df.pop('target')
+ train_size = int(len(df) * 0.8)
+ train_x = df[:train_size]
+ train_y = target[:train_size]
+ test_x = df[train_size:]
+ test_y = target[train_size:]
+
+ train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=train_x,
+ y=train_y,
+ shuffle=True,
+ batch_size=FLAGS.batch_size,
+ num_epochs=FLAGS.num_epochs,
+ num_threads=1)
+
+ test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
+ x=test_x,
+ y=test_y,
+ shuffle=False,
+ batch_size=FLAGS.batch_size,
+ num_epochs=FLAGS.num_epochs,
+ num_threads=1)
+
+ # Feature columns.
+ # - age
+ # - sex
+ # - cp chest pain type (4 values)
+ # - trestbps resting blood pressure
+ # - chol serum cholesterol in mg/dl
+ # - fbs fasting blood sugar > 120 mg/dl
+ # - restecg resting electrocardiographic results (values 0,1,2)
+ # - thalach maximum heart rate achieved
+ # - exang exercise induced angina
+ # - oldpeak ST depression induced by exercise relative to rest
+ # - slope the slope of the peak exercise ST segment
+ # - ca number of major vessels (0-3) colored by fluoroscopy
+ # - thal 3 = normal; 6 = fixed defect; 7 = reversible defect
+ feature_columns = [
+ fc.numeric_column('age', default_value=-1),
+ fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
+ fc.numeric_column('ca'),
+ fc.categorical_column_with_vocabulary_list(
+ 'thal', ['normal', 'fixed', 'reversible']),
+ ]
+
+ def model_fn(features, labels, mode, config):
+ """model_fn for the custom estimator."""
+ del config
+ input_tensors = tfl.estimators.transform_features(features, feature_columns)
+ inputs = {
+ key: tf.keras.layers.Input(shape=(1,), name=key)
+ for key in input_tensors
+ }
+
+ lattice_sizes = [3, 2, 2, 2]
+ lattice_monotonicities = ['increasing', 'none', 'increasing', 'increasing']
+ lattice_input = tf.keras.layers.Concatenate(axis=1)([
+ tfl.layers.PWLCalibration(
+ input_keypoints=np.linspace(10, 100, num=8, dtype=np.float32),
+ # The output range of the calibrator should be the input range of
+ # the following lattice dimension.
+ output_min=0.0,
+ output_max=lattice_sizes[0] - 1.0,
+ monotonicity='increasing',
+ )(inputs['age']),
+ tfl.layers.CategoricalCalibration(
+ # Number of categories including any missing/default category.
+ num_buckets=2,
+ output_min=0.0,
+ output_max=lattice_sizes[1] - 1.0,
+ )(inputs['sex']),
+ tfl.layers.PWLCalibration(
+ input_keypoints=[0.0, 1.0, 2.0, 3.0],
+ output_min=0.0,
+ output_max=lattice_sizes[0] - 1.0,
+ # You can specify TFL regularizers as tuple
+ # ('regularizer name', l1, l2).
+ kernel_regularizer=('hessian', 0.0, 1e-4),
+ monotonicity='increasing',
+ )(inputs['ca']),
+ tfl.layers.CategoricalCalibration(
+ num_buckets=3,
+ output_min=0.0,
+ output_max=lattice_sizes[1] - 1.0,
+ # Categorical monotonicity can be partial order.
+ # (i, j) indicates that we must have output(i) <= output(j).
+ # Make sure to set the lattice monotonicity to 1 for this dimension.
+ monotonicities=[(0, 1), (0, 2)],
+ )(inputs['thal']),
+ ])
+ output = tfl.layers.Lattice(
+ lattice_sizes=lattice_sizes, monotonicities=lattice_monotonicities)(
+ lattice_input)
+
+ training = (mode == tf.estimator.ModeKeys.TRAIN)
+ model = tf.keras.Model(inputs=inputs, outputs=output)
+ logits = model(input_tensors, training=training)
+
+ if training:
+ optimizer = optimizers.get_optimizer_instance_v2('Adam',
+ FLAGS.learning_rate)
+ else:
+ optimizer = None
+
+ head = binary_class_head.BinaryClassHead()
+ return head.create_estimator_spec(
+ features=features,
+ mode=mode,
+ labels=labels,
+ optimizer=optimizer,
+ logits=logits,
+ trainable_variables=model.trainable_variables,
+ update_ops=model.updates)
+
+ estimator = tf.estimator.Estimator(model_fn=model_fn)
+ estimator.train(input_fn=train_input_fn)
+ results = estimator.evaluate(input_fn=test_input_fn)
+ print('Results: {}'.format(results))
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/examples/estimator_test.py b/examples/estimator_test.py
deleted file mode 100644
index f00b063..0000000
--- a/examples/estimator_test.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""A quick test script for TensorFlow Lattice's calibrated RTL estimator."""
-import numpy as np
-
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-# Feature definition.
-feature_columns = [
- tf.feature_column.numeric_column('x0'),
- tf.feature_column.numeric_column('x1'),
-]
-
-# Hyperparameters.
-num_keypoints = 10
-hparams = tfl.CalibratedRtlHParams(
- num_keypoints=num_keypoints,
- num_lattices=5,
- lattice_rank=2,
- learning_rate=0.1)
-def init_fn():
- return tfl.uniform_keypoints_for_signal(num_keypoints,
- input_min=-1.0,
- input_max=1.0,
- output_min=0.0,
- output_max=1.0)
-
-# Estimator.
-rtl_estimator = tfl.calibrated_rtl_regressor(feature_columns=feature_columns,
- hparams=hparams,
- keypoints_initializers_fn=init_fn)
-
-# Prepare the dataset.
-num_examples = 1000
-x0 = np.random.uniform(-1.0, 1.0, size=num_examples)
-x1 = np.random.uniform(-1.0, 1.0, size=num_examples)
-y = x0 ** 2 + x1 ** 2
-
-# Example input function.
-twod_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'x0': x0,
- 'x1': x1
- }, y=y, batch_size=10, num_epochs=1, shuffle=False)
-
-# Train!
-rtl_estimator.train(input_fn=twod_input_fn)
-# Evaluate!
-print(rtl_estimator.evaluate(input_fn=twod_input_fn))
diff --git a/examples/etl_1d.py b/examples/etl_1d.py
deleted file mode 100644
index 28ef2a1..0000000
--- a/examples/etl_1d.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# Copyright 2018 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Trains a small (2 inputs, single lattice) on toy data and visualizes it."""
-from __future__ import print_function
-
-import tempfile
-import matplotlib.pyplot as plt
-import numpy as np
-import scipy
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-np.random.seed(1)
-
-_FEATURE_KEYPOINTS = 'tfl_calibrated_etl/pwl_calibration/X_{}_keypoints_'
-_EMBED_KEYPOINTS = 'tfl_calibrated_etl/non_monotonic_lattices/'
-_EMBED_KEYPOINTS += 'pwl_calibration/signal_{}_keypoints_'
-_LATTICE_PARAMS = 'tfl_calibrated_etl/non_monotonic_lattices/lattice_{}/'
-_LATTICE_PARAMS += 'hypercube_lattice_parameters'
-
-
-def annulus_data(n_points, r_0, r_1):
- """Creates toy dataset in quadrant I with a quarter annulus.
-
- Args:
- n_points: (int) number of points
- r_0: (float) inner bounding radius
- r_1: (float) outer bounding radius
-
- Returns:
- x: (np.Array) covariates
- y: (np.Array) labels
- """
- x = np.random.random(size=(n_points, 2))
- r = (x**2).sum(1)**.5
- y = (r_0 < r) & (r < r_1)
- return x, y.astype(int)
-
-
-def fit_model(x,
- y,
- lattice_size=5,
- non_monotonic_num_lattices=1,
- non_monotonic_lattice_rank=1):
- """Fits a single 1D lattice to the provided data.
-
- Args:
- x: covariates
- y: labels
- lattice_size: (int, optional) Number of knots in each lattice dimension,
- total knots is lattice_size^lattice_rank, for each lattice
- non_monotonic_num_lattices: (int, optional)
- non_monotonic_lattice_rank: (int, optional) number of inputs to each
-
- Returns:
- etl_estimator: fitted TF Estimator
- """
- # Hyperparameters.
- num_keypoints = 100
- hparams = tfl.CalibratedEtlHParams(
- non_monotonic_lattice_rank=non_monotonic_lattice_rank,
- non_monotonic_num_lattices=non_monotonic_num_lattices,
- non_monotonic_lattice_size=lattice_size,
- num_keypoints=num_keypoints,
- learning_rate=0.007,
- linear_embedding_calibration_num_keypoints=100)
-
- # Estimator.
- feature_columns = [
- tf.feature_column.numeric_column('X_0'),
- tf.feature_column.numeric_column('X_1'),
- ]
-
- # Training is sensitive to initialization
- config = tf.estimator.RunConfig(tf_random_seed=1)
- def keypoints_config():
- return tfl.uniform_keypoints_for_signal(
- num_keypoints,
- input_min=0.0,
- input_max=x.max(),
- output_min=0.0,
- output_max=lattice_size - 1
- )
- etl_estimator = tfl.calibrated_etl_classifier(
- feature_columns=feature_columns,
- hparams=hparams,
- keypoints_initializers_fn=keypoints_config,
- config=config
- )
-
- # Input function.
- input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'X_0': x[:, 0],
- 'X_1': x[:, 1]
- },
- y=y.flatten(),
- batch_size=10000,
- num_epochs=100,
- shuffle=False)
-
- # Train!
- etl_estimator.train(input_fn=input_fn)
-
- # Evaluate
- eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'X_0': x[:, 0],
- 'X_1': x[:, 1]
- },
- y=y.flatten(),
- batch_size=10000,
- num_epochs=1,
- shuffle=False)
- print(etl_estimator.evaluate(input_fn=eval_input_fn))
-
- return etl_estimator
-
-
-def _get_calibration_params(estimator, dim, weight_key, prefix):
- """Helps extract calibration parameters from TFL graph."""
- input_key = '{}_keypoints_inputs'.format(prefix)
- output_key = '{}_keypoints_outputs'.format(prefix)
- calibrator_key = '{}_calibrators'.format(prefix)
-
- params = {}
- params[input_key], params[output_key], params[calibrator_key] = [], [], []
- for i in xrange(dim):
- params[input_key].append(
- estimator.get_variable_value(weight_key.format(i) + 'inputs'))
- params[output_key].append(
- estimator.get_variable_value(weight_key.format(i) + 'outputs'))
- params[calibrator_key].append(
- scipy.interpolate.interp1d(
- params[input_key][-1],
- params[output_key][-1],
- fill_value='extrapolate'))
- return params
-
-
-def _get_parameters(etl_estimator):
- """Extracts all parameters necessary to evaluate an ETL from estimator."""
- params = {}
- params['embed_weighting'] = etl_estimator.get_variable_value(
- 'tfl_calibrated_etl/linear_embedding/split_non_monotone/monotone_linear'
- '/weight')
- params['embed_bias'] = etl_estimator.get_variable_value(
- 'tfl_calibrated_etl/linear_embedding/split_non_monotone/monotone_linear'
- '/bias')
- params['final_bias'] = etl_estimator.get_variable_value(
- 'tfl_calibrated_etl/ensemble_average/ensemble_bias')
- params['n_embed'] = params['embed_weighting'].shape[0]
- params['n_feature'] = params['embed_weighting'].shape[1]
-
- params.update(
- _get_calibration_params(etl_estimator, params['n_feature'],
- _FEATURE_KEYPOINTS, 'feature'))
-
- params.update(
- _get_calibration_params(
- etl_estimator,
- params['n_embed'],
- _EMBED_KEYPOINTS,
- 'embed',
- ))
-
- n, ws = 0, []
- while _LATTICE_PARAMS.format(n) in etl_estimator.get_variable_names():
- ws.append(etl_estimator.get_variable_value(_LATTICE_PARAMS.format(n)))
- n += 1
- params['lattice_knots'] = np.vstack(ws)
-
- return params
-
-
-def _apply_callibration(x, calibrators):
- x_ = x.copy()
- for n in xrange(x.shape[1]):
- x_[:, n] = calibrators[n](x[:, n])
- return x_
-
-
-def _compress_0_1(x):
- return (x - x.min()) / (x.max() - x.min())
-
-
-def plot_all(etl_estimator, x, y, save_dir):
- """Makes visualizations of ETL Estimator.
-
- Args:
- etl_estimator: (TF ETL Estimator)
- x: (np.Array) inputs
- y: (np.Array) labels, in [0, 1]
- save_dir: (string) directory for saving visualizations
- """
- params = _get_parameters(etl_estimator)
-
- x_cal = _apply_callibration(x, params['feature_calibrators'])
- x_cal_emb = x_cal.dot(params['embed_weighting'].T) + params['embed_bias']
- x_cal_emb_cal = _apply_callibration(x_cal_emb, params['embed_calibrators'])
- x_cal_emb_cal_lat = np.zeros_like(x_cal_emb_cal)
- for i in xrange(params['lattice_knots'].shape[0]):
- interpolator = scipy.interpolate.interp1d(
- range(params['lattice_knots'].shape[1]),
- params['lattice_knots'][i],
- fill_value='extrapolate')
- x_cal_emb_cal_lat[:, i] = interpolator(x_cal_emb_cal[:, i])
-
- predictions = (x_cal_emb_cal_lat.mean(1) + params['final_bias'] >
- .5).astype(int)
-
- plt.figure()
- plt.title('Input Points Colored By Correct Classification')
- plt.scatter(x[:10000, 0], x[:10000, 1], c=y[:10000], alpha=.3)
- plt.savefig(save_dir + '/labeled.png')
-
- for i, (inputs, outputs) in enumerate(
- zip(params['feature_keypoints_inputs'],
- params['feature_keypoints_outputs'])):
- plt.figure()
- plt.title('Calibration Keypoints For Input Column Number {}'.format(i))
- plt.scatter(inputs, outputs)
- plt.savefig(save_dir + '/feature_cal_{}.png'.format(i))
-
- for i, (inputs, outputs) in enumerate(
- zip(params['embed_keypoints_inputs'], params['embed_keypoints_outputs'])):
- plt.figure()
- plt.title('Calibration Keypoints For Emedding Number {}'.format(i))
- plt.scatter(inputs, outputs)
- plt.savefig(save_dir + '/embed_cal_{}.png'.format(i))
-
- for i in xrange(params['lattice_knots'].shape[0]):
- plt.figure()
- plt.title('Lattice knots for lattice number {}'.format(i))
- plt.plot(
- range(params['lattice_knots'].shape[1]), params['lattice_knots'][i])
- plt.savefig(save_dir + '/lattice_{}.png'.format(i))
-
- plt.figure()
- plt.title('Input Points After Calibration, Colored By Correct Classification')
- plt.scatter(x_cal[:10000, 0], x_cal[:10000, 1], c=y[:10000], alpha=.3)
- plt.savefig(save_dir + '/calibrated.png')
-
- plt.figure()
- plt.title('Input Points Colored By Value'
- ' After Calibration and linear transformation')
- plt.scatter(
- x[:10000, 0],
- x[:10000, 1],
- c=_compress_0_1(x_cal_emb[:10000, 0]),
- alpha=.3)
- plt.savefig(save_dir + '/embed_colored.png')
-
- plt.figure()
- plt.title('Input Points Colored By Value After Calibration,'
- '\n Linear Transformation, Second Calibration')
- plt.scatter(
- x[:10000, 0],
- x[:10000, 1],
- c=_compress_0_1(x_cal_emb_cal[:10000, 0]),
- alpha=.3)
- plt.savefig(save_dir + '/embed_calibrated_colored.png')
-
- plt.figure()
- plt.title('Input Points Colored by Value After Calibration,'
- '\nlinear transformation, second calibration, and 1D lattice')
- plt.scatter(
- x[:10000, 0],
- x[:10000, 1],
- c=_compress_0_1(x_cal_emb_cal_lat[:10000, 0]),
- alpha=.3)
- plt.savefig(save_dir + '/lattice_colored.png')
-
- plt.figure()
- plt.title('Predictions')
- plt.scatter(
- x[:10000, 0],
- x[:10000, 1],
- c=_compress_0_1(predictions)[:10000],
- alpha=.3)
- plt.savefig(save_dir + '/predictions.png')
-
-
-def main():
- # Make data
- x, y = annulus_data(300000, .5, .8)
-
- # Train model
- etl_estimator = fit_model(x, y)
-
- # Visualize
- temp_dir = tempfile.mkdtemp()
- print('Saving figures to {}'.format(temp_dir))
- plot_all(etl_estimator, x, y, temp_dir)
-
-
-if __name__ == '__main__':
- main()
diff --git a/examples/image_compression.py b/examples/image_compression.py
deleted file mode 100644
index 02e7885..0000000
--- a/examples/image_compression.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# Copyright 2018 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""A quick example of TensorFlow Lattice's calibrated RTL estimator."""
-from __future__ import print_function
-import sys
-import tempfile
-import matplotlib.pyplot as plt
-import numpy as np
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-
-def _pixels(im):
- out = np.zeros((im.shape[0] * im.shape[1], 3))
- out[:, 0] = np.repeat(np.arange(im.shape[0]), im.shape[1])
- out[:, 1] = np.tile(np.arange(im.shape[1]), im.shape[0])
- out[:, 2] = im.ravel()
- return out
-
-
-def _pixels_to_image(pixels):
- out = np.zeros((int(pixels[:, 0].max() + 1), int(pixels[:, 1].max() + 1)))
- out[pixels[:, 0].astype(int), pixels[:, 1].astype(int)] = pixels[:, 2]
- return out
-
-
-def run_image(image_path, lattice_size=35):
- """Reads image and fits a 2D lattice to compress it."""
- im = plt.imread(image_path)[:, :, 2]
- im_pixels = _pixels(im)
-
- print('compression ratio is ', lattice_size**2 / float(im.size))
-
- # Hyperparameters.
- num_keypoints = 2
- hparams = tfl.CalibratedRtlHParams(
- num_keypoints=num_keypoints,
- num_lattices=1,
- lattice_rank=2,
- learning_rate=0.003,
- lattice_size=lattice_size)
-
- # Estimator.
- # input: coordinate of the pixel
- # output: value of the pixel
- feature_columns = [
- tf.feature_column.numeric_column('pixel_x'),
- tf.feature_column.numeric_column('pixel_y'),
- ]
-
- def keypoints_initializers():
- return tfl.uniform_keypoints_for_signal(
- num_keypoints,
- input_min=0.0,
- input_max=im_pixels.max(),
- output_min=0.0,
- output_max=lattice_size - 1
- )
- rtl_estimator = tfl.calibrated_rtl_regressor(
- feature_columns=feature_columns,
- hparams=hparams,
- keypoints_initializers_fn=keypoints_initializers
- )
-
- # Example input function.
- input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'pixel_x': im_pixels[:, 0],
- 'pixel_y': im_pixels[:, 1]
- },
- y=im_pixels[:, 2],
- batch_size=5000,
- num_epochs=15,
- shuffle=True)
-
- # Train!
- rtl_estimator.train(input_fn=input_fn)
-
- # Evaluate!
- eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'pixel_x': im_pixels[:, 0],
- 'pixel_y': im_pixels[:, 1]
- },
- y=im_pixels[:, 2],
- batch_size=5000,
- num_epochs=1,
- shuffle=True)
- print(rtl_estimator.evaluate(input_fn=eval_input_fn))
-
- return rtl_estimator
-
-
-def visualize(estimator, input_img_path, output_dir):
- """Visualizes trained estimator."""
- # This example pulls one channel, also would make sense to convert to gray
- im = plt.imread(input_img_path)[:, :, 2]
- im_pixels = _pixels(im)
-
- input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
- x={
- 'pixel_x': im_pixels[:, 0],
- 'pixel_y': im_pixels[:, 1]
- },
- batch_size=10000,
- num_epochs=1,
- shuffle=False)
-
- y_test = np.array(
- [q['predictions'] for q in estimator.predict(input_fn=input_fn)])
- img = _pixels_to_image(np.c_[im_pixels[:, :2], y_test])
-
- plt.figure()
- plt.imshow(img, cmap='gray')
- plt.savefig(output_dir + '/image.png')
- return img
-
-
-def main(image_path):
- """Fits image and provides visualization."""
- temp_dir = tempfile.mkdtemp()
- print('Saving output to {}'.format(temp_dir))
- estimator = run_image(image_path)
- visualize(estimator, image_path, temp_dir)
-
-if __name__ == '__main__':
- input_image_path = sys.argv[1]
- main(input_image_path)
diff --git a/examples/keras_functional_uci_heart.py b/examples/keras_functional_uci_heart.py
new file mode 100644
index 0000000..252616e
--- /dev/null
+++ b/examples/keras_functional_uci_heart.py
@@ -0,0 +1,314 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Example usage of TFL within Keras Functional API.
+
+This example builds and trains a calibrated lattice model for the UCI heart
+dataset.
+
+"Calibrated lattice" is a commonly used architecture for datasets where number
+of input features does not exceed ~15.
+
+"Calibrated lattice" assumes every feature being transformed by PWLCalibration
+or CategoricalCalibration layers before nonlinearly fusing the result of calibration
+within a lattice layer.
+
+The TFL package does not have any layers dedicated to processing of sparse
+features but thanks to plug and play compatibility with any other Keras layers
+we can take advantage of standard Keras embedding to handle sparse features. UCI
+Heart dataset does not have any sparse features so for this example we replaced
+PWLCalibration layer for feature 'age' with Embedding layer in order to
+demonstrate such compatibility as well as advantage of monotonicity
+constraints for semantically meaningful features.
+
+Generally when you manually combine TFL layers you should keep track of:
+1) Ensuring that inputs to TFL layers are within expected range.
+ - Input range for PWLCalibration layer is defined by smallest and largest of
+ provided keypoints.
+ - Input range for Lattice layer is [0.0, lattice_sizes[d] - 1.0] for any
+ dimension d.
+  TFL layers can constrain their output to be within desired range. When feeding
+  output of other layers into TFL layers you might want to ensure that something
+  like sigmoid is used to constrain their output range.
+2) Properly configure monotonicity. If your calibration layer is monotonic then
+ corresponding dimension of lattice layer should also be monotonic.
+
+This example uses functional API for Keras model construction. For an example of
+sequential models with TFL layers see keras_sequential_uci_heart.py.
+
+In order to see how better generalization can be achieved with a properly
+constrained PWLCalibration layer compared to a vanilla embedding layer, compare
+training and validation losses of this model with one defined in
+keras_sequential_uci_heart.py
+
+Note that the specifics of layer configurations are for demonstration purposes
+and might not result in optimal performance.
+
+Example usage:
+keras_functional_uci_heart
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+
+import numpy as np
+import pandas as pd
+
+import tensorflow as tf
+from tensorflow import keras
+import tensorflow_lattice as tfl
+
+FLAGS = flags.FLAGS
+flags.DEFINE_integer('num_epochs', 200, 'Number of training epoch.')
+
+
+def main(_):
+ # UCI Statlog (Heart) dataset.
+ csv_file = tf.keras.utils.get_file(
+ 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
+ training_data_df = pd.read_csv(csv_file).sample(
+ frac=1.0, random_state=41).reset_index(drop=True)
+
+ # Feature columns.
+ # 0 age
+ # 1 sex
+ # 2 cp chest pain type (4 values)
+ # 3 trestbps resting blood pressure
+ # 4 chol serum cholestoral in mg/dl
+ # 5 fbs fasting blood sugar > 120 mg/dl
+ # 6 restecg resting electrocardiographic results (values 0,1,2)
+ # 7 thalach maximum heart rate achieved
+ # 8 exang exercise induced angina
+ # 9 oldpeak ST depression induced by exercise relative to rest
+ # 10 slope the slope of the peak exercise ST segment
+ # 11 ca number of major vessels (0-3) colored by flourosopy
+ # 12 thal 3 = normal; 6 = fixed defect; 7 = reversable defect
+
+ # Example slice of training data:
+ # age sex cp trestbps chol fbs restecg thalach exang oldpeak
+ # 0 63 1 1 145 233 1 2 150 0 2.3
+ # 1 67 1 4 160 286 0 2 108 1 1.5
+ # 2 67 1 4 120 229 0 2 129 1 2.6
+ # 3 37 1 3 130 250 0 0 187 0 3.5
+ # 4 41 0 2 130 204 0 2 172 0 1.4
+ # 5 56 1 2 120 236 0 0 178 0 0.8
+ # 6 62 0 4 140 268 0 2 160 0 3.6
+ # 7 57 0 4 120 354 0 0 163 1 0.6
+ # 8 63 1 4 130 254 0 2 147 0 1.4
+ # 9 53 1 4 140 203 1 2 155 1 3.1
+
+ model_inputs = []
+ lattice_inputs = []
+ # We are going to have 2-d embedding as one of lattice inputs.
+ lattice_sizes_for_embedding = [2, 3]
+ lattice_sizes = lattice_sizes_for_embedding + [2, 2, 3, 3, 2, 2]
+
+ # ############### age ###############
+
+ age_input = keras.layers.Input(shape=[1])
+ model_inputs.append(age_input)
+ age_embedding = keras.layers.Embedding(
+ input_dim=10,
+ output_dim=len(lattice_sizes_for_embedding),
+ embeddings_initializer=keras.initializers.RandomNormal(seed=1)
+ )(age_input)
+ # Flatten to get rid of redundant tensor dimension created by embedding layer.
+ age_embedding = keras.layers.Flatten()(age_embedding)
+
+ # Lattice expects input data for lattice dimension d to be within
+ # [0, lattice_sizes[d]-1.0]. Apply sigmoid and multiply it by input range to
+ # ensure that lattice inputs are within expected range.
+ embedding_lattice_input_range = tf.constant(
+ [size - 1.0 for size in lattice_sizes_for_embedding],
+ # Insert dimension of size 1 in front to ensure that batch dimension
+ # will not collapse as result of multiplication.
+ shape=(1, 2))
+ age_ranged = keras.layers.multiply(
+ [keras.activations.sigmoid(age_embedding),
+ embedding_lattice_input_range])
+ lattice_inputs.append(age_ranged)
+
+ # ############### sex ###############
+
+ # For boolean features simply specify CategoricalCalibration layer with 2
+ # buckets.
+ sex_input = keras.layers.Input(shape=[1])
+ model_inputs.append(sex_input)
+ sex_calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=2,
+ output_min=0.0,
+ output_max=lattice_sizes[2] - 1.0,
+ # Initializes all outputs to (output_min + output_max) / 2.0.
+ kernel_initializer='constant',
+ )(sex_input)
+ lattice_inputs.append(sex_calibrator)
+
+ # ############### cp ###############
+
+ cp_input = keras.layers.Input(shape=[1])
+ model_inputs.append(cp_input)
+ cp_calibrator = tfl.layers.PWLCalibration(
+ # Here instead of specifying dtype of layer we convert keypoints into
+ # np.float32.
+ input_keypoints=np.linspace(1, 4, num=4, dtype=np.float32),
+ output_min=0.0,
+ output_max=lattice_sizes[3] - 1.0,
+ monotonicity='increasing',
+ # You can specify TFL regularizers as tuple ('regularizer name', l1, l2).
+ kernel_regularizer=('hessian', 0.0, 1e-4)
+ )(cp_input)
+ lattice_inputs.append(cp_calibrator)
+
+ # ############### trestbps ###############
+
+ trestbps_input = keras.layers.Input(shape=[1])
+ model_inputs.append(trestbps_input)
+ trestbps_calibrator = tfl.layers.PWLCalibration(
+ # Alternatively to uniform keypoints you might want to use quantiles as
+ # keypoints.
+ input_keypoints=np.quantile(
+ training_data_df['trestbps'], np.linspace(0.0, 1.0, num=5)),
+ dtype=tf.float32,
+ # Together with quantile keypoints you might want to initialize piecewise
+ # linear function to have 'equal_slopes' in order for output of layer
+ # after initialization to preserve original distribution.
+ kernel_initializer='equal_slopes',
+ output_min=0.0,
+ output_max=lattice_sizes[4] - 1.0,
+ # You might consider clamping extreme inputs of the calibrator to output
+ # bounds.
+ clamp_min=True,
+ clamp_max=True,
+ monotonicity='increasing',
+ )(trestbps_input)
+ lattice_inputs.append(trestbps_calibrator)
+
+ # ############### chol ###############
+
+ chol_input = keras.layers.Input(shape=[1])
+ model_inputs.append(chol_input)
+ chol_calibrator = tfl.layers.PWLCalibration(
+ # Explicit input keypoint initialization.
+ input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
+ output_min=0.0,
+ output_max=lattice_sizes[5] - 1.0,
+ # Monotonicity of calibrator can be decreasing. Note that corresponding
+ # lattice dimension must have INCREASING monotonicity regardless of
+ # monotonicity direction of calibrator.
+ # It's not some weird configuration hack. It's just how math works :)
+ monotonicity='decreasing',
+ # Convexity together with decreasing monotonicity result in diminishing
+ # return constraint.
+ convexity='convex',
+ # You can specify list of regularizers. You are not limited to TFL
+ # regularizers. Feel free to use any :)
+ kernel_regularizer=[('laplacian', 0.0, 1e-4),
+ keras.regularizers.l1_l2(l1=0.001)]
+ )(chol_input)
+ lattice_inputs.append(chol_calibrator)
+
+ # ############### fbs ###############
+
+ fbs_input = keras.layers.Input(shape=[1])
+ model_inputs.append(fbs_input)
+ fbs_calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=2,
+ output_min=0.0,
+ output_max=lattice_sizes[6] - 1.0,
+ # For categorical calibration layer monotonicity is specified for pairs
+ # of indices of categories. Output for first category in pair will be
+ # smaller than output for second category.
+ #
+ # Don't forget to set monotonicity of corresponding dimension of Lattice
+ # layer to 'increasing'.
+ monotonicities=[(0, 1)],
+ # This initializer is identical to default one ('uniform'), but has fixed
+ # seed in order to simplify experimentation.
+ kernel_initializer=keras.initializers.RandomUniform(
+ minval=0.0, maxval=lattice_sizes[6] - 1.0, seed=1),
+ )(fbs_input)
+ lattice_inputs.append(fbs_calibrator)
+
+ # ############### restecg ###############
+
+ restecg_input = keras.layers.Input(shape=[1])
+ model_inputs.append(restecg_input)
+ restecg_calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=3,
+ output_min=0.0,
+ output_max=lattice_sizes[7] - 1.0,
+ # Categorical monotonicity can be partial order.
+ monotonicities=[(0, 1), (0, 2)],
+ # Categorical calibration layer supports standard Keras regularizers.
+ kernel_regularizer=keras.regularizers.l1_l2(l1=0.001),
+ kernel_initializer='constant',
+ )(restecg_input)
+ lattice_inputs.append(restecg_calibrator)
+
+ # Lattice inputs must be either list of d tensors of rank (batch_size, 1) or
+ # single tensor of rank (batch_size, d) where d is dimensionality of lattice.
+ # Since our embedding layer has size 2 in second dimension - concatenate all
+ # of inputs to create single tensor.
+ lattice_inputs_tensor = keras.layers.concatenate(lattice_inputs, axis=1)
+
+ # Create Lattice layer to nonlinearly fuse output of calibrators. Don't forget
+ # to specify 'increasing' monotonicity for any dimension for which
+ # monotonicity is configured regardless of monotonicity direction of those.
+ # This includes partial monotonicity of CategoricalCalibration layer.
+ # Note that making embedding inputs monotonic does not make sense.
+ lattice = tfl.layers.Lattice(
+ lattice_sizes=lattice_sizes,
+ monotonicities=['none', 'none', 'none', 'increasing', 'increasing',
+ 'increasing', 'increasing', 'increasing'],
+ output_min=0.0,
+ output_max=1.0,
+ )(lattice_inputs_tensor)
+
+ model = keras.models.Model(
+ inputs=model_inputs,
+ outputs=lattice)
+ model.compile(loss=keras.losses.mean_squared_error,
+ optimizer=keras.optimizers.Adagrad(learning_rate=1.0))
+
+ feature_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg']
+ features = np.split(training_data_df[feature_names].values.astype(np.float32),
+ indices_or_sections=len(feature_names),
+ axis=1)
+ target = training_data_df[['target']].values.astype(np.float32)
+
+ # Bucketize input for embedding.
+ embedding_bins = np.quantile(
+ features[0],
+ # 10 keypoints will produce 9 bins numbered 1..9 to match embedding input
+ # size of 10.
+ np.linspace(0.0, 1.0, num=10, dtype=np.float32))
+ # Ensure that highest age will get into last bin rather than its own one.
+ embedding_bins[-1] += 1.0
+ features[0] = np.digitize(features[0], bins=embedding_bins)
+
+ model.fit(features,
+ target,
+ batch_size=32,
+ epochs=FLAGS.num_epochs,
+ validation_split=0.2,
+ shuffle=False)
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/examples/keras_sequential_uci_heart.py b/examples/keras_sequential_uci_heart.py
new file mode 100644
index 0000000..3c721ec
--- /dev/null
+++ b/examples/keras_sequential_uci_heart.py
@@ -0,0 +1,275 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Example usage of TFL within Keras models.
+
+This example builds and trains a calibrated lattice model for the UCI heart
+dataset.
+
+"Calibrated lattice" is a commonly used architecture for datasets where number
+of input features does not exceed ~15.
+
+"Calibrated lattice" assumes every feature being transformed by PWLCalibration
+or CategoricalCalibration layers before nonlinearly fusing the result of calibration
+within a lattice layer.
+
+Generally when you manually combine TFL layers you should keep track of:
+1) Ensuring that inputs to TFL layers are within expected range.
+ - Input range for PWLCalibration layer is defined by smallest and largest of
+ provided keypoints.
+ - Input range for Lattice layer is [0.0, lattice_sizes[d] - 1.0] for any
+ dimension d.
+  TFL layers can constrain their output to be within desired range. When feeding
+  output of other layers into TFL layers you might want to ensure that something
+  like sigmoid is used to constrain their output range.
+2) Properly configure monotonicity. If your calibration layer is monotonic then
+ corresponding dimension of lattice layer should also be monotonic.
+
+This example creates a Sequential Keras model and only uses TFL layers. For an
+example of functional model construction that also use embedding layers see
+keras_functional_uci_heart.py.
+
+In order to see how better generalization can be achieved with a properly
+constrained PWLCalibration layer compared to a vanilla embedding layer, compare
+training and validation losses of this model with one defined in
+keras_functional_uci_heart.py
+
+
+Note that the specifics of layer configurations are for demonstration purposes
+and might not result in optimal performance.
+
+Example usage:
+keras_sequential_uci_heart
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl import app
+from absl import flags
+
+import numpy as np
+import pandas as pd
+
+import tensorflow as tf
+from tensorflow import keras
+import tensorflow_lattice as tfl
+
+FLAGS = flags.FLAGS
+flags.DEFINE_integer('num_epochs', 200, 'Number of training epoch.')
+
+
+def main(_):
+ # UCI Statlog (Heart) dataset.
+ csv_file = tf.keras.utils.get_file(
+ 'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
+ training_data_df = pd.read_csv(csv_file).sample(
+ frac=1.0, random_state=41).reset_index(drop=True)
+
+ # Feature columns.
+ # 0 age
+ # 1 sex
+ # 2 cp chest pain type (4 values)
+ # 3 trestbps resting blood pressure
+ # 4 chol serum cholestoral in mg/dl
+ # 5 fbs fasting blood sugar > 120 mg/dl
+ # 6 restecg resting electrocardiographic results (values 0,1,2)
+ # 7 thalach maximum heart rate achieved
+ # 8 exang exercise induced angina
+ # 9 oldpeak ST depression induced by exercise relative to rest
+ # 10 slope the slope of the peak exercise ST segment
+ # 11 ca number of major vessels (0-3) colored by flourosopy
+ # 12 thal 3 = normal; 6 = fixed defect; 7 = reversable defect
+
+ # Example slice of training data:
+ # age sex cp trestbps chol fbs restecg thalach exang oldpeak
+ # 0 63 1 1 145 233 1 2 150 0 2.3
+ # 1 67 1 4 160 286 0 2 108 1 1.5
+ # 2 67 1 4 120 229 0 2 129 1 2.6
+ # 3 37 1 3 130 250 0 0 187 0 3.5
+ # 4 41 0 2 130 204 0 2 172 0 1.4
+ # 5 56 1 2 120 236 0 0 178 0 0.8
+ # 6 62 0 4 140 268 0 2 160 0 3.6
+ # 7 57 0 4 120 354 0 0 163 1 0.6
+ # 8 63 1 4 130 254 0 2 147 0 1.4
+ # 9 53 1 4 140 203 1 2 155 1 3.1
+
+ # Lattice sizes per dimension for Lattice layer.
+ # Lattice layer expects input[i] to be within [0, lattice_sizes[i] - 1.0], so
+ # we need to define lattice sizes ahead of calibration layers so we can
+ # properly specify output range of calibration layers.
+ lattice_sizes = [3, 2, 2, 2, 2, 2, 2]
+
+ # Use ParallelCombination helper layer to group together calibration layers
+ # which have to be executed in parallel in order to be able to use Sequential
+ # model. Alternatively you can use functional API.
+ combined_calibrators = tfl.layers.ParallelCombination()
+
+ # Configure calibration layers for every feature:
+
+ # ############### age ###############
+
+ calibrator = tfl.layers.PWLCalibration(
+ # Every PWLCalibration layer must have keypoints of piecewise linear
+ # function specified. Easiest way to specify them is to uniformly cover
+ # entire input range by using numpy.linspace().
+ input_keypoints=np.linspace(training_data_df['age'].min(),
+ training_data_df['age'].max(),
+ num=5),
+ # You need to ensure that input keypoints have same dtype as layer input.
+ # You can do it by setting dtype here or by providing keypoints in such
+ # format which will be converted to desired tf.dtype by default.
+ dtype=tf.float32,
+ # Output range must correspond to expected lattice input range.
+ output_min=0.0,
+ output_max=lattice_sizes[0] - 1.0,
+ monotonicity='increasing')
+ combined_calibrators.append(calibrator)
+
+ # ############### sex ###############
+
+ # For boolean features simply specify CategoricalCalibration layer with 2
+ # buckets.
+ calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=2,
+ output_min=0.0,
+ output_max=lattice_sizes[1] - 1.0,
+ # Initializes all outputs to (output_min + output_max) / 2.0.
+ kernel_initializer='constant')
+ combined_calibrators.append(calibrator)
+
+ # ############### cp ###############
+
+ calibrator = tfl.layers.PWLCalibration(
+ # Here instead of specifying dtype of layer we convert keypoints into
+ # np.float32.
+ input_keypoints=np.linspace(1, 4, num=4, dtype=np.float32),
+ output_min=0.0,
+ output_max=lattice_sizes[2] - 1.0,
+ monotonicity='increasing',
+ # You can specify TFL regularizers as tuple ('regularizer name', l1, l2).
+ kernel_regularizer=('hessian', 0.0, 1e-4))
+ combined_calibrators.append(calibrator)
+
+ # ############### trestbps ###############
+
+ calibrator = tfl.layers.PWLCalibration(
+ # Alternatively to uniform keypoints you might want to use quantiles as
+ # keypoints.
+ input_keypoints=np.quantile(
+ training_data_df['trestbps'], np.linspace(0.0, 1.0, num=5)),
+ dtype=tf.float32,
+ # Together with quantile keypoints you might want to initialize piecewise
+ # linear function to have 'equal_slopes' in order for output of layer
+ # after initialization to preserve original distribution.
+ kernel_initializer='equal_slopes',
+ output_min=0.0,
+ output_max=lattice_sizes[3] - 1.0,
+ # You might consider clamping extreme inputs of the calibrator to output
+ # bounds.
+ clamp_min=True,
+ clamp_max=True,
+ monotonicity='increasing')
+ combined_calibrators.append(calibrator)
+
+ # ############### chol ###############
+
+ calibrator = tfl.layers.PWLCalibration(
+ # Explicit input keypoint initialization.
+ input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
+ dtype=tf.float32,
+ output_min=0.0,
+ output_max=lattice_sizes[4] - 1.0,
+ # Monotonicity of calibrator can be 'decreasing'. Note that corresponding
+ # lattice dimension must have 'increasing' monotonicity regardless of
+ # monotonicity direction of calibrator.
+ # It's not some weird configuration hack. It's just how math works :)
+ monotonicity='decreasing',
+ # Convexity together with decreasing monotonicity result in diminishing
+ # return constraint.
+ convexity='convex',
+ # You can specify list of regularizers. You are not limited to TFL
+ # regularizers. Feel free to use any :)
+ kernel_regularizer=[('laplacian', 0.0, 1e-4),
+ keras.regularizers.l1_l2(l1=0.001)])
+ combined_calibrators.append(calibrator)
+
+ # ############### fbs ###############
+
+ calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=2,
+ output_min=0.0,
+ output_max=lattice_sizes[5] - 1.0,
+ # For categorical calibration layer monotonicity is specified for pairs
+ # of indices of categories. Output for first category in pair will be
+ # smaller than output for second category.
+ #
+ # Don't forget to set monotonicity of corresponding dimension of Lattice
+ # layer to 'increasing'.
+ monotonicities=[(0, 1)],
+ # This initializer is identical to default one('uniform'), but has fixed
+ # seed in order to simplify experimentation.
+ kernel_initializer=keras.initializers.RandomUniform(
+ minval=0.0, maxval=lattice_sizes[5] - 1.0, seed=1))
+ combined_calibrators.append(calibrator)
+
+ # ############### restecg ###############
+
+ calibrator = tfl.layers.CategoricalCalibration(
+ num_buckets=3,
+ output_min=0.0,
+ output_max=lattice_sizes[6] - 1.0,
+ # Categorical monotonicity can be partial order.
+ monotonicities=[(0, 1), (0, 2)],
+ # Categorical calibration layer supports standard Keras regularizers.
+ kernel_regularizer=keras.regularizers.l1_l2(l1=0.001),
+ kernel_initializer='constant')
+ combined_calibrators.append(calibrator)
+
+ # Create Lattice layer to nonlinearly fuse output of calibrators. Don't forget
+ # to specify monotonicity 'increasing' for any dimension which calibrator is
+ # monotonic regardless of monotonicity direction of calibrator. This includes
+ # partial monotonicity of CategoricalCalibration layer.
+ lattice = tfl.layers.Lattice(
+ lattice_sizes=lattice_sizes,
+ monotonicities=['increasing', 'none', 'increasing', 'increasing',
+ 'increasing', 'increasing', 'increasing'],
+ output_min=0.0,
+ output_max=1.0)
+
+ model = keras.models.Sequential()
+ # We have just 2 layers as far as Sequential model is concerned.
+ # ParallelCombination layer takes care of grouping calibrators.
+ model.add(combined_calibrators)
+ model.add(lattice)
+ model.compile(loss=keras.losses.mean_squared_error,
+ optimizer=keras.optimizers.Adagrad(learning_rate=1.0))
+
+ features = training_data_df[
+ ['age', 'sex', 'cp',
+ 'trestbps', 'chol', 'fbs', 'restecg']].values.astype(np.float32)
+ target = training_data_df[['target']].values.astype(np.float32)
+
+ model.fit(features,
+ target,
+ batch_size=32,
+ epochs=FLAGS.num_epochs,
+ validation_split=0.2,
+ shuffle=False)
+
+
+if __name__ == '__main__':
+ app.run(main)
diff --git a/examples/lattice_test.py b/examples/lattice_test.py
deleted file mode 100644
index a441086..0000000
--- a/examples/lattice_test.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""A quick test script for TensorFlow Lattice's lattice layer."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-x = tf.compat.v1.placeholder(tf.float32, shape=(None, 2))
-(y, _, _, _) = tfl.lattice_layer(x, lattice_sizes=(2, 2))
-
-with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- print(sess.run(y, feed_dict={x: [[0.0, 0.0]]}))
diff --git a/examples/uci_census.py b/examples/uci_census.py
deleted file mode 100644
index 55bbc5a..0000000
--- a/examples/uci_census.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Train and evaluate models on UCI Census data.
-
-This is an example TensorFlow Lattice model training and evaluating program,
-using TensorFlow's `tf.estimators` library, a high level abstraction layer
-for machine learning models.
-
-TensorFlow Lattice also offers "layer" level components, so one can customize
-their own models, but these are not included in this example.
-
-Example run for calibrated linear model:
-
-* Uses bash variables `type` and `attempt` for convenience. You can bump
- `attempt` when trying different hyper-parameters.
-* The flag `--create_quantiles` need to be set just the very first time you
- run, since the data quantiles information used for calibration is the same
- for all models.
-* Use `--hparams` to set changes to default parameters.
-* It will print out evaluation on the training data and evaluation data
- every 1/10th of the training epochs.
-
-```bash
-$ type=calibrated_linear ; attempt=1 ;
- python uci_census.py --run=train --model_type=${type}
- --output_dir=${HOME}/experiments/uci_census/${type}_${attempt}
- --quantiles_dir=${HOME}/experiments/uci_census
- --train_epochs=600 --batch_size=1000
- --hparams=learning_rate=1e-3
- --create_quantiles
-```
-
-Example run for calibrated RTL model (assumes you already created the
-quantiles):
-
-* Notice calibrated RTL models train slower than calibrated linear model, but
-should yield slightly better results.
-
-```bash
-$ type=calibrated_rtl ; attempt=1 ;
- python uci_census.py --run=train --model_type=${type}
- --output_dir=${HOME}/experiments/uci_census/${type}_${attempt}
- --quantiles_dir=${HOME}/experiments/uci_census
- --train_epochs=600 --batch_size=1000
- --hparams=learning_rate=1e-2
-```
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import tempfile
-
-import pandas as pd
-import six
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-flags = tf.flags
-FLAGS = flags.FLAGS
-
-# Run mode of the program.
-flags.DEFINE_string(
- "run", "train", "One of 'train', 'evaluate' or 'save', train will "
- "train on training data and also optionally evaluate; evaluate will "
- "evaluate train and test data; save saves the trained model so far "
- "so it can be used by TensorFlow Serving.")
-
-# Dataset.
-flags.DEFINE_string("test", "/tmp/uci_census/adult.test", "Path to test file.")
-flags.DEFINE_string("train", "/tmp/uci_census/adult.data",
- "Path to train file.")
-
-# Model flags.
-flags.DEFINE_string(
- "output_dir", None,
- "Directory where to store the model. If not set a temporary directory "
- "will be automatically created.")
-flags.DEFINE_string(
- "model_type", "calibrated_linear",
- "Types defined in this example: calibrated_linear, calibrated_lattice, "
- " calibrated_rtl, calibrated_etl, calibrated_dnn")
-flags.DEFINE_integer("batch_size", 1000,
- "Number of examples to include in one batch. Increase "
- "this number to improve parallelism, at cost of memory.")
-flags.DEFINE_string("hparams", None,
- "Model hyperparameters, see hyper-parameters in Tensorflow "
- "Lattice documentation. Example: --hparams=learning_rate="
- "0.1,lattice_size=2,num_keypoints=100")
-
-# Calibration quantiles flags.
-flags.DEFINE_bool("create_quantiles", False,
- "Run once to create histogram of features for calibration. "
- "It will use the --train dataset for that.")
-flags.DEFINE_string(
- "quantiles_dir", None,
- "Directory where to store quantile information, defaults to the model "
- "directory (set by --output-dir) but since quantiles can be reused by "
- "models with different parameters, you may want to have a separate "
- "directory.")
-
-# Training flags.
-flags.DEFINE_integer("train_epochs", 10,
- "How many epochs over data during training.")
-flags.DEFINE_bool(
- "train_evaluate_on_train", True,
- "If set, every 1/10th of the train_epochs runs an evaluation on the "
- "full train data.")
-flags.DEFINE_bool(
- "train_evaluate_on_test", True,
- "If set, every 1/10th of the train_epochs runs an evaluation on the "
- "full test data.")
-
-# Columns in dataset files.
-CSV_COLUMNS = [
- "age", "workclass", "fnlwgt", "education", "education_num",
- "marital_status", "occupation", "relationship", "race", "gender",
- "capital_gain", "capital_loss", "hours_per_week", "native_country",
- "income_bracket"
-]
-
-
-def get_test_input_fn(batch_size, num_epochs, shuffle):
- return get_input_fn(FLAGS.test, batch_size, num_epochs, shuffle)
-
-
-def get_train_input_fn(batch_size, num_epochs, shuffle):
- return get_input_fn(FLAGS.train, batch_size, num_epochs, shuffle)
-
-
-# Copy of data read from train/test files: keep copy to avoid re-reading
-# it at every training/evaluation loop.
-_df_data = {}
-_df_data_labels = {}
-
-
-def get_input_fn(file_path, batch_size, num_epochs, shuffle):
- """Returns an input_fn closure for given parameters."""
- if file_path not in _df_data:
- _df_data[file_path] = pd.read_csv(
- tf.gfile.Open(file_path),
- names=CSV_COLUMNS,
- skipinitialspace=True,
- engine="python",
- skiprows=1)
- _df_data[file_path] = _df_data[file_path].dropna(how="any", axis=0)
- _df_data_labels[file_path] = _df_data[file_path]["income_bracket"].apply(
- lambda x: ">50K" in x).astype(int)
- return tf.compat.v1.estimator.inputs.pandas_input_fn(
- x=_df_data[file_path],
- y=_df_data_labels[file_path],
- batch_size=batch_size,
- shuffle=shuffle,
- num_epochs=num_epochs,
- num_threads=1)
-
-
-def create_feature_columns():
- """Creates feature columns for UCI Census, some are sparse."""
- # Categorical features.
- gender = tf.feature_column.categorical_column_with_vocabulary_list(
- "gender", ["Female", "Male"])
- education = tf.feature_column.categorical_column_with_vocabulary_list(
- "education", [
- "Bachelors", "HS-grad", "11th", "Masters", "9th", "Some-college",
- "Assoc-acdm", "Assoc-voc", "7th-8th", "Doctorate", "Prof-school",
- "5th-6th", "10th", "1st-4th", "Preschool", "12th"
- ])
- marital_status = tf.feature_column.categorical_column_with_vocabulary_list(
- "marital_status", [
- "Married-civ-spouse", "Divorced", "Married-spouse-absent",
- "Never-married", "Separated", "Married-AF-spouse", "Widowed"
- ])
- relationship = tf.feature_column.categorical_column_with_vocabulary_list(
- "relationship", [
- "Husband", "Not-in-family", "Wife", "Own-child", "Unmarried",
- "Other-relative"
- ])
- workclass = tf.feature_column.categorical_column_with_vocabulary_list(
- "workclass", [
- "Self-emp-not-inc", "Private", "State-gov", "Federal-gov",
- "Local-gov", "?", "Self-emp-inc", "Without-pay", "Never-worked"
- ])
- occupation = tf.feature_column.categorical_column_with_vocabulary_list(
- "occupation", [
- "Prof-specialty", "Craft-repair", "Exec-managerial", "Adm-clerical",
- "Sales", "Other-service", "Machine-op-inspct", "?",
- "Transport-moving", "Handlers-cleaners", "Farming-fishing",
- "Tech-support", "Protective-serv", "Priv-house-serv", "Armed-Forces"
- ])
- race = tf.feature_column.categorical_column_with_vocabulary_list(
- "race", [
- "White",
- "Black",
- "Asian-Pac-Islander",
- "Amer-Indian-Eskimo",
- "Other",
- ])
- native_country = tf.feature_column.categorical_column_with_vocabulary_list(
- "native_country", [
- "United-States",
- "Mexico",
- "?",
- "Philippines",
- "Germany",
- "Canada",
- "Puerto-Rico",
- "El-Salvador",
- "India",
- "Cuba",
- "England",
- "Jamaica",
- "South",
- "China",
- "Italy",
- "Dominican-Republic",
- "Vietnam",
- "Guatemala",
- "Japan",
- "Poland",
- "Columbia",
- "Taiwan",
- "Haiti",
- "Iran",
- "Portugal",
- "Nicaragua",
- "Peru",
- "Greece",
- "France",
- "Ecuador",
- "Ireland",
- "Hong",
- "Trinadad&Tobago",
- "Cambodia",
- "Thailand",
- "Laos",
- "Yugoslavia",
- "Outlying-US(Guam-USVI-etc)",
- "Hungary",
- "Honduras",
- "Scotland",
- "Holand-Netherlands",
- ])
-
- # Numerical (continuous) base columns.
- age = tf.feature_column.numeric_column("age")
- education_num = tf.feature_column.numeric_column("education_num")
- capital_gain = tf.feature_column.numeric_column("capital_gain")
- capital_loss = tf.feature_column.numeric_column("capital_loss")
- hours_per_week = tf.feature_column.numeric_column("hours_per_week")
-
- # fnlwgt: this should be the weight, how representative this example is of
- # the population, we don't use it here.
- # fnlwgt = tf.feature_column.numeric_column("fnlwgt")
-
- # income-bracket is the label, so, not returned here.
- return [
- age,
- workclass,
- education,
- education_num,
- marital_status,
- occupation,
- relationship,
- race,
- gender,
- capital_gain,
- capital_loss,
- hours_per_week,
- native_country,
- ]
-
-
-def create_quantiles(quantiles_dir):
- """Creates quantiles directory if it doesn't yet exist."""
- batch_size = 10000
- input_fn = get_train_input_fn(
- batch_size=batch_size, num_epochs=1, shuffle=False)
- # Reads until input is exhausted, 10000 at a time.
- tfl.save_quantiles_for_keypoints(
- input_fn=input_fn,
- save_dir=quantiles_dir,
- feature_columns=create_feature_columns(),
- num_steps=None)
-
-
-def _pprint_hparams(hparams):
- """Pretty-print hparams."""
- print("* hparams=[")
- for (key, value) in sorted(six.iteritems(hparams.values())):
- print("\t{}={}".format(key, value))
- print("]")
-
-
-def create_calibrated_linear(feature_columns, config, quantiles_dir):
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedLinearHParams(
- feature_names=feature_names, num_keypoints=200, learning_rate=1e-4)
- hparams.parse(FLAGS.hparams)
- hparams.set_feature_param("capital_gain", "calibration_l2_laplacian_reg",
- 4.0e-3)
- _pprint_hparams(hparams)
- return tfl.calibrated_linear_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-
-
-def create_calibrated_lattice(feature_columns, config, quantiles_dir):
- """Creates a calibrated lattice estimator."""
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedLatticeHParams(
- feature_names=feature_names,
- num_keypoints=200,
- lattice_l2_laplacian_reg=5.0e-3,
- lattice_l2_torsion_reg=1.0e-4,
- learning_rate=0.1,
- lattice_size=2)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_lattice_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-
-
-def create_calibrated_rtl(feature_columns, config, quantiles_dir):
- """Creates a calibrated RTL estimator."""
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedRtlHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=0.02,
- lattice_l2_laplacian_reg=5.0e-4,
- lattice_l2_torsion_reg=1.0e-4,
- lattice_size=3,
- lattice_rank=4,
- num_lattices=100)
- # Specific feature parameters.
- hparams.set_feature_param("capital_gain", "lattice_size", 8)
- hparams.set_feature_param("native_country", "lattice_size", 8)
- hparams.set_feature_param("marital_status", "lattice_size", 4)
- hparams.set_feature_param("age", "lattice_size", 8)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_rtl_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-
-
-def create_calibrated_etl(feature_columns, config, quantiles_dir):
- """Creates a calibrated ETL estimator."""
- # No enforced monotonicity in this example.
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedEtlHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=0.02,
- non_monotonic_num_lattices=200,
- non_monotonic_lattice_rank=2,
- non_monotonic_lattice_size=2,
- calibration_l2_laplacian_reg=4.0e-3,
- lattice_l2_laplacian_reg=1.0e-5,
- lattice_l2_torsion_reg=4.0e-4)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_etl_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-
-
-def create_calibrated_dnn(feature_columns, config, quantiles_dir):
- """Creates a calibrated DNN model."""
- # This is an example of a hybrid model that uses input calibration layer
- # offered by TensorFlow Lattice library and connects it to a DNN.
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=1.0e-3,
- calibration_output_min=-1.0,
- calibration_output_max=1.0,
- nodes_per_layer=10, # All layers have the same number of nodes.
- layers=2, # Includes output layer, therefore >= 1.
- )
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
-
- def _model_fn(features, labels, mode, params):
- """Model construction closure used when creating estimator."""
- del params # Hyper-params are read directly from the bound variable hparams
-
- # Calibrate: since there is no monotonicity, there are no projection ops.
- # We also discard the ordered names of the features.
- (output, _, _, regularization) = tfl.input_calibration_layer_from_hparams(
- features, feature_columns, hparams, quantiles_dir)
-
- # Hidden-layers.
- for _ in range(hparams.layers - 1):
- output = tf.layers.dense(
- inputs=output, units=hparams.nodes_per_layer, activation=tf.sigmoid)
-
- # Classifier logits and prediction.
- logits = tf.layers.dense(inputs=output, units=1)
- predictions = tf.reshape(tf.sigmoid(logits), [-1])
-
- # Notice loss doesn't include regularization, which is added separately
- # by means of tf.contrib.layers.apply_regularization().
- loss_no_regularization = tf.losses.log_loss(labels, predictions)
- loss = loss_no_regularization
- if regularization is not None:
- loss += regularization
- optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
- train_op = optimizer.minimize(
- loss,
- global_step=tf.train.get_global_step(),
- name="calibrated_dnn_minimize")
-
- eval_metric_ops = {
- "accuracy": tf.metrics.accuracy(labels, predictions),
-
- # We want to report the loss without the regularization, so metric is
- # comparable with different regularizations. FutureWork, list both.
- "average_loss": tf.metrics.mean(loss_no_regularization),
- }
-
- return tf.estimator.EstimatorSpec(mode, predictions, loss, train_op,
- eval_metric_ops)
-
- # Hyper-parameters are passed directly to the model_fn closure by the context.
- return tf.estimator.Estimator(
- model_fn=_model_fn,
- model_dir=config.model_dir,
- config=config,
- params=None)
-
-
-def create_estimator(config, quantiles_dir):
- """Creates estimator for given configuration based on --model_type."""
- feature_columns = create_feature_columns()
- if FLAGS.model_type == "calibrated_linear":
- return create_calibrated_linear(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_lattice":
- return create_calibrated_lattice(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_rtl":
- return create_calibrated_rtl(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_etl":
- return create_calibrated_etl(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_dnn":
- return create_calibrated_dnn(feature_columns, config, quantiles_dir)
-
- raise ValueError("Unknown model_type={}".format(FLAGS.model_type))
-
-
-def evaluate_on_data(estimator, data):
- """Evaluates and prints results, set data to FLAGS.test or FLAGS.train."""
- name = os.path.basename(data)
- evaluation = estimator.evaluate(
- input_fn=get_input_fn(
- file_path=data,
- batch_size=FLAGS.batch_size,
- num_epochs=1,
- shuffle=False),
- name=name)
- print(" Evaluation on '{}':\taccuracy={:.4f}\taverage_loss={:.4f}".format(
- name, evaluation["accuracy"], evaluation["average_loss"]))
-
-
-def train(estimator):
- """Trains estimator and optionally intermediary evaluations."""
- if not FLAGS.train_evaluate_on_train and not FLAGS.train_evaluate_on_test:
- estimator.train(input_fn=get_train_input_fn(
- batch_size=FLAGS.batch_size,
- num_epochs=FLAGS.train_epochs,
- shuffle=True))
- else:
- # Train 1/10th of the epochs requested per loop, but at least 1 per loop.
- epochs_trained = 0
- loops = 0
- while epochs_trained < FLAGS.train_epochs:
- loops += 1
- next_epochs_trained = int(loops * FLAGS.train_epochs / 10.0)
- epochs = max(1, next_epochs_trained - epochs_trained)
- epochs_trained += epochs
- estimator.train(input_fn=get_train_input_fn(
- batch_size=FLAGS.batch_size, num_epochs=epochs, shuffle=True))
- print("Trained for {} epochs, total so far {}:".format(
- epochs, epochs_trained))
- evaluate_on_data(estimator, FLAGS.train)
- evaluate_on_data(estimator, FLAGS.test)
-
-
-def evaluate(estimator):
- """Runs straight evaluation on a currently trained model."""
- evaluate_on_data(estimator, FLAGS.train)
- evaluate_on_data(estimator, FLAGS.test)
-
-
-def main(args):
- del args # Not used.
-
- # Prepare directories.
- output_dir = FLAGS.output_dir
- if output_dir is None:
- output_dir = tempfile.mkdtemp()
- tf.logging.warning("Using temporary folder as model directory: %s",
- output_dir)
- quantiles_dir = FLAGS.quantiles_dir or output_dir
-
- # Create quantiles if required.
- if FLAGS.create_quantiles:
- if FLAGS.run != "train":
- raise ValueError(
- "Can not create_quantiles for mode --run='{}'".format(FLAGS.run))
- create_quantiles(quantiles_dir)
-
- # Create config and then model.
- config = tf.estimator.RunConfig().replace(model_dir=output_dir)
- estimator = create_estimator(config, quantiles_dir)
-
- if FLAGS.run == "train":
- train(estimator)
-
- elif FLAGS.run == "evaluate":
- evaluate(estimator)
-
- else:
- raise ValueError("Unknonw --run={}".format(FLAGS.run))
-
-
-if __name__ == "__main__":
- tf.app.run()
diff --git a/g3doc/api_docs/python/_toc.yaml b/g3doc/api_docs/python/_toc.yaml
deleted file mode 100644
index 2a3a8c1..0000000
--- a/g3doc/api_docs/python/_toc.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Automatically generated file; please do not edit
-toc:
- - title: tensorflow_lattice
- section:
- - title: Overview
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice
- - title: CalibratedEtlHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/CalibratedEtlHParams
- - title: CalibratedHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/CalibratedHParams
- - title: CalibratedLatticeHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/CalibratedLatticeHParams
- - title: CalibratedLinearHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/CalibratedLinearHParams
- - title: CalibratedRtlHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/CalibratedRtlHParams
- - title: calibrated_etl_classifier
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_etl_classifier
- - title: calibrated_etl_regressor
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_etl_regressor
- - title: calibrated_lattice_classifier
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_lattice_classifier
- - title: calibrated_lattice_regressor
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_lattice_regressor
- - title: calibrated_linear_classifier
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_linear_classifier
- - title: calibrated_linear_regressor
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_linear_regressor
- - title: calibrated_rtl_classifier
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_rtl_classifier
- - title: calibrated_rtl_regressor
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrated_rtl_regressor
- - title: calibration_layer
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibration_layer
- - title: calibrator_regularization
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/calibrator_regularization
- - title: ensemble_lattices_layer
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/ensemble_lattices_layer
- - title: input_calibration_layer
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/input_calibration_layer
- - title: input_calibration_layer_from_hparams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/input_calibration_layer_from_hparams
- - title: lattice
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/lattice
- - title: lattice_layer
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/lattice_layer
- - title: lattice_regularization
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/lattice_regularization
- - title: load_keypoints_from_quantiles
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/load_keypoints_from_quantiles
- - title: monotone_lattice
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/monotone_lattice
- - title: monotonic_projection
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/monotonic_projection
- - title: PerFeatureHParams
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/PerFeatureHParams
- - title: pwl_indexing_calibrator
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/pwl_indexing_calibrator
- - title: save_quantiles_for_keypoints
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/save_quantiles_for_keypoints
- - title: uniform_keypoints_for_signal
- path: /TARGET_DOC_ROOT/VERSION/api_docs/python/tensorflow_lattice/uniform_keypoints_for_signal
diff --git a/g3doc/api_docs/python/index.md b/g3doc/api_docs/python/index.md
deleted file mode 100644
index 33a4bdc..0000000
--- a/g3doc/api_docs/python/index.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# All symbols in TensorFlow Lattice
-
-* [`tensorflow_lattice`](./tensorflow_lattice.md)
-* [`tensorflow_lattice.CalibratedEtlHParams`](./tensorflow_lattice/CalibratedEtlHParams.md)
-* [`tensorflow_lattice.CalibratedHParams`](./tensorflow_lattice/CalibratedHParams.md)
-* [`tensorflow_lattice.CalibratedLatticeHParams`](./tensorflow_lattice/CalibratedLatticeHParams.md)
-* [`tensorflow_lattice.CalibratedLinearHParams`](./tensorflow_lattice/CalibratedLinearHParams.md)
-* [`tensorflow_lattice.CalibratedRtlHParams`](./tensorflow_lattice/CalibratedRtlHParams.md)
-* [`tensorflow_lattice.PerFeatureHParams`](./tensorflow_lattice/PerFeatureHParams.md)
-* [`tensorflow_lattice.calibrated_etl_classifier`](./tensorflow_lattice/calibrated_etl_classifier.md)
-* [`tensorflow_lattice.calibrated_etl_regressor`](./tensorflow_lattice/calibrated_etl_regressor.md)
-* [`tensorflow_lattice.calibrated_lattice_classifier`](./tensorflow_lattice/calibrated_lattice_classifier.md)
-* [`tensorflow_lattice.calibrated_lattice_regressor`](./tensorflow_lattice/calibrated_lattice_regressor.md)
-* [`tensorflow_lattice.calibrated_linear_classifier`](./tensorflow_lattice/calibrated_linear_classifier.md)
-* [`tensorflow_lattice.calibrated_linear_regressor`](./tensorflow_lattice/calibrated_linear_regressor.md)
-* [`tensorflow_lattice.calibrated_rtl_classifier`](./tensorflow_lattice/calibrated_rtl_classifier.md)
-* [`tensorflow_lattice.calibrated_rtl_regressor`](./tensorflow_lattice/calibrated_rtl_regressor.md)
-* [`tensorflow_lattice.calibration_layer`](./tensorflow_lattice/calibration_layer.md)
-* [`tensorflow_lattice.calibrator_regularization`](./tensorflow_lattice/calibrator_regularization.md)
-* [`tensorflow_lattice.ensemble_lattices_layer`](./tensorflow_lattice/ensemble_lattices_layer.md)
-* [`tensorflow_lattice.input_calibration_layer`](./tensorflow_lattice/input_calibration_layer.md)
-* [`tensorflow_lattice.input_calibration_layer_from_hparams`](./tensorflow_lattice/input_calibration_layer_from_hparams.md)
-* [`tensorflow_lattice.lattice`](./tensorflow_lattice/lattice.md)
-* [`tensorflow_lattice.lattice_layer`](./tensorflow_lattice/lattice_layer.md)
-* [`tensorflow_lattice.lattice_regularization`](./tensorflow_lattice/lattice_regularization.md)
-* [`tensorflow_lattice.load_keypoints_from_quantiles`](./tensorflow_lattice/load_keypoints_from_quantiles.md)
-* [`tensorflow_lattice.monotone_lattice`](./tensorflow_lattice/monotone_lattice.md)
-* [`tensorflow_lattice.monotonic_projection`](./tensorflow_lattice/monotonic_projection.md)
-* [`tensorflow_lattice.pwl_indexing_calibrator`](./tensorflow_lattice/pwl_indexing_calibrator.md)
-* [`tensorflow_lattice.save_quantiles_for_keypoints`](./tensorflow_lattice/save_quantiles_for_keypoints.md)
-* [`tensorflow_lattice.uniform_keypoints_for_signal`](./tensorflow_lattice/uniform_keypoints_for_signal.md)
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice.md b/g3doc/api_docs/python/tensorflow_lattice.md
deleted file mode 100644
index 10220d3..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice.md
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
-
-
-
-# Module: tensorflow_lattice
-
-Lattice modeling.
-
-This package provides functions and classes for lattice modeling.
-
-See full description in `README.md` file.
-
-
- use them.
-
-## Classes
-
-[`class CalibratedEtlHParams`](./tensorflow_lattice/CalibratedEtlHParams.md): Hyper-parameters for CalibratedEtl (Embedded tiny lattices) models.
-
-[`class CalibratedHParams`](./tensorflow_lattice/CalibratedHParams.md): PerFeatureHParams specialization with input calibration parameters.
-
-[`class CalibratedLatticeHParams`](./tensorflow_lattice/CalibratedLatticeHParams.md): Hyper-parameters for CalibratedLattice models.
-
-[`class CalibratedLinearHParams`](./tensorflow_lattice/CalibratedLinearHParams.md): Hyper-parameters for CalibratedLinear models.
-
-[`class CalibratedRtlHParams`](./tensorflow_lattice/CalibratedRtlHParams.md): Hyper-parameters for CalibratedRtl (RandomTinyLattices) models.
-
-[`class PerFeatureHParams`](./tensorflow_lattice/PerFeatureHParams.md): Parameters object with per feature parametrization.
-
-## Functions
-
-[`calibrated_etl_classifier(...)`](./tensorflow_lattice/calibrated_etl_classifier.md): Calibrated etl binary classifier model.
-
-[`calibrated_etl_regressor(...)`](./tensorflow_lattice/calibrated_etl_regressor.md): Calibrated etl regressor model.
-
-[`calibrated_lattice_classifier(...)`](./tensorflow_lattice/calibrated_lattice_classifier.md): Calibrated lattice classifier binary model.
-
-[`calibrated_lattice_regressor(...)`](./tensorflow_lattice/calibrated_lattice_regressor.md): Calibrated lattice estimator (model) for regression.
-
-[`calibrated_linear_classifier(...)`](./tensorflow_lattice/calibrated_linear_classifier.md): Calibrated linear classifier binary model.
-
-[`calibrated_linear_regressor(...)`](./tensorflow_lattice/calibrated_linear_regressor.md): Calibrated linear estimator (model) for regression.
-
-[`calibrated_rtl_classifier(...)`](./tensorflow_lattice/calibrated_rtl_classifier.md): Calibrated rtl binary classifier model.
-
-[`calibrated_rtl_regressor(...)`](./tensorflow_lattice/calibrated_rtl_regressor.md): Calibrated rtl regressor model.
-
-[`calibration_layer(...)`](./tensorflow_lattice/calibration_layer.md): Creates a calibration layer for uncalibrated values.
-
-[`calibrator_regularization(...)`](./tensorflow_lattice/calibrator_regularization.md): Returns a calibrator regularization op.
-
-[`ensemble_lattices_layer(...)`](./tensorflow_lattice/ensemble_lattices_layer.md): Creates a ensemble of lattices layer.
-
-[`input_calibration_layer(...)`](./tensorflow_lattice/input_calibration_layer.md): Creates a calibration layer for the given input and feature_columns.
-
-[`input_calibration_layer_from_hparams(...)`](./tensorflow_lattice/input_calibration_layer_from_hparams.md): Creates a calibration layer for the input using hyper-parameters.
-
-[`lattice(...)`](./tensorflow_lattice/lattice.md): Returns an interpolated look-up table (lattice) op.
-
-[`lattice_layer(...)`](./tensorflow_lattice/lattice_layer.md): Creates a lattice layer.
-
-[`lattice_regularization(...)`](./tensorflow_lattice/lattice_regularization.md): Returns a lattice regularization op.
-
-[`load_keypoints_from_quantiles(...)`](./tensorflow_lattice/load_keypoints_from_quantiles.md): Retrieves keypoints initialization values for selected features.
-
-[`monotone_lattice(...)`](./tensorflow_lattice/monotone_lattice.md): Returns a projected lattice parameters onto the monotonicity constraints.
-
-[`monotonic_projection(...)`](./tensorflow_lattice/monotonic_projection.md): Returns a not-strict monotonic projection of the vector.
-
-[`pwl_indexing_calibrator(...)`](./tensorflow_lattice/pwl_indexing_calibrator.md): Returns tensor representing interpolation weights in a piecewise linear
-
-[`save_quantiles_for_keypoints(...)`](./tensorflow_lattice/save_quantiles_for_keypoints.md): Calculates and saves quantiles for given features.
-
-[`uniform_keypoints_for_signal(...)`](./tensorflow_lattice/uniform_keypoints_for_signal.md): Returns a pair of initialization tensors for calibration keypoints.
-
-## Other Members
-
-`DEFAULT_NAME`
-
-`absolute_import`
-
diff --git a/g3doc/api_docs/python/tensorflow_lattice/CalibratedEtlHParams.md b/g3doc/api_docs/python/tensorflow_lattice/CalibratedEtlHParams.md
deleted file mode 100644
index 73d4cc0..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/CalibratedEtlHParams.md
+++ /dev/null
@@ -1,294 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# tensorflow_lattice.CalibratedEtlHParams
-
-## Class `CalibratedEtlHParams`
-
-Inherits From: [`CalibratedHParams`](../tensorflow_lattice/CalibratedHParams.md)
-
-Hyper-parameters for CalibratedEtl (Embedded tiny lattices) models.
-
-Supports regularization and monotonicity like described in `CalibratedHParam`.
-Values for `calibration_output_min`, `calibration_output_max` and
-`missing_output_value` get set automatically.
-
-Note that this architecture does not support any of per-feature based lattice
-hyper-parameters such as missing_vertex, per-feature missing_input_value,
-per-feature lattice_size, per-feature lattice regularization, because after
-the linear embedding, all of features are mixed together, so it is not clear
-how to merge per-feature parameters after the linear embedding layer.
-
-If there is no non-monotonic feature, but `non_monotonic_lattice_rank` or
-`non_monotonic_num_lattices` are not `None`, then this will raise the error.
-
-Added parameters:
-
-* `learning_rate`: (float) a global parameter that assigns a step size of an
- optimizer.
-* `lattice_size`: (int) a global parameter that controls number of
- cells for a feature. Should be greater than equal to 2, and the recommended
- default value is 2. Also calibrator output min and max should be
- [0, `lattice_size` - 1], and the output should be bounded.
-* `interpolation_type`: a global parameter that defines if the lattice will
- interpolate using the full hypercube or only the simplex ("hyper-triangle",
- much faster for larger lattices) around the point being evaluated.
- Valid values: 'hypercube' or 'simplex'
-* `monotonic_lattice_rank`: (int) a lattice rank in each monotonic lattice.
-* `monotonic_num_lattices`: (int) a number of monotonic lattices to be
- created.
-* `monotonic_lattice_size`: (int) lattice cell size for each monotonic lattice
- in the ensemble lattices layer.
-* `non_monotonic_lattice_rank`: (int) a lattice rank in each non monotonic
- lattice. If all features are monotonic, this parameter should be None.
-* `non_monotonic_num_lattices`: (int) a number of non-monotonic lattices to be
- created. If all features are monotonic, this parameter should be None.
-* `monotonic_lattice_size`: (int) lattice cell size for each non-monotonic
- lattice in the ensemble lattices layer.
-* `linear_embedding_calibration_min`: (float) a global parameter that controls
- a minimum value of intermediate calibration layers. Default is -100.
-* `linear_embedding_calibration_max`: (float) a global parameter that controls
- a maximum value of intermediate calibration layers. Default is 100.
-* `linear_embedding_calibration_num_keypoints`: (float) a global parameter
- that controls a `num_keypoints` in intermediate calibration layers. Default
- is 100.
-
-## Methods
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.CalibratedHParams
-
-## Class `CalibratedHParams`
-
-Inherits From: [`PerFeatureHParams`](../tensorflow_lattice/PerFeatureHParams.md)
-
-PerFeatureHParams specialization with input calibration parameters.
-
-The following hyper-parameters can be set as global, or per-feature (see
-base `PerFeatureHParams` for details):
-
- * `feature_names`: list of feature names. Only features names listed here
- (or added later with add_feature) can have feature specific parameter
- values.
- * `num_keypoints`: Number of keypoints to use for calibration, Set to 0 or
- `None` for no calibration.
- * `calibration_output_min`, `calibration_output_max`: initial and final
- values for calibrations. -1.0 to 1.0 works well for calibrated linear
- models. For lattices one will want to set these to (0, `lattice_size`-1).
- Only used during initialization of the calibration, if `quantiles_dir`
- is given to the calibrated model (as opposed to defining one's own value
- with `keypoints_initializers_fn`). It must be defined for calibration to
- work, no default is set.
- * `calibration_bound`: If output of calibration max/min are bound to the
- limits given in `calibration_output_min/max`.
- * `monotonicity`: Monotonicity for the feature. 0 for no monotonicity,
- 1 and -1 for increasing and decreasing monotonicity respectively.
- * `missing_input_value`: If set, and if the input has this value it is
- assumed
- to be missing and the output will either be calibrated to some value
- between `[calibration_output_min, calibration_output_max]` or set to a
- fixed value set by missing_output_value.
- * `missing_output_value`: Requires missing_input_value also to be set. If
- set
- if will convert missing input to this value. Leave it undefined and the
- output will be learned.
- * `calibration_l1_reg`, `calibration_l2_reg`,
- `calibration_l1_laplacian_reg`, `calibration_l2_laplacian_reg`: Calibrator
- regularizers regularization amount. Default is `None`.
-
-## Methods
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.CalibratedLatticeHParams
-
-## Class `CalibratedLatticeHParams`
-
-Inherits From: [`CalibratedHParams`](../tensorflow_lattice/CalibratedHParams.md)
-
-Hyper-parameters for CalibratedLattice models.
-
-Supports regularization and monotonicity like described in `CalibratedHParam`.
-Values for `calibration_output_min`, `calibration_output_max` and
-`missing_output_value` get set automatically.
-
-Added parameters:
-
-* `learning_rate`: (float) a global parameter that assigns a step size of an
- optimizer.
-* `lattice_size`: (int) a global or per feature parameter that controls number
- of cells for a feature. Should be greater than equal to 2, and the
- recommended default value is 2. Also calibrator output min and max should be
- [0, lattice_size - 1], and the output should be bounded, since a lattice
- expects an input in the range [0, lattice_size - 1].
-* `interpolation_type`: a global parameter that defines if the lattice will
- interpolate using the full hypercube or only the simplex ("hyper-triangle",
- much faster for larger lattices) around the point being evaluated.
- Valid values: 'hypercube' or 'simplex'
-* `missing_input_value`: Value for which a feature is considered missing. Such
- values are either automatically learned to some calibrated value, or,
- if missing_vertex is set, they get their own value in the lattice.
-* `missing_vertex`: if missing_input_value is set, this boolean value indicate
- whether to create an extra vertex for missing values.
-* `lattice_l1_reg`, `lattice_l2_reg`, `lattice_l1_torsion_reg`,
- `lattice_l2_torsion_reg`, `lattice_l1_laplacian_reg`,
- `lattice_l2_laplacian_reg`: Lattice regularizers regularization amount.
- Default is `None`.
-
-## Methods
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.CalibratedLinearHParams
-
-## Class `CalibratedLinearHParams`
-
-Inherits From: [`CalibratedHParams`](../tensorflow_lattice/CalibratedHParams.md)
-
-Hyper-parameters for CalibratedLinear models.
-
-Same as `CalibratedHParams` (hyper-parameters for input calibration) plus
-the global learning_rate.
-
-The parameters `calibration_output_min` and `calibration_output_max` shouldn't
-be changed (they are fixed at -1. and +1), since they are eventually re-scaled
-by the linear layer on top.
-
-It supports regularization, monotonicity and missing values (input and
-optionally output).
-
-## Methods
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.CalibratedRtlHParams
-
-## Class `CalibratedRtlHParams`
-
-Inherits From: [`CalibratedHParams`](../tensorflow_lattice/CalibratedHParams.md)
-
-Hyper-parameters for CalibratedRtl (RandomTinyLattices) models.
-
-Supports regularization and monotonicity like described in `CalibratedHParam`.
-Values for `calibration_output_min`, `calibration_output_max` and
-`missing_output_value` get set automatically.
-
-Added parameters:
-
-* `learning_rate`: (float) a global parameter that assigns a step size of an
- optimizer.
-* `lattice_size`: (int) a global or per feature parameter that controls number
- of cells for a feature. Should be greater than equal to 2, and the
- recommended default value is 2. Also calibrator output min and max should be
- [0, lattice_size - 1], and the output should be bounded, since a lattice
- expects an input in the range [0, lattice_size - 1]. (Note if missing_vertex
- is True, then we add an extra vertex, so input range is [0, lattice_size])
-* `num_lattices`: (int) a number of lattices to be created.
-* `lattice_rank`: (int) a lattice rank in each lattice.
-* `interpolation_type`: a global parameter that defines if the lattice will
- interpolate using the full hypercube or only the simplex ("hyper-triangle",
- much faster for larger lattices) around the point being evaluated.
- Valid values: 'hypercube' or 'simplex'
-* `ensemble_bias`: (float) an initial value of bias term to be added to the
- output of ensemble.
-* `rtl_seed`: (int) a random seed for rtl construction.
-* `missing_input_value`: Value for which a feature is considered missing. Such
- values are either automatically learned to some calibrated value, or,
- if missing_vertex is set, they get their own value in the lattice.
-* `missing_vertex`: if missing_input_value is set, this boolean value indicate
- whether to create an extra vertex for missing values.
-* `lattice_l1_reg`, `lattice_l2_reg`, `lattice_l1_torsion_reg`,
- `lattice_l2_torsion_reg`, `lattice_l1_laplacian_reg`,
- `lattice_l2_laplacian_reg`: Latticer regularizers regularization amount.
- Default is `None`.
-
-## Methods
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.PerFeatureHParams
-
-## Class `PerFeatureHParams`
-
-
-
-Parameters object with per feature parametrization.
-
-Each parameter can be overwritten for specific features by setting
-`feature____`, otherwise it falls back to the
-global parameter name value ``.
-
-Parameter types are set from their first value set -- but they can also be
-reset by `set_param_type`.
-
-Example: let's say we have a parameter `lattice_size` that should be 2 if not
-specified (global value), but can be overridden per feature; let's assume
-there are 3 features: `a`, `b`, and `c` (added after construction). Then:
-
-```python
- hparams = PerFeatureHParams(["a", "b"], lattice_size=2,
- feature__b__lattice_size=3)
- hparams.add_feature(["c"])
- hparams.get_param("lattice_size") == 2
- hparams.get_feature_param("a", "lattice_size") == 2
- hparams.get_feature_param("b", "lattice_size") == 3
- hparams.get_feature_param("c", "lattice_size") == 2
- hparams.get_feature_param("d", "lattice_size") raises a ValueError
-```
-
-Use the `get_feature_param` method to automatically get the specialized value,
-or fall-back to the global one.
-
-
-
-
-
-## Methods
-
-
__init__
-
-``` python
-__init__(
- feature_names=None,
- **kwargs
-)
-```
-
-Construct with arbitrary list of parameters.
-
-#### Args:
-
-* `feature_names`: list of feature names. Only features names listed here
- (or added later with add_feature) can have feature specific parameter
- values.
-* `**kwargs`: parameters names.
-
-
-#### Returns:
-
-PerFeatureHParams object.
-
-
-#### Raises:
-
-* `ValueError`: if a feature-specific parameter value is set for an
- unknown feature.
-
-
-
-``` python
-add_feature(feature_name)
-```
-
-Add feature_name (one name or list of names) to list of known names.
-
-
get_feature_names
-
-``` python
-get_feature_names()
-```
-
-Returns copy of list of known feature names.
-
-
get_feature_param
-
-``` python
-get_feature_param(
- feature_name,
- param_name,
- default=None
-)
-```
-
-Returns parameter for feature or falls back to global parameter.
-
-
get_global_and_feature_params
-
-``` python
-get_global_and_feature_params(
- param_names,
- feature_names
-)
-```
-
-Returns values for multiple params, global and for each feature.
-
-#### Args:
-
-* `param_names`: list of parameters to get values for.
-* `feature_names`: list of features to get specific values for.
-
-
-#### Returns:
-
-* List of global values for parameters requested in `param_names`.
-* List of list of per feature values for parameters requested in
- `param_names` for features requested in `feature_names`.
-
-
get_param
-
-``` python
-get_param(
- param_name,
- default=None
-)
-```
-
-Returns the global parameter or falls back to default.
-
-
is_feature_set_param
-
-``` python
-is_feature_set_param(
- feature_name,
- param_name
-)
-```
-
-Returns whether param_name parameter is set for feature_name.
-
-
param_name_for_feature
-
-``` python
-param_name_for_feature(
- feature_name,
- param_name
-)
-```
-
-Returns parameter name for specific feature parameter.
-
-
parse
-
-``` python
-parse(hparams_str)
-```
-
-Parses strings into hparams.
-
-#### Args:
-
-* `hparams_str`: must be a comma separated list of "=",
- where "" is a hyper-parameter name, and "" its value.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if there is a problem with the input:
- * if trying to set an unknown parameter.
- * if trying to set unknown feature(s)
- * if can't convert value to parameter type.
-
-
parse_hparams
-
-``` python
-parse_hparams(hparams)
-```
-
-Incorporates hyper-parameters from another HParams object.
-
-Copies over values of hyper-parameters from the given object. New parameters
-may be set, but not new features. Also works with
-`tf.contrib.training.HParams` objects.
-
-#### Args:
-
-* `hparams`: `PerFeatureHParams` object, but also works with the standard
- `tf.contrib.training.HParams` object.
-
-
-#### Returns:
-
-Changes affect self, but returns self for convenience.
-
-
-#### Raises:
-
-* `ValueError`: if trying to set unknown features, or if setting a feature
- specific parameter for an unknown parameter.
-
-
-
-# tensorflow_lattice.calibrated_etl_classifier
-
-``` python
-calibrated_etl_classifier(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated etl binary classifier model.
-
-
-
-This model uses a piecewise lattice calibration function on each of the
-inputs (parametrized) and then feeds them to ensemble of random lattices.
-num_lattices and lattice_rank (number of inputs to each lattice) must be
-specified in the hyperparameter. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationEtlHParams. lattice_rank and num_lattices must
-be specified; there would be no default value for this. It also takes in
-per-feature parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-hparams = hparams.CalibratedEtlHparams(num_lattices=10, lattice_rank=2)
-estimator = calibrated_etl.calibrated_etl_classifier(
- feature_columns=feature_columns, hparams=hparams)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iteratable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graphs and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the later takes precendence,
- and the features for which `keypoints_initializers` are not defined
- fallback to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being build, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationEtlHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `calibrated_etl_classifier` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_etl_regressor.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_etl_regressor.md
deleted file mode 100644
index 2c200ea..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_etl_regressor.md
+++ /dev/null
@@ -1,112 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_etl_regressor
-
-``` python
-calibrated_etl_regressor(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated etl regressor model.
-
-This model uses a piecewise lattice calibration function on each of the
-inputs (parametrized) and then feeds them to ensemble of random lattices.
-num_lattices and lattice_rank (number of inputs to each lattice) must be
-specified in the hyperparameter. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationEtlHParams. lattice_rank and num_lattices must
-be specified; there would be no default value for this. It also takes in
-per-feature parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-hparams = hparams.CalibratedEtlHparams(num_lattices=10, lattice_rank=2)
-estimator = calibrated_etl.calibrated_etl_classifier(
- feature_columns=feature_columns, hparams=hparams)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graphs and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the latter takes precedence,
- and the features for which `keypoints_initializers` are not defined
- fall back to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being built, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationEtlHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `calibrated_etl_regressor` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_classifier.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_classifier.md
deleted file mode 100644
index 6e0dda2..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_classifier.md
+++ /dev/null
@@ -1,107 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_lattice_classifier
-
-``` python
-calibrated_lattice_classifier(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated lattice classifier binary model.
-
-
-
-This model uses a piecewise lattice calibration function on each of the
-real (as opposed to binary) inputs (parametrized) and then combines (sum up)
-the results. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationHParams. It takes in per-feature calibration
-parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-estimator = calibrated_lattice.CalibratedLatticeClassifier(
- feature_columns=feature_columns)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graph and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibrators_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- as a closure that when called will return a pair of tensors with
- keypoints input and output initializers. Alternatively can be given as
- a dict mapping feature name to keypoints_initializers_fn, so one
- can have one initialization per feature. It uses a closure instead of
- the tensors themselves because the graph has to be created at the time
- the model is being built, which happens at a later time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `CalibratedLatticeClassifier` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_regressor.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_regressor.md
deleted file mode 100644
index df56ee7..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_lattice_regressor.md
+++ /dev/null
@@ -1,108 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_lattice_regressor
-
-``` python
-calibrated_lattice_regressor(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated lattice estimator (model) for regression.
-
-This model uses a piecewise lattice calibration function on each of the
-inputs (parametrized) and then combines (sums up) the results. Optionally
-calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationHParams. It takes in per-feature calibration
-parameters.
-
-Internally values will be converted to tf.float32.
-
-
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-estimator = calibrated_lattice.calibrated_lattice_regressor(
- feature_columns=feature_columns)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, if not set the model will use all features
- returned by input_fn. An iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graph and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibrators_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- as a closure that when called will return a pair of tensors with
- keypoints input and output initializers. Alternatively can be given as
- a dict mapping feature name to keypoints_initializers_fn, so one
- can have one initialization per feature. It uses a closure instead of
- the tensors themselves because the graph has to be created at the time
- the model is being built, which happens at a later time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `CalibratedLatticeRegressor` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_classifier.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_classifier.md
deleted file mode 100644
index 49c3f66..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_classifier.md
+++ /dev/null
@@ -1,111 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_linear_classifier
-
-``` python
-calibrated_linear_classifier(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated linear classifier binary model.
-
-
-
-This model uses a piecewise linear calibration function on each of the
-real (as opposed to binary) inputs (parametrized) and then combines (sum up)
-the results. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationHParams. It takes in per-feature calibration
-parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-estimator = calibrated_linear.CalibratedLinearClassifier(
- feature_columns=feature_columns)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graph and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the latter takes precedence,
- and the features for which `keypoints_initializers` are not defined
- fall back to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being built, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `CalibratedLinearClassifier` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_regressor.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_regressor.md
deleted file mode 100644
index be5a622..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_linear_regressor.md
+++ /dev/null
@@ -1,112 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_linear_regressor
-
-``` python
-calibrated_linear_regressor(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated linear estimator (model) for regression.
-
-This model uses a piecewise linear calibration function on each of the
-inputs (parametrized) and then combines (sums up) the results. Optionally
-calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationHParams. It takes in per-feature calibration
-parameters.
-
-Internally values will be converted to tf.float32.
-
-
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-estimator = calibrated_linear.calibrated_linear_regressor(
- feature_columns=feature_columns)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, if not set the model will use all features
- returned by input_fn. An iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graph and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the latter takes precedence,
- and the features for which `keypoints_initializers` are not defined
- fall back to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being built, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `CalibratedLinearRegressor` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_classifier.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_classifier.md
deleted file mode 100644
index 1e3927f..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_classifier.md
+++ /dev/null
@@ -1,114 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_rtl_classifier
-
-``` python
-calibrated_rtl_classifier(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated rtl binary classifier model.
-
-
-
-This model uses a piecewise lattice calibration function on each of the
-inputs (parametrized) and then feeds them to ensemble of random lattices.
-num_lattices and lattice_rank (number of inputs to each lattice) must be
-specified in the hyperparameter. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationRtlHParams. lattice_rank and num_lattices must
-be specified; there would be no default value for this. It also takes in
-per-feature parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-hparams = hparams.CalibratedRtlHparams(num_lattices=10, lattice_rank=2)
-estimator = calibrated_rtl.calibrated_rtl_classifier(
- feature_columns=feature_columns, hparams=hparams)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_predict)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graphs and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the latter takes precedence,
- and the features for which `keypoints_initializers` are not defined
- fall back to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being built, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationRtlHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `calibrated_rtl_classifier` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_regressor.md b/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_regressor.md
deleted file mode 100644
index 9365747..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrated_rtl_regressor.md
+++ /dev/null
@@ -1,112 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrated_rtl_regressor
-
-``` python
-calibrated_rtl_regressor(
- feature_columns=None,
- model_dir=None,
- quantiles_dir=None,
- keypoints_initializers_fn=None,
- optimizer=None,
- config=None,
- hparams=None
-)
-```
-
-Calibrated rtl regressor model.
-
-This model uses a piecewise lattice calibration function on each of the
-inputs (parametrized) and then feeds them to ensemble of random lattices.
-num_lattices and lattice_rank (number of inputs to each lattice) must be
-specified in the hyperparameter. Optionally calibration can be made monotonic.
-
-It usually requires a preprocessing step on the data, to calculate the
-quantiles of each used feature. This can be done locally or in one worker
-only before training, in a separate invocation of your program (or directly).
-Typically this can be saved (`save_dir` parameter) to the same
-directory where the data is.
-
-Hyper-parameters are given in the form of the object
-tfl_hparams.CalibrationRtlHParams. lattice_rank and num_lattices must
-be specified; there would be no default value for this. It also takes in
-per-feature parameters.
-
-Internally values will be converted to tf.float32.
-
-Example:
-
-```python
-def input_fn_train: ...
-def input_fn_eval: ...
-
-my_feature_columns=[...]
-
-# Have a separate program flag to generate the quantiles. Need to be run
-# only once.
-if FLAGS.create_quantiles:
- pwl_calibrators_layers.calculate_quantiles_for_keypoints(
- input_fn=input_fn_train,
- feature_columns=my_feature_columns,
- save_dir=FLAGS.data_dir,
- num_quantiles=1000,
- override=True)
- return # Exit program.
-
-hparams = hparams.CalibratedRtlHparams(num_lattices=10, lattice_rank=2)
-estimator = calibrated_rtl.calibrated_rtl_classifier(
- feature_columns=feature_columns, hparams=hparams)
-estimator.train(input_fn=input_fn_train)
-estimator.evaluate(input_fn=input_fn_eval)
-estimator.predict(input_fn=input_fn_test)
-```
-
-#### Args:
-
-* `feature_columns`: Optional, an iterable containing all the feature
- columns used by the model. All items in the set should be instances of
- classes derived from `FeatureColumn`. If not given, the model will
- use as features the tensors returned by input_fn.
- Supported types of columns: RealValuedColumn.
-* `model_dir`: Directory to save model parameters, graphs and etc. This can
- also be used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated only once with
- `pwl_calibration_layers.calculate_quantiles_for_keypoints` in a separate
- invocation of your program. If you don't want to use quantiles, you can
- set `keypoints_initializer` instead.
-* `keypoints_initializers_fn`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a closure that returns a pair of tensors with keypoints inputs and
- outputs to use for initialization (must match `num_keypoints` configured
- in `hparams`). Alternatively the closure can return a dict mapping
- feature name to pairs for initialization per feature. If `quantiles_dir`
- and `keypoints_initializers_fn` are set, the latter takes precedence,
- and the features for which `keypoints_initializers` are not defined
- fall back to using the quantiles found in `quantiles_dir`. It uses a
- closure instead of the tensors themselves because the graph has to be
- created at the time the model is being built, which happens at a later
- time.
-* `optimizer`: string, `Optimizer` object, or callable that defines the
- optimizer to use for training -- if a callable, it will be called with
- learning_rate=hparams.learning_rate.
-* `config`: RunConfig object to configure the runtime settings. Typically set
- to learn_runner.EstimatorConfig().
-* `hparams`: an instance of tfl_hparams.CalibrationRtlHParams. If set to
- None default parameters are used.
-
-
-#### Returns:
-
-A `calibrated_rtl_regressor` estimator.
-
-
-#### Raises:
-
-* `ValueError`: invalid parameters.
-* `KeyError`: type of feature not supported.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibration_layer.md b/g3doc/api_docs/python/tensorflow_lattice/calibration_layer.md
deleted file mode 100644
index b5b65c8..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibration_layer.md
+++ /dev/null
@@ -1,96 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibration_layer
-
-``` python
-calibration_layer(
- uncalibrated_tensor,
- num_keypoints,
- keypoints_initializers=None,
- keypoints_initializer_fns=None,
- bound=False,
- monotonic=None,
- missing_input_values=None,
- missing_output_values=None,
- l1_reg=None,
- l2_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None,
- name=None
-)
-```
-
-Creates a calibration layer for uncalibrated values.
-
-Returns a calibrated tensor of the same shape as the uncalibrated continuous
-signals passed in, and a list of projection ops, that must be applied at
-each step (or every so many steps) to project the model to a feasible space:
-used for bounding the outputs or for imposing monotonicity -- the list will be
-empty if bound and monotonic are not set.
-
-#### Args:
-
-* `uncalibrated_tensor`: Tensor of shape [batch_size, ...] with uncalibrated
- values.
-* `num_keypoints`: Number of keypoints to use. Either a scalar value that
- will be used for every uncalibrated signal, or a list of n values,
- per uncalibrated signal -- uncalibrated is first flattened (
- see tf.contrib.layers.flatten) to [batch_size, n], and there should
- be one value in the list per n. If a value of the list is 0 or None
- the correspondent signal won't be calibrated.
-* `keypoints_initializers`: For evaluation or inference (or when resuming
- training from a checkpoint) the values will be loaded from disk, so they
- don't need to be given (leave it as None).
- Otherwise provide either a tuple of two tensors of shape [num_keypoints],
- or a list of n pairs of tensors, each of shape [num_keypoints]. In this
- list there should be one pair per uncalibrated signal, just like
- num_keypoints above. Notice that num_keypoints can be different per
- signal.
-* `keypoints_initializer_fns`: Like keypoints_initializers but using lambda
- initializers. They should be compatible with tf.get_variable. If this is
- set, then keypoints_initializers must be None.
-* `bound`: boolean whether output of calibration must be bound. Alternatively
- a list of n booleans, one per uncalibrated value, like num_keypoints
- above.
-* `monotonic`: whether calibration is monotonic: None or 0 means no
- monotonicity. Positive or negative values mean increasing or decreasing
- monotonicity respectively. Alternatively a list of n monotonic values,
- one per uncalibrated value, like num_keypoints above.
-* `missing_input_values`: If set, and if the input has this value it is assumed
- to be missing and the output will either be calibrated to some value
- between `[calibration_output_min, calibration_output_max]` or set to a
- fixed value set by missing_output_value. Limitation: it only works for
- scalars. Either one value for all inputs, or a list with one value per
- uncalibrated value.
-* `missing_output_values`: Requires missing_input_value also to be set. If set
- if will convert missing input to this value. Either one value for all
- outputs, or a list with one value per uncalibrated value.
-* `l1_reg`: (list of floats or float) l1 regularization amount.
- If float, then same value is applied to all dimensions.
-* `l2_reg`: (list of floats or float) l2 regularization amount.
- If float, then same value is applied to all dimensions.
-* `l1_laplacian_reg`: (list of floats or float) l1 laplacian
- regularization amount. If float, then same value is applied to all
- dimensions.
-* `l2_laplacian_reg`: (list of floats or float) l2 laplacian
- regularization amount. If float, then same value is applied to all
- dimensions.
-* `name`: Name scope for operations.
-
-
-#### Returns:
-
-A tuple of:
-* calibrated tensor of shape [batch_size, ...], the same shape as
- uncalibrated.
-* list of projection ops, that must be applied at each step (or every so
- many steps) to project the model to a feasible space: used for bounding
- the outputs or for imposing monotonicity. Empty if none are requested.
-* None or tensor with regularization loss.
-
-
-#### Raises:
-
-* `ValueError`: If dimensions don't match.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/calibrator_regularization.md b/g3doc/api_docs/python/tensorflow_lattice/calibrator_regularization.md
deleted file mode 100644
index 67f64bd..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/calibrator_regularization.md
+++ /dev/null
@@ -1,39 +0,0 @@
-
-
-
-
-# tensorflow_lattice.calibrator_regularization
-
-``` python
-calibrator_regularization(
- output_keypoints,
- l1_reg=None,
- l2_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None,
- name='calibrator_regularization'
-)
-```
-
-Returns a calibrator regularization op.
-
-#### Args:
-
-output_keypoints: (Rank-1 tensor with shape [num_keypoints]) 1d calibrator's
- output keypoints tensor.
-l1_reg: (float) l1 regularization amount.
-l2_reg: (float) l2 regularization amount.
-l1_laplacian_reg: (float) l1 Laplacian regularization amount.
-l2_laplacian_reg: (float) l2 Laplacian regularization amount.
-name: name scope of calibrator regularization.
-
-
-#### Returns:
-
-Rank-0 tensor (scalar) that contains calibrator regularization.
-
-
-#### Raises:
-
-* `ValueError`: * If output_keypoints is not rank-1 tensor.
- * If the shape of output_keypoints is unknown.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/ensemble_lattices_layer.md b/g3doc/api_docs/python/tensorflow_lattice/ensemble_lattices_layer.md
deleted file mode 100644
index 836943f..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/ensemble_lattices_layer.md
+++ /dev/null
@@ -1,69 +0,0 @@
-
-
-
-
-# tensorflow_lattice.ensemble_lattices_layer
-
-``` python
-ensemble_lattices_layer(
- input_tensor,
- lattice_sizes,
- structure_indices,
- is_monotone=None,
- output_dim=1,
- interpolation_type='hypercube',
- lattice_initializers=None,
- l1_reg=None,
- l2_reg=None,
- l1_torsion_reg=None,
- l2_torsion_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None
-)
-```
-
-Creates a ensemble of lattices layer.
-
-Returns a list of output of lattices, lattice parameters, and projection ops.
-
-#### Args:
-
-* `input_tensor`: [batch_size, input_dim] tensor.
-* `lattice_sizes`: A list of lattice sizes of each dimension.
-* `structure_indices`: A list of list of ints. structure_indices[k] is a list
- of indices that belongs to kth lattices.
-* `is_monotone`: A list of input_dim booleans, boolean or None. If None or
- False, lattice will not have monotonicity constraints. If
- is_monotone[k] == True, then the lattice output has the non-decreasing
- monotonicity with respect to input_tensor[?, k] (the kth coordinate). If
- True, all the input coordinate will have the non-decreasing monotonicity.
-* `output_dim`: Number of outputs.
-* `interpolation_type`: 'hypercube' or 'simplex'.
-* `lattice_initializers`: (Optional) A list of initializer for each lattice
- parameter vectors. lattice_initializer[k] is a 2D tensor
- [output_dim, parameter_dim[k]], where parameter_dim[k] is the number of
- parameter in the kth lattice. If None, lattice_param_as_linear initializer
- will be used with
- linear_weights=[1 if monotone else 0 for monotone in is_monotone].
-* `l1_reg`: (float) l1 regularization amount.
-* `l2_reg`: (float) l2 regularization amount.
-* `l1_torsion_reg`: (float) l1 torsion regularization amount.
-* `l2_torsion_reg`: (float) l2 torsion regularization amount.
-* `l1_laplacian_reg`: (list of floats or float) list of L1 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-* `l2_laplacian_reg`: (list of floats or float) list of L2 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-
-
-#### Returns:
-
-A tuple of:
-* a list of output tensors, [batch_size, output_dim], with length
- len(structure_indices), i.e., one for each lattice.
-* a list of parameter tensors shape [output_dim, parameter_dim]
-* None or projection ops, that must be applied at each
- step (or every so many steps) to project the model to a feasible space:
- used for bounding the outputs or for imposing monotonicity.
-* None or a regularization loss, if regularization is configured.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer.md b/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer.md
deleted file mode 100644
index 633d3bf..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer.md
+++ /dev/null
@@ -1,102 +0,0 @@
-
-
-
-
-# tensorflow_lattice.input_calibration_layer
-
-``` python
-input_calibration_layer(
- columns_to_tensors,
- num_keypoints,
- feature_columns=None,
- keypoints_initializers=None,
- keypoints_initializer_fns=None,
- bound=False,
- monotonic=None,
- missing_input_values=None,
- missing_output_values=None,
- l1_reg=None,
- l2_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None,
- dtype=dtypes.float32
-)
-```
-
-Creates a calibration layer for the given input and feature_columns.
-
-Returns a tensor with the calibrated values of the given features, a list
-of the names of the features in the order they feature in the returned, and
-a list of projection ops, that must be applied at each step (or every so many
-steps) to project the model to a feasible space: used for bounding the outputs
-or for imposing monotonic -- the list will be empty if bound and
-monotonic are not set.
-
-#### Args:
-
-* `columns_to_tensors`: A mapping from feature name to tensors. 'string' key
- means a base feature (not-transformed). If feature_columns is not set
- these are the features calibrated. Otherwise the transformed
- feature_columns are the ones calibrated.
-* `num_keypoints`: Number of keypoints to use. Either a single int, or a dict
- mapping feature names to num_keypoints. If a value of the dict is 0 or
- None the correspondent feature won't be calibrated.
-* `feature_columns`: Optional. If set to a set of FeatureColumns, these will
- be the features used and calibrated.
-* `keypoints_initializers`: For evaluation or inference (or when resuming
- training from a checkpoint) the values will be loaded from disk, so they
- don't need to be given (leave it as None).
- Either a tuple of two tensors of shape [num_keypoints], or a dict mapping
- feature names to pair of tensors of shape [num_keypoints[feature_name]].
- See load_keypoints_from_quantiles or uniform_keypoints_for_signal on how
- to generate these (module keypoints_initialization).
-* `keypoints_initializer_fns`: Like keypoints_initializers but using lambda
- initializers. They should be compatible with tf.get_variable. If this is
- set, then keypoints_initializers must be None.
-* `bound`: boolean whether output of calibration must be bound. Alternatively
- a dict mapping feature name to boundness.
-* `monotonic`: whether calibration has to be kept monotonic: None or 0 means
- no monotonic. Positive or negative values mean increasing or decreasing
- monotonic respectively. Alternatively a dict mapping feature name
- to monotonic.
-* `missing_input_values`: If set, and if the input has this value it is assumed
- to be missing and the output will either be calibrated to some value
- between `[calibration_output_min, calibration_output_max]` or set to a
- fixed value set by missing_output_value. Limitation: it only works for
- scalars. Either one value for all inputs, or a dict mapping feature name
- to missing_input_value for the respective feature.
-* `missing_output_values`: Requires missing_input_value also to be set. If set
- if will convert missing input to this value. Either one value for all
- inputs, or a dict mapping feature name to missing_input_value for the
- respective feature.
-* `l1_reg`: ({feature_name: float} dict or float) l1 regularization amount.
- If float, then same value is applied to all features.
-* `l2_reg`: ({feature_name: float} dict or float) l2 regularization amount.
- If float, then same value is applied to all features.
-* `l1_laplacian_reg`: ({feature_name: float} dict or float) l1 laplacian
- regularization amount. If float, then same value is applied to all
- features.
-* `l2_laplacian_reg`: ({feature_name: float} dict or float) l2 laplacian
- regularization amount. If float, then same value is applied to all
- features.
-* `dtype`: If any of the scalars are not given as tensors, they are converted
- to tensors with this dtype.
-
-
-#### Returns:
-
-A tuple of:
-* calibrated tensor of shape [batch_size, sum(features dimensions)].
-* list of the feature names in the order they feature in the calibrated
- tensor. A name may appear more than once if the feature is
- multi-dimension (for instance a multi-dimension embedding)
-* list of projection ops, that must be applied at each step (or every so
- many steps) to project the model to a feasible space: used for bounding
- the outputs or for imposing monotonicity. Empty if none are requested.
-* None or tensor with regularization loss.
-
-
-#### Raises:
-
-* `ValueError`: if dtypes are incompatible.
-
diff --git a/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer_from_hparams.md b/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer_from_hparams.md
deleted file mode 100644
index 770b2a1..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/input_calibration_layer_from_hparams.md
+++ /dev/null
@@ -1,76 +0,0 @@
-
-
-
-
-# tensorflow_lattice.input_calibration_layer_from_hparams
-
-``` python
-input_calibration_layer_from_hparams(
- columns_to_tensors,
- feature_columns,
- hparams,
- quantiles_dir=None,
- keypoints_initializers=None,
- name=None,
- dtype=dtypes.float32
-)
-```
-
-Creates a calibration layer for the input using hyper-parameters.
-
-Similar to `input_calibration_layer` but reads its parameters from a
-`CalibratedHParams` object.
-
-#### Args:
-
-* `columns_to_tensors`: A mapping from feature name to tensors. 'string' key
- means a base feature (not-transformed). If feature_columns is not set
- these are the features calibrated. Otherwise the transformed
- feature_columns are the ones calibrated.
-* `feature_columns`: An iterable containing all the feature columns used by the
- model. Optional, if not set the model will use all features given in
- columns_to_tensors. All items in the set should be instances of
- classes derived from `FeatureColumn`.
-* `hparams`: Hyper-parameters, need to inherit from `CalibratedHParams`.
- It is also changed to include all feature names found in
- `feature_columns`. See `CalibratedHParams` and `input_calibration_layer`
- for descriptions of how these hyper-parameters work.
-* `quantiles_dir`: location where quantiles for the data was saved. Typically
- the same directory as the training data. These quantiles can be
- generated with `pwl_calibration_layers.calculate_quantiles_for_keypoints`,
- maybe in a separate invocation of your program. Different models that
- share the same quantiles information -- so this needs to be generated only
- once when hyper-parameter tuning. If you don't want to use quantiles, you
- can set `keypoints_initializers` instead.
-* `keypoints_initializers`: if you know the distribution of your
- input features you can provide that directly instead of `quantiles_dir`.
- See `pwl_calibrators_layers.uniform_keypoints_for_signal`. It must be
- a pair of tensors with keypoints inputs and outputs to use for
- initialization (must match `num_keypoints` configured in `hparams`).
- Alternatively can be given as a dict mapping feature name to pairs,
- for initialization per feature. If `quantiles_dir` and
- `keypoints_initializer` are set, the later takes precendence, and the
- features for which `keypoints_initializers` are not defined fallback to
- using the quantiles found in `quantiles_dir`.
-* `name`: Name scope for layer.
-* `dtype`: If any of the scalars are not given as tensors, they are converted
- to tensors with this dtype.
-
-
-#### Returns:
-
-A tuple of:
-* calibrated tensor of shape [batch_size, sum(features dimensions)].
-* list of the feature names in the order they feature in the calibrated
- tensor. A name may appear more than once if the feature is
- multi-dimension (for instance a multi-dimension embedding)
-* list of projection ops, that must be applied at each step (or every so
- many steps) to project the model to a feasible space: used for bounding
- the outputs or for imposing monotonicity. Empty if none are requested.
-* None or tensor with regularization loss.
-
-
-#### Raises:
-
-* `ValueError`: if dtypes are incompatible.
-
diff --git a/g3doc/api_docs/python/tensorflow_lattice/lattice.md b/g3doc/api_docs/python/tensorflow_lattice/lattice.md
deleted file mode 100644
index 1ac115d..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/lattice.md
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-
-
-# tensorflow_lattice.lattice
-
-``` python
-lattice(
- input_tensor,
- parameter_tensor,
- lattice_sizes,
- interpolation_type='hypercube'
-)
-```
-
-Returns an interpolated look-up table (lattice) op.
-
-#### Args:
-
-* `input_tensor`: [batch_size, input_dim] tensor.
-* `parameter_tensor`: [output_dim, param_dim] tensor, where param_dim ==
- lattice_sizes[0] * ... * lattice_sizes[input_dim - 1].
-* `lattice_sizes`: A list of lattice sizes of each dimension.
-* `interpolation_type`: 'hypercube' or 'simplex'.
-
-
-#### Returns:
-
-* `output_tensor`: [batch_size, num_outputs] tensor that contains the output of
- hypercube lattice.
-
-
-#### Raises:
-
-* `ValueError`: If interpolation_type is not 'hypercube' nor 'simplex'.
-
diff --git a/g3doc/api_docs/python/tensorflow_lattice/lattice_layer.md b/g3doc/api_docs/python/tensorflow_lattice/lattice_layer.md
deleted file mode 100644
index d4824f4..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/lattice_layer.md
+++ /dev/null
@@ -1,69 +0,0 @@
-
-
-
-
-# tensorflow_lattice.lattice_layer
-
-``` python
-lattice_layer(
- input_tensor,
- lattice_sizes,
- is_monotone=None,
- output_dim=1,
- interpolation_type='hypercube',
- lattice_initializer=None,
- l1_reg=None,
- l2_reg=None,
- l1_torsion_reg=None,
- l2_torsion_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None
-)
-```
-
-Creates a lattice layer.
-
-Returns an output of lattice, lattice parameters, and projection ops.
-
-#### Args:
-
-* `input_tensor`: [batch_size, input_dim] tensor.
-* `lattice_sizes`: A list of lattice sizes of each dimension.
-* `is_monotone`: A list of input_dim booleans, boolean or None. If None or
- False, lattice will not have monotonicity constraints. If
- is_monotone[k] == True, then the lattice output has the non-decreasing
- monotonicity with respect to input_tensor[?, k] (the kth coordinate). If
- True, all the input coordinate will have the non-decreasing monotonicity.
-* `output_dim`: Number of outputs.
-* `interpolation_type`: 'hypercube' or 'simplex'.
-* `lattice_initializer`: (Optional) Initializer for lattice parameter vectors,
- a 2D tensor [output_dim, parameter_dim] (where parameter_dim ==
- lattice_sizes[0] * ... * lattice_sizes[input_dim - 1]). If None,
- lattice_param_as_linear initializer will be used with
- linear_weights=[1 if monotone else 0 for monotone in is_monotone].
-* `l1_reg`: (float) l1 regularization amount.
-* `l2_reg`: (float) l2 regularization amount.
-* `l1_torsion_reg`: (float) l1 torsion regularization amount.
-* `l2_torsion_reg`: (float) l2 torsion regularization amount.
-* `l1_laplacian_reg`: (list of floats or float) list of L1 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-* `l2_laplacian_reg`: (list of floats or float) list of L2 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-
-
-#### Returns:
-
-A tuple of:
-* output tensor of shape [batch_size, output_dim]
-* parameter tensor of shape [output_dim, parameter_dim]
-* None or projection ops, that must be applied at each
- step (or every so many steps) to project the model to a feasible space:
- used for bounding the outputs or for imposing monotonicity.
-* None or a regularization loss, if regularization is configured.
-
-
-#### Raises:
-
-* `ValueError`: for invalid parameters.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/lattice_regularization.md b/g3doc/api_docs/python/tensorflow_lattice/lattice_regularization.md
deleted file mode 100644
index b52ee03..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/lattice_regularization.md
+++ /dev/null
@@ -1,49 +0,0 @@
-
-
-
-
-# tensorflow_lattice.lattice_regularization
-
-``` python
-lattice_regularization(
- lattice_params,
- lattice_sizes,
- l1_reg=None,
- l2_reg=None,
- l1_torsion_reg=None,
- l2_torsion_reg=None,
- l1_laplacian_reg=None,
- l2_laplacian_reg=None,
- name='lattice_regularization'
-)
-```
-
-Returns a lattice regularization op.
-
-#### Args:
-
-lattice_params: (Rank-2 tensor with shape [output_dim, param_dim]) Lattice
- parameter tensor.
-lattice_sizes: (list of integers) lattice size of each dimension.
-l1_reg: (float) l1 regularization amount.
-l2_reg: (float) l2 regularization amount.
-l1_torsion_reg: (float) l1 torsion regularization amount.
-l2_torsion_reg: (float) l2 torsion regularization amount.
-l1_laplacian_reg: (list of floats or float) list of L1 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-l2_laplacian_reg: (list of floats or float) list of L2 Laplacian
- regularization amount per each dimension. If a single float value is
- provided, then all diemnsion will get the same value.
-name: name scope of lattice regularization.
-
-
-#### Returns:
-
-Rank-0 tensor (scalar) that contains lattice regularization.
-
-
-#### Raises:
-
-* `ValueError`: * lattice_param is not rank-2 tensor.
- * output_dim or param_dim is unknown.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/load_keypoints_from_quantiles.md b/g3doc/api_docs/python/tensorflow_lattice/load_keypoints_from_quantiles.md
deleted file mode 100644
index b6ace1f..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/load_keypoints_from_quantiles.md
+++ /dev/null
@@ -1,57 +0,0 @@
-
-
-
-
-# tensorflow_lattice.load_keypoints_from_quantiles
-
-``` python
-load_keypoints_from_quantiles(
- feature_names,
- save_dir,
- num_keypoints,
- output_min,
- output_max,
- dtype=dtypes.float32
-)
-```
-
-Retrieves keypoints initialization values for selected features.
-
-It expects that the quantiles have already been calculated and saved in the
-save_dir by the save_quantiles_for_keypoints function. It will raise
-an I/O error if not.
-
-#### Args:
-
-* `feature_names`: List of features names for which to get keypoints
- initialization values.
-* `save_dir`: Directory where the quantiles have been saved to. Same value used
- when save_quantiles_for_keypoints was called.
-* `num_keypoints`: Desired number of keypoints to use for calibration. This
- can either be a scalar to be used for all features, or a dict mapping
- feature name to num_keypoints. Fewer keypoints than requested can end
- up being used when for the given feature there are not enough different
- values. If num_keypoints for a feature is missing, None or 0, no
- initialization is generated.
-* `output_min`: Initial calibrated value associated with the first calibration
- keypoint. The keypoints outputs in between will be linearly interpolated.
- It can be given as a scalar, in which case value is used for all features,
- or a dict mapping feature name to output_min.
-* `output_max`: Like output_min, but the calibrated value associated to the
- last keypoint. Scalar or dict.
-* `dtype`: Type to be used for calibration.
-
-
-#### Returns:
-
-Dict of feature name to pair of constant tensors that can be used to
-initialize calibrators keypoints inputs and outputs.
-
-
-#### Raises:
-
-* `tf.errors.NotFoundError`: if quantiles file not found.
-
-
- values in the signal. This would probably be better handled as categorical,
- but still this should handle the case correctly.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/monotone_lattice.md b/g3doc/api_docs/python/tensorflow_lattice/monotone_lattice.md
deleted file mode 100644
index e666518..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/monotone_lattice.md
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-
-
-# tensorflow_lattice.monotone_lattice
-
-``` python
-monotone_lattice(
- lattice_params,
- is_monotone=[],
- lattice_sizes=[],
- tolerance=1e-07,
- max_iter=1000,
- name=None
-)
-```
-
-Returns a projected lattice parameters onto the monotonicity constraints.
-
-Monotonicity constraints are specified is_monotone. If is_monotone[k] == True,
-then the kth input has a non-decreasing monotonicity, otherwise there will be no
-constraints.
-
-This operator uses an iterative algorithm, Alternating Direction Method of
-Multipliers (ADMM) method, to find the projection, so tolerance and max_iter can
-be used to control the accuracy vs. the time spent trade-offs in the ADMM
-method.
-
-Inputs
- lattice_params: 2D tensor, `[number of outputs, number of parameters]`
-
-Params
- is_monotone: 1D bool tensor that contains whether the kth dimension should be
- monotonic.
- lattice_sizes: 1D int tensor that contains a lattice size per each dimension,
- [m_0, ..., m_{d - 1}].
- tolerance: The tolerance in ||true projection - projection|| in the ADMM
- method.
- max_iter: Maximum number of iterations in the ADMM method.
-
-Outputs
- projected_lattice_params: 2D tensor,
- `[number of outputs, number of parameters]`, that contains the projected
- parameters.
-
-#### Args:
-
-* `lattice_params`: A `Tensor`. Must be one of the following types: `float32`, `float64`.
-* `is_monotone`: An optional list of `bools`. Defaults to `[]`.
-* `lattice_sizes`: An optional list of `ints`. Defaults to `[]`.
-* `tolerance`: An optional `float`. Defaults to `1e-07`.
-* `max_iter`: An optional `int`. Defaults to `1000`.
-* `name`: A name for the operation (optional).
-
-
-#### Returns:
-
-A `Tensor`. Has the same type as `lattice_params`.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/monotonic_projection.md b/g3doc/api_docs/python/tensorflow_lattice/monotonic_projection.md
deleted file mode 100644
index 4d61848..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/monotonic_projection.md
+++ /dev/null
@@ -1,41 +0,0 @@
-
-
-
-
-# tensorflow_lattice.monotonic_projection
-
-``` python
-monotonic_projection(
- values,
- increasing,
- name=None
-)
-```
-
-Returns a not-strict monotonic projection of the vector.
-
-The returned vector is of the same size as the input and values (optionally)
-changed to make them monotonically, minimizing the sum of the square distance
-to the original values.
-
-This is part of the set of ops that support monotonicity in piecewise-linear
-calibration.
-
-Note that the gradient is undefined for this function.
-
- values: `Tensor` with values to be made monotonic.
- increasing: Defines if projection it to monotonic increasing values
- or to monotonic decreasing ones.
-
- monotonic: output `Tensor` with values made monotonic.
-
-#### Args:
-
-* `values`: A `Tensor`. Must be one of the following types: `float32`, `float64`.
-* `increasing`: A `Tensor` of type `bool`.
-* `name`: A name for the operation (optional).
-
-
-#### Returns:
-
-A `Tensor`. Has the same type as `values`.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/pwl_indexing_calibrator.md b/g3doc/api_docs/python/tensorflow_lattice/pwl_indexing_calibrator.md
deleted file mode 100644
index a6534c0..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/pwl_indexing_calibrator.md
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-
-# tensorflow_lattice.pwl_indexing_calibrator
-
-``` python
-pwl_indexing_calibrator(
- input,
- kp_inputs,
- name=None
-)
-```
-
-Returns tensor representing interpolation weights in a piecewise linear
-
-function. If using a large number of keypoints, try PwlIndexingCalibratorSparse.
-
-Notice that in this version the keypoints inputs (given by kp_inputs) is kept
-fixed by forcing its gradient to be always 0. FutureWork: allow kp_inputs to
-also be optimized, by providing a gradient.
-
-Inputs
- input: uncalibrated weights, `[batch_size]`
- kp_input: keypoints' input weights, can be initialized with the
- pwl_calibrator_initialize_input_keypoints op. `[num_keypoints]`
-
-Outputs
- weights: Interpolation weights for a piecewise linear function. Its shape is
- `[batch_size, num_keypoints]`. The dot product of this and the keypoints
- output will give the calibrated value.
-
-#### Args:
-
-* `input`: A `Tensor`. Must be one of the following types: `float32`, `float64`.
-* `kp_inputs`: A `Tensor`. Must have the same type as `input`.
-* `name`: A name for the operation (optional).
-
-
-#### Returns:
-
-A `Tensor`. Has the same type as `input`.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/save_quantiles_for_keypoints.md b/g3doc/api_docs/python/tensorflow_lattice/save_quantiles_for_keypoints.md
deleted file mode 100644
index 338e1f7..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/save_quantiles_for_keypoints.md
+++ /dev/null
@@ -1,78 +0,0 @@
-
-
-
-
-# tensorflow_lattice.save_quantiles_for_keypoints
-
-``` python
-save_quantiles_for_keypoints(
- input_fn,
- save_dir,
- feature_columns=None,
- num_steps=1,
- override=True,
- num_quantiles=1000,
- dtype=dtypes.float32
-)
-```
-
-Calculates and saves quantiles for given features.
-
-These values can later be retrieved and used by keypoints_from_quantiles()
-below.
-
-Repeated values are discarded before the quantiles are calculated. That means
-that the quantiles of a very skewed distribution (for instance where 99%
-of the values are 0), will be different. But for the purpose of calibration
-this approach is more useful.
-
-Nothing is returned, the values are simply saved in the given location.
-
-This function can be called as a preprocessing step before actual training
-starts. Typically one will run this in a separate process locally, before
-starting training for instance.
-
-#### Args:
-
-* `input_fn`: Similar to input_fn provided to Estimators. Typically one
- doesn't need to go over the full data to get good quantiles. Typically
- some 100 random examples per quantile is good enough for the purpose of
- calibration. If you don't have too much data, just use everything.
- If input_fn returns a target (used in training) it is ignored.
-* `save_dir`: Where to save these quantiles. Since when optimizing
- hyper-parameters we train various models, we can share the quantiles
- information generated here. So this should be a directory that can be
- accessed by all training sessions. A subdirectory called "quantiles" will
- be created, and inside one file per feature is created: named after the
- feature name, and with the quantiles stored in JSON format.
-* `feature_columns`: If set, quantiles are generated for these feature columns.
- The file name used to save the quantiles uses a hash of the names of the
- feature_columns, so it can support different quantiles sets for different
- parts of the model if needed. If not set quantiles will be generated for
- all features returned by input_fn.
-* `num_steps`: number of steps to take over input_fn to gather enough data to
- create quantiles. Set to 0 or None to run until queue is exhausted,
- like if you used num_epochs in your input_fn.
-* `override`: if False it won't regenerate quantiles for files that are already
- there. This works as long as the features definition/distribution hasn't
- change from one run to another.
-* `num_quantiles`: This value should be larger than the maximum number of
- keypoints that will be considered for calibrating these features. If
- there are not enough quantiles for the keypoints, the system is robust and
- will simply interpolate the missing quantiles. Similarly if there are not
- enough examples to represent the quantiles, it will interpolate the
- quantiles from the examples given.
-* `dtype`: Deafult dtype to use, in particular for categorical values.
-
-Returns: Nothing, results are saved to disk.
-
-
-#### Raises:
-
-* `errors.OpError`: For I/O errors.
-
-FutureWork:
- * Use Munro-Paterson algorithm to calculate quantiles in a streaming
- fashion. See Squawd library.
- * Add support to weighted examples.
- * Handle cases where there are not enough different values in quantiles.
\ No newline at end of file
diff --git a/g3doc/api_docs/python/tensorflow_lattice/uniform_keypoints_for_signal.md b/g3doc/api_docs/python/tensorflow_lattice/uniform_keypoints_for_signal.md
deleted file mode 100644
index 1f6dfd8..0000000
--- a/g3doc/api_docs/python/tensorflow_lattice/uniform_keypoints_for_signal.md
+++ /dev/null
@@ -1,47 +0,0 @@
-
-
-
-
-# tensorflow_lattice.uniform_keypoints_for_signal
-
-``` python
-uniform_keypoints_for_signal(
- num_keypoints,
- input_min,
- input_max,
- output_min,
- output_max,
- dtype=dtypes.float32
-)
-```
-
-Returns a pair of initialization tensors for calibration keypoints.
-
-This is used when the input range to be calibrated is known.
-
-#### Args:
-
-* `num_keypoints`: number of keypoints to use for calibrating this signal.
-* `input_min`: Scalar with the minimum value that the uncalibrated input
- can take.
-* `input_max`: Scalar with the maximum value that the uncalibrated input
- can take.
-* `output_min`: Scalar with calibrated value associated with input_min.
- Typically the minimum expected calibrated value, but not necessarily.
-    Especially if the calibration is decreasing.
-* `output_max`: Scalar with calibrated scalar value associated with
- input_max.
-* `dtype`: If any of the scalars are not given as tensors, they are converted
- to tensors with this dtype.
-
-
-#### Returns:
-
-Two tensors to be used as the keypoints_inputs and keypoints_outputs
-initialization, uniformly distributed over given ranges. Dtype is given
-by input_min, input_max, output_min, output_max.
-
-
-#### Raises:
-
-* `ValueError`: if underlying types (dtype) don't match.
\ No newline at end of file
diff --git a/g3doc/images/data_dist.png b/g3doc/images/data_dist.png
deleted file mode 100644
index 3e71189..0000000
Binary files a/g3doc/images/data_dist.png and /dev/null differ
diff --git a/g3doc/images/deep_lattice_networks.png b/g3doc/images/deep_lattice_networks.png
deleted file mode 100644
index d198f83..0000000
Binary files a/g3doc/images/deep_lattice_networks.png and /dev/null differ
diff --git a/g3doc/images/mono_1_of_4.png b/g3doc/images/mono_1_of_4.png
deleted file mode 100644
index e111a64..0000000
Binary files a/g3doc/images/mono_1_of_4.png and /dev/null differ
diff --git a/g3doc/images/mono_2_of_4.png b/g3doc/images/mono_2_of_4.png
deleted file mode 100644
index 3e78540..0000000
Binary files a/g3doc/images/mono_2_of_4.png and /dev/null differ
diff --git a/g3doc/images/mono_3_of_4.png b/g3doc/images/mono_3_of_4.png
deleted file mode 100644
index 7d17cdc..0000000
Binary files a/g3doc/images/mono_3_of_4.png and /dev/null differ
diff --git a/g3doc/images/mono_4_of_4.png b/g3doc/images/mono_4_of_4.png
deleted file mode 100644
index 675b636..0000000
Binary files a/g3doc/images/mono_4_of_4.png and /dev/null differ
diff --git a/g3doc/images/pwl_calibration_distance.png b/g3doc/images/pwl_calibration_distance.png
deleted file mode 100644
index 4bcc336..0000000
Binary files a/g3doc/images/pwl_calibration_distance.png and /dev/null differ
diff --git a/g3doc/images/pwl_calibration_price.png b/g3doc/images/pwl_calibration_price.png
deleted file mode 100644
index f1ac7a2..0000000
Binary files a/g3doc/images/pwl_calibration_price.png and /dev/null differ
diff --git a/g3doc/tutorial/images/2d_lattice.png b/g3doc/tutorial/images/2d_lattice.png
deleted file mode 100644
index 7d5c8c8..0000000
Binary files a/g3doc/tutorial/images/2d_lattice.png and /dev/null differ
diff --git a/g3doc/tutorial/images/data_dist.png b/g3doc/tutorial/images/data_dist.png
deleted file mode 100644
index 3e71189..0000000
Binary files a/g3doc/tutorial/images/data_dist.png and /dev/null differ
diff --git a/g3doc/tutorial/images/deep_lattice_networks.png b/g3doc/tutorial/images/deep_lattice_networks.png
deleted file mode 100644
index d198f83..0000000
Binary files a/g3doc/tutorial/images/deep_lattice_networks.png and /dev/null differ
diff --git a/g3doc/tutorial/images/mono_1_of_4.png b/g3doc/tutorial/images/mono_1_of_4.png
deleted file mode 100644
index e111a64..0000000
Binary files a/g3doc/tutorial/images/mono_1_of_4.png and /dev/null differ
diff --git a/g3doc/tutorial/images/mono_2_of_4.png b/g3doc/tutorial/images/mono_2_of_4.png
deleted file mode 100644
index 3e78540..0000000
Binary files a/g3doc/tutorial/images/mono_2_of_4.png and /dev/null differ
diff --git a/g3doc/tutorial/images/mono_3_of_4.png b/g3doc/tutorial/images/mono_3_of_4.png
deleted file mode 100644
index 7d17cdc..0000000
Binary files a/g3doc/tutorial/images/mono_3_of_4.png and /dev/null differ
diff --git a/g3doc/tutorial/images/mono_4_of_4.png b/g3doc/tutorial/images/mono_4_of_4.png
deleted file mode 100644
index 675b636..0000000
Binary files a/g3doc/tutorial/images/mono_4_of_4.png and /dev/null differ
diff --git a/g3doc/tutorial/images/pwl_calibration_distance.png b/g3doc/tutorial/images/pwl_calibration_distance.png
deleted file mode 100644
index 4bcc336..0000000
Binary files a/g3doc/tutorial/images/pwl_calibration_distance.png and /dev/null differ
diff --git a/g3doc/tutorial/images/pwl_calibration_price.png b/g3doc/tutorial/images/pwl_calibration_price.png
deleted file mode 100644
index f1ac7a2..0000000
Binary files a/g3doc/tutorial/images/pwl_calibration_price.png and /dev/null differ
diff --git a/g3doc/tutorial/index.md b/g3doc/tutorial/index.md
deleted file mode 100644
index 00dccd1..0000000
--- a/g3doc/tutorial/index.md
+++ /dev/null
@@ -1,883 +0,0 @@
-
-# TensorFlow Lattice: Lattice modeling in TensorFlow
-
-__TensorFlow Lattice__ is a library that implements lattice based models which
-are fast-to-evaluate and interpretable (optionally monotonic) models, also known
-as __interpolated look-up tables__. It includes a collection of [TensorFlow
-Lattice Estimators](#tensorflow-lattice-estimators-walk-through), which you can
-use like any [TensorFlow
-Estimator](https://www.tensorflow.org/guide/estimators), and it also
-includes lattices and piecewise linear calibration as layers that can be
-composed into custom models.
-
-Note that __TensorFlow Lattice is not an official Google product__.
-
-[TOC]
-
---------------------------------------------------------------------------------
-
-## Concepts
-
-This section is a simplified version of the description in [Monotonic Calibrated
-Interpolated Look-Up Tables](http://jmlr.org/papers/v17/15-243.html).
-
-### Lattices
-
-A __lattice__ is an interpolated look-up table that can approximate arbitrary
-input-output relationships in your data. It overlaps a regular grid on your
-input space, and it learns values for the output in the vertices of the grid.
-For a test point $$x$$, $$f(x)$$ is linearly interpolated from the lattice
-values surrounding $$x$$.
-
-
-
-The above simple example is a function with just 2 features, and has 4
-parameters: 0, 0.2, 0.4, and 1, which are the function's values at the corners
-of the input space; the rest of the function is interpolated from these
-parameters.
-
-The function $$f(x)$$ can capture non-linear interactions between features. You
-can think of the lattice parameters as the height of poles set in the ground on
-a regular grid, and the resulting function is like cloth pulled tight against
-the four poles.
-
-With $$D$$ features, a regular lattice will have $$2^D$$ parameters. To fit a
-more flexible function, you can specify a finer-grained lattice over the feature
-space. Combined with an efficient $$O(D log(D))$$ interpolation, lattice
-regression gives you __fast evaluation times__ and __arbitrarily complex
-functions__.
-
-Lattice regression functions are continuous, and piecewise infinitely
-differentiable, but they are generally not analytically differentiable at the
-lattice vertices themselves. Still, they tend to be __very smooth__.
-
-### Calibration
-
-Let's say the preceding sample lattice represents a learned *user happiness*
-with a suggestion of a coffee shop. Furthermore, assume the following:
-
-* *feature 1* is a baseline coffee price.
-* *feature 2* is the distance to a local coffee shop.
-
-We want our model to learn user happiness with a coffee shop suggestion. The
-distance can be defined from 0km to 30km and baseline coffee price can be
-something from $0 to $20.
-
-TensorFlow Lattice models use __piecewise linear functions__ to calibrate (or
-_normalize_) your input features to the range accepted by the lattice: from
-$$0.0$$ to $$1.0$$ in the example lattice above.
-
-The following diagrams show examples of what could be the calibration of the
-price and baseline coffee price using 10 keypoints each:
-
-
-
-
-
-
-All TensorFlow Lattice pre-made models (`Estimator`'s) use calibration of the
-features: the input (the $$x$$ axis of the plot above) is set to the quantiles
-(so data will be +/- evenly distributed on the keypoints), and the output ($$y$$
-axis) is learned along with the lattice(s).
-
-Notice that the calibration also handles the negative correlation of distance
-and _user happiness_.
-
-### Ensembles
-
-If you have $$D$$ features in a lattice, the number of parameters (vertices) of
-the lattice will be at least $$2^D$$. (To be precise replace 2s with the size of
-the grid for each feature.) As you can see lattices don't scale well with the
-number of features.
-
-TensorFlow Lattice offers ensembles of lattices to overcome this limitation.
-That is, several "tiny" lattices are combined (summed), enabling the model to
-grow linearly on the number of features, albeit exponential on the number of
-features in each of these "tiny" lattices, but the number of features per
-lattice are typically configured to be small.
-
-The library provides two variations of these ensembles:
-
-* __Random Tiny Lattices__ (__RTL__ for short): an arbitrary number of
- lattices of dimension $$D_l$$, each including random $$D_l$$ features out of
- the total $$D$$ input features.
-
-* __Ensembled Tiny Lattices__ (__ETL__ for short): As with RTLs, an arbitrary
- number of lattices of dimension $$D_l$$ is selected, but the input for these
- lattices are linear combinations (initialized randomly) of all the $$D$$
- inputs. It is more flexible than *RTL*, but less interpretable and may take
- longer to train.
-
---------------------------------------------------------------------------------
-
-## Why TensorFlow Lattice ?
-
-You can find a brief introduction to TensorFlow Lattice in [Google's Research
-Blog post](https://research.googleblog.com/).
-
-* __Interpretability__: the parameters of the model are the output at the
- vertices.
-
-*   Powerful: __arbitrarily complex__ functions with __fast evaluation times__
- (in comparison to some equivalent Deep Neural Networks for instance).
-
-As shown in the following figure, in real world usage, the training data is
-often a somewhat biased representation of where the model will be applied:
-
-
-
-TensorFlow Lattice provides the following types of __"semantic
-regularization"__:
-
-* Lattice resolution: the number of vertices in your lattice allows control
- over the flexibility of the functions that can be learned.
-
-* __Monotonicity__: You can specify that the output should only
- increase/decrease with respect to an input. In our example, you may want to
- specify that increased distance to a coffee shop should only decrease the
- chances of the coffee shop being a good one. (See the illustration below.)
-
-*   __Graph Laplacian__: Outputs of the lattice/calibration vertices/keypoints
-    are regularized towards the values of their respective neighbors. So
-    corners (vertices) of the space that see less training data will fit snugly
-    with the neighbors.
-
-* __Torsion__: Outputs of the lattice will be regularized towards preventing
- torsion among the features. In other words, the model will be regularized
- towards the contributions of the features being independent of each other.
-
-
-
-
-
-
-
-
-
-
-
---------------------------------------------------------------------------------
-
-## TensorFlow Lattice Estimators Walk-through
-
-TensorFlow Lattice library provides generic models formatted as pre-made
-[estimators](https://www.tensorflow.org/guide/estimators), which we
-hope will cover the typical use cases, or serve as example for those creating
-their own models.
-
-This section provides a walk-through of how to use the pre-made estimators to
-train a classifier of [Census income
-dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income) using TensorFlow
-Lattice. The full code used in this section, which includes some more details,
-is in
-[examples/uci_census.py](https://github.com/tensorflow/lattice/blob/master/examples/uci_census.py).
-
-If you have trouble with the 'tf.estimator' interface, consider going over the
-[TensorFlow Linear Model Tutorial](https://www.tensorflow.org/tutorials/wide).
-All of the data parsing and formatting is very similar.
-
-### UCI Census Income Dataset
-
-For this walk-through, we will use the [UCI Census Income
-Dataset](https://archive.ics.uci.edu/ml/datasets/Census+Income). You can
-download the CSV train and test files directly from these links:
-
-* [adult.data](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data)
-* [adult.test](https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test)
-
-Please save the datasets into a temporary directory (for example,
-`/tmp/uci_census`) and change the `--test` and `--train` flags to point to the
-files when running the code that follows.
-
-The data is available as CSV, and we use [pandas data analysis
-library](http://pandas.pydata.org/) (`pip install pandas` on most platforms,
-maybe requiring `sudo`) to make the parsing easy.
-
-The `tf.estimator` models use an input builder function, usually named
-`input_fn`, which is responsible for parsing data and converting it into
-`tf.Tensor`s (or `tf.SparseTensor`s) with batches of data.
-
-Our `input_fn` functions look like the following:
-
-```python
-import pandas as pd
-import tensorflow as tf
-import tensorflow_lattice as tfl
-
-flags = tf.flags
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string("test", "/tmp/uci_census/adult.test", "Path to test file.")
-flags.DEFINE_string("train", "/tmp/uci_census/adult.data", "Path to train file.")
-
-CSV_COLUMNS = [
- "age", "workclass", "fnlwgt", "education", "education_num",
- "marital_status", "occupation", "relationship", "race", "gender",
- "capital_gain", "capital_loss", "hours_per_week", "native_country",
- "income_bracket"
-]
-
-def get_test_input_fn(batch_size, num_epochs, shuffle):
- return get_input_fn(FLAGS.test, batch_size, num_epochs, shuffle)
-
-
-def get_train_input_fn(batch_size, num_epochs, shuffle):
- return get_input_fn(FLAGS.train, batch_size, num_epochs, shuffle)
-
-
-def get_input_fn(file_path, batch_size, num_epochs, shuffle):
- df_data = pd.read_csv(
- tf.gfile.Open(file_path),
- names=CSV_COLUMNS,
- skipinitialspace=True,
- engine="python",
- skiprows=1)
- df_data = df_data.dropna(how="any", axis=0)
- labels = df_data["income_bracket"].apply(lambda x: ">50K" in x).astype(int)
- return tf.estimator.inputs.pandas_input_fn(
- x=df_data,
- y=labels,
- batch_size=batch_size,
- shuffle=shuffle,
- num_epochs=num_epochs,
- num_threads=1)
-```
-
-### Preparing `FeatureColumns`
-
-TensorFlow provides `FeatureColumn`s as a way to select and describe the
-features used for a model. Numeric features require no transformations; we need
-to list the known valid values of categorical features.
-
-See more details in [TensorFlow Linear Model
-tutorial](https://www.tensorflow.org/tutorials/wide).
-
-TensorFlow Lattice pre-made estimators will take any of the currently supported
-`FeatureColumns` or alternatively the raw columns coming from the `input_fn`
-function, if they are properly numeric already.
-
-```python
-def get_feature_columns():
- # Categorical features.
- gender =
- tf.feature_column.categorical_column_with_vocabulary_list(
- "gender", ["Female", "Male"])
- education =
- tf.feature_column.categorical_column_with_vocabulary_list(
- "education", [
- "Bachelors", "HS-grad", "11th", "Masters", "9th", "Some-college",
- "Assoc-acdm", "Assoc-voc", "7th-8th", "Doctorate", "Prof-school",
- "5th-6th", "10th", "1st-4th", "Preschool", "12th"
- ])
- …
- # Numerical (continuous) base columns.
- age = tf.feature_column.numeric_column("age")
- education_num = tf.feature_column.numeric_column("education_num")
- capital_gain = tf.feature_column.numeric_column("capital_gain")
- …
- return [
- age,
- workclass,
- education,
- education_num,
- marital_status,
- occupation,
- relationship,
- race,
- gender,
- capital_gain,
- capital_loss,
- hours_per_week,
- native_country,
- ]
-
-```
-
-Note: unlike DNN pre-made estimators
-([DNNClassifier](https://www.tensorflow.org/versions/r1.3/api_docs/python/tf/estimator/DNNClassifier)
-and
-[DNNRegressor](https://www.tensorflow.org/versions/r1.3/api_docs/python/tf/estimator/DNNClassifier)),
-TensorFlow Lattice pre-made estimators accept sparse `FeatureColumn` without the
-need for embedding them.
-
-### Calibration: Saving The Quantiles
-
-TensorFlow Lattice requires proper calibration of the input for its lattices
-(see section on [calibration](#calibration) above).
-
-The current default calibration algorithm requires quantiles information about
-the data on which it's going to train. This can be done as a simple
-pre-processing step.
-
-The following code snippet from our example does that:
-
-```python
-import tensorflow_lattice as tfl
-
-flags.DEFINE_bool("create_quantiles", False,
- "Run once to create histogram of features for calibration.")
-flags.DEFINE_string(
- "quantiles_dir", None,
- "Directory where to store quantile information, defaults to the model "
- "directory (set by --output-dir) but since quantiles can be reused by "
- "models with different parameters, you may want to have a separate "
- "directory.")
-…
-
-def create_quantiles(quantiles_dir):
- """Creates quantiles directory if it doesn't yet exist."""
- batch_size = 10000
- input_fn = get_test_input_fn(
- batch_size=batch_size, num_epochs=1, shuffle=False)
- # Reads until input is exhausted, 10000 at a time.
- tfl.save_quantiles_for_keypoints(
- input_fn=input_fn,
- save_dir=quantiles_dir,
- feature_columns=create_feature_columns(),
- num_steps=None)
-
-def main(argv)
- …
-
- # Create quantiles if required.
- if FLAGS.create_quantiles:
- if FLAGS.run != "train":
- raise ValueError(
- "Can not create_quantiles for mode --run='{}'".format(FLAGS.run))
- create_quantiles(quantiles_dir)
-```
-
-Note: This only needs to be run once per dataset, and can be shared among
-different models that use the same data.
-
-Note: This information is only needed for training. During inference
-(production), the model itself already contains all the information it needs,
-and doesn't need to read this anymore.
-
-Advanced: If you know the range of input, instead of using quantiles, you can
-provide
-[`uniform_keypoints_for_signal`](../api_docs/python/tensorflow_lattice/uniform_keypoints_for_signal.md)
-as function initializer, which will create calibration keypoints uniformly in
-the given range. Or you can provide your own keypoint initializing function.
-
-### Calibrated Linear Model
-
-Calibrated linear model is the simplest model type offered in TensorFlow
-Lattice. It calibrates the input using piecewise-linear calibrated functions and
-then linearly combines the inputs. Using it is trivial if you are used to
-TensorFlow's `Estimator` framework (see [Module
-tf.estimator](https://www.tensorflow.org/api_docs/python/tf/estimator)).
-
-To create a calibrated linear model, you need to specify features in
-`feature_columns`, the model directory in `model_dir`, the ["run
-configuration"](https://www.tensorflow.org/api_docs/python/tf/estimator/RunConfig)
-in `config`, and in `hparams` the hyperparameters settings in the form of a
-[`tfl.CalibratedLinearHParams`](../api_docs/python/tensorflow_lattice/CalibratedLinearHParams.md)
-object.
-
-All parameters are optional; see more details in:
-
-* [`tfl.calibrated_linear_classifier`](../api_docs/python/tensorflow_lattice/calibrated_linear_classifier.md)
-* [`tfl.calibrated_linear_regressor`](../api_docs/python/tensorflow_lattice/calibrated_linear_regressor.md)
-* Configurable hyperparameters in
- [`tfl.CalibratedLinearHParams`](../api_docs/python/tensorflow_lattice/CalibratedLinearHParams.md)
-
-Calibration can be forced to be monotonic and regularized in different ways. It
-also supports special casing of __missing values__ (see `missing_input_value`
-hyperparameter); that is, the calibration of missing values has its own
-parameter that is learned independently from other values.
-
-An example of code that stitches this together is presented below. For now we
-present only the default hyperparameters. The next section covers the special
-TensorFlow Lattice hyperparameters, and how to change them.
-
-The following code shows how our `create_calibrated_linear` function gets
-called. It hinges on creating an `Estimator` object, and then either training or
-evaluating it.
-
-```python
-import tensorflow_lattice as tfl
-
-def create_calibrated_linear(feature_columns, config, quantiles_dir):
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedLinearHParams(feature_names=feature_names)
- return tfl.calibrated_linear_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-…
-
-def create_estimator(config, quantiles_dir):
- """Creates estimator for given configuration based on --model_type."""
- feature_columns = create_feature_columns()
- if FLAGS.model_type == "calibrated_linear":
- return create_calibrated_linear(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_lattice":
- return create_calibrated_lattice(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_rtl":
- return create_calibrated_rtl(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_etl":
- return create_calibrated_etl(feature_columns, config, quantiles_dir)
- elif FLAGS.model_type == "calibrated_dnn":
- return create_calibrated_dnn(feature_columns, config, quantiles_dir)
-
- raise ValueError("Unknown model_type={}".format(FLAGS.model_type))
- …
-
-def main(args):
- …
- # Create config and then model.
- config = tf.estimator.RunConfig().replace(model_dir=output_dir)
- estimator = create_estimator(config, quantiles_dir)
-
- if FLAGS.run == "train":
- train(estimator)
-
- elif FLAGS.run == "evaluate":
- evaluate(estimator)
-
- else:
- raise ValueError("Unknonw --run={}".format(FLAGS.run))
-```
-
-### Hyperparameters setting
-
-Each of the pre-made estimators offered by *TensorFlow Lattices* is controlled
-by a set of hyperparameters. Some are shared among different estimators, some
-are unique. All are documented in their definition.
-
-* Calibrated linear models:
- [`tfl.CalibratedLinearHParams`](../api_docs/python/tensorflow_lattice/CalibratedLinearHParams.md)
-* Calibrated lattice models:
- [`tfl.CalibratedLatticeHParams`](../api_docs/python/tensorflow_lattice/CalibratedLatticeHParams.md)
-* Calibrated RTL models:
- [`tfl.CalibratedRtlHParams`](../api_docs/python/tensorflow_lattice/CalibratedRtlHParams.md)
-* Calibrated ETL models:
- [`tfl.CalibratedEtlHParams`](../api_docs/python/tensorflow_lattice/CalibratedEtlHParams.md)
-
-TensorFlow Lattices' hyperparameters classes are slightly different from
-[TensorFlow standard hyperparameters
-class](https://www.tensorflow.org/api_docs/python/tf/contrib/training/HParams)
-in that they accept global and per-feature parameters. For instance, in our
-calibrated linear model on the previous section, we defined the following
-default values:
-
-```python
-def create_calibrated_linear(feature_columns, config, quantiles_dir):
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedLinearHParams(
- feature_names=feature_names, num_keypoints=200, learning_rate=0.1)
- hparams.set_feature_param("capital_gain", "calibration_l2_laplacian_reg",
- 4.0e-8)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_linear_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-```
-
-The preceding code uses different default values for the following
-hyperparameters:
-
-* `num_keypoints`
-* `learning_rate`
-* `calibration_l2_laplacian_reg` (l2 laplacian regularization for the
- calibration) for the feature named "capital_gain"
-
-Notice also the call `hparams.parse(FLAGS.hparams)`, which will parse a string
-with a comma-separated list of settings. Feature specific values can also be set
-by prefixing the parameter (that takes feature specific values) with
-"feature\_\_<_feature\_name_>\_\_<_param\_name_>". Notice that the
-separator here is double underscores ("\_\_").
-
-For example, the following invocation sets `learning_rate=0.001` and for feature
-_capital\_loss_ it sets `calibration_l2_laplacian_reg=1.0e-5`:
-
-```bash
-
-$ uci_census.py … --hparams=learning_rate=0.001,feature__capital_loss__calibration_l2_laplacian_reg=1.0e-5 …
-
-```
-
-We define this simple pretty-print function in our example:
-
-```python
-def _pprint_hparams(hparams):
- """Pretty-print hparams."""
- print("* hparams=[")
- for (key, value) in sorted(six.iteritems(hparams.values())):
- print("\t{}={}".format(key, value))
- print("]")
-```
-
-### Calibrated Lattice Model
-
-Calibrated lattice models first calibrate the input with piecewise-linear
-functions, and combine them into a lattice (see [Concepts](#concepts) section).
-
-Calibrated lattice models also provide:
-
-* __Enforced monotonicity__: in the calibration (can be increasing or
- decreasing), and in the lattice (you must also set the calibration to be
- monotonic and enable lattice monotonicity). Both can be selected per
- feature.
-* __Missing value handle__: missing values can be calibrated automatically for
- some special value or can have their own value in the lattice. Controlled
- per feature through the parameters: `missing_input_value` and
- `missing_vertex`.
-* __Semantic regularization__: rich set of regularization that can be applied
- independently to the calibration and lattice. Can be set globally or per
- feature. See their description in the [Concepts](#concepts) section.
-* Flexible size: lattice can easily be adjusted to different granularity per
-    feature by setting `lattice_size`. This gives it lots of power to
-    approximate any function.
-
-Limitations:
-
-* __Scalability issues on number of features and lattice size__: the total
- number of vertices (parameters) in the lattice is the product of the
- `lattice_size` for each feature. Your models are gated by available memory
- and parameters update speed. To stay within reasonable bounds, don't use
- more than 14 features (or 50,000 parameters). If that isn't possible, use
- the more powerful [Random Tiny Lattices Model](#random-tiny-lattices-model)
- or [Embedded Tiny Lattices Model](#embedded-tiny-lattices-model).
-
-Calibrated lattice models are available as classifier or regressor by
-[`tfl.calibrated_lattice_classifier`](../api_docs/python/tensorflow_lattice/calibrated_lattice_classifier.md)
-and
-[`tfl.calibrated_lattice_regressor`](../api_docs/python/tensorflow_lattice/calibrated_lattice_regressor.md)
-constructors.
-
-Documentation on all hyperparameters is in
-[`tfl.CalibratedLatticeHParams`](../api_docs/python/tensorflow_lattice/CalibratedLatticeHParams.md)
-
-Extract from the
-[`uci_census`](https://github.com/tensorflow/lattice/blob/master/examples/uci_census.py)
-example:
-
-```python
-def create_calibrated_lattice(feature_columns, config, quantiles_dir):
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedLatticeHParams(
- feature_names=feature_names,
- num_keypoints=200,
- lattice_l2_laplacian_reg=5.0e-3,
- lattice_l2_torsion_reg=1.0e-4,
- learning_rate=0.1,
- lattice_size=2)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_lattice_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-```
-
-Note: To see how this function gets called from `main`, see [Calibrated Linear
-Model](#calibrated-linear-model).
-
-### Random Tiny Lattices Model
-
-Calibrated "Random Tiny Lattices" (RTL) models, like calibrated lattice models,
-first calibrate the input with piecewise-linear functions. But then it combines
-them in an ensemble of `num_lattices` lattices built with inputs from random
-features (`lattice_rank` input features per lattice).
-
-Extract from the
-[`uci_census`](https://github.com/tensorflow/lattice/blob/master/examples/uci_census.py)
-example:
-
-```python
-def create_calibrated_rtl(feature_columns, config, quantiles_dir):
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedRtlHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=0.02,
- lattice_l2_laplacian_reg=5.0e-4,
- lattice_l2_torsion_reg=1.0e-4,
- lattice_size=3,
- lattice_rank=4,
- num_lattices=100)
- # Specific feature parameters.
- hparams.set_feature_param("capital_gain", "lattice_size", 8)
- hparams.set_feature_param("native_country", "lattice_size", 8)
- hparams.set_feature_param("marital_status", "lattice_size", 4)
- hparams.set_feature_param("age", "lattice_size", 8)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_rtl_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-```
-
-Note: To see how this function gets called from `main`, see [Calibrated Linear
-Model](#calibrated-linear-model).
-
-In this example it will calibrate the inputs (using up to 200 keypoints, per
-`num_keypoints`) and then randomly distribute them into 100 lattices
-(`num_lattices`, a feature can be used by more than one lattice).
-
-The lattices and the calibration are all trained jointly.
-
-Like with calibrated lattice models, but without the limitations on the number
-of features, it supports:
-
-* __Enforced monotonicity__: in the calibration (can be increasing or
- decreasing), and in the lattice (one must also set the calibration to be
- monotonic, and enable lattice monotonicity). Both can be selected per
- feature.
-* __Missing value handle__: Missing values can be calibrated automatically for
- some special value, or can have their own value in the lattice. Controlled
- per feature through the parameters: `missing_input_value` and
- `missing_vertex`.
-* __Semantic regularization__: rich set of regularization that can be applied
- independently to the calibration and lattice. Can be set globally or per
- feature. See their description in the [Concepts](#concepts) section.
-* Flexible size: lattice can easily be adjusted to different granularity per
-    feature by setting `lattice_size`. This gives it lots of power to
-    approximate any function.
-
-Note: The `lattice_rank` hyperparameter controls how many features are seen in
-_combination_. It is often used as a regularization on the complexity of
-interactions allowed among the features. But as with calibrated lattices this is
-limited to 10 or 20 features at most combined in the same lattices. If you
-wonder if the model could pick better than random features to be combined in
-lattices, check out the next section, on [Embedded Tiny Lattices
-Model](#embedded-tiny-lattices-model).
-
-Calibrated RTL models are available as classifier or regressor by
-[`tfl.calibrated_rtl_classifier`](../api_docs/python/tensorflow_lattice/calibrated_rtl_classifier.md)
-and
-[`tfl.calibrated_rtl_regressor`](../api_docs/python/tensorflow_lattice/calibrated_rtl_regressor.md)
-constructors.
-
-Documentation on all hyperparameters in
-[`tfl.CalibratedLatticeHParams`](../api_docs/python/tensorflow_lattice/CalibratedLatticeHParams.md)
-
-Note: See above in section [Calibrated Linear Model](#calibrated-linear-model)
-on how this function gets called from `main`.
-
-### Embedded Tiny Lattices Model
-
-Calibrated "Embedded Tiny Lattices" (ETL) models, like calibrated [RTL
-models](#random-tiny-lattices-model), first calibrate the input and connect
-those calibrated signals into an ensemble of lattices. But as opposed to having
-each lattice take as input a subset of the calibrated features, in ETL models it
-takes as input an embedding of the input features: each input is a linear
-combination of the calibrated features.
-
-The number of lattices is defined by 'monotonic_num_lattices' and
-'non_monotonic_num_lattices': monotonic lattices can take as input monotonic
-features and non-monotonic features. Non-monotonic lattices can only take
-non-monotonic features as input (otherwise monotonicity could be broken).
-
-The size of the embedding to be used in each lattice is given by
-`monotonic_lattice_rank` and `non_monotonic_lattice_rank`. Each lattice has its
-own embedding: calibration, embedding and lattices are trained jointly.
-
-The size of the lattices, which gives resolution for them is given by
-`monotonic_lattice_size` and `non_monotonic_lattice_size`.
-
-Calibrated ETL models are available as classifier or regressor by
-[`tfl.calibrated_etl_classifier`](../api_docs/python/tensorflow_lattice/calibrated_etl_classifier.md)
-and
-[`tfl.calibrated_etl_regressor`](../api_docs/python/tensorflow_lattice/calibrated_etl_regressor.md)
-constructors.
-
-Embedded tiny lattices can be __more powerful__ than [RTL
-models](#random-tiny-lattices-model), but they __sacrifice some of the "semantic
-regularization"__ (same regularization options are available, but they apply to
-abstract embeddings), and are __slower to train__. __Monotonicity still is well
-supported__.
-
-See details in paper [Deep Lattice Networks and Partial Monotonic
-Functions](https://research.google.com/pubs/pub46327.html). ETL implements only
-one layer deep lattice models, but deeper models can be built by composing
-lattice layers, in the [next section](#tensorflow-lattice-layers)
-
-In our example
-[`uci_census`](https://github.com/tensorflow/lattice/blob/master/examples/uci_census.py)
-model, using only non-monotonic signals:
-
-```python
-def create_calibrated_etl(feature_columns, config, quantiles_dir):
- # No enforced monotonicity in this example.
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedEtlHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=0.02,
- non_monotonic_num_lattices=200,
- non_monotonic_lattice_rank=2,
- non_monotonic_lattice_size=2,
- calibration_l2_laplacian_reg=4.0e-3,
- lattice_l2_laplacian_reg=1.0e-5,
- lattice_l2_torsion_reg=4.0e-4)
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
- return tfl.calibrated_etl_classifier(
- feature_columns=feature_columns,
- model_dir=config.model_dir,
- config=config,
- hparams=hparams,
- quantiles_dir=quantiles_dir)
-```
-
-Note: To see how this function gets called from `main`, see [Calibrated Linear
-Model](#calibrated-linear-model).
-
---------------------------------------------------------------------------------
-
-## TensorFlow Lattice Layers
-
-TensorFlow Lattice layer components are also provided by the library, so users
-can combine them in more flexible or advanced ways.
-
-The following are the layer components included in the TensorFlow Lattice
-library:
-
-* __Piecewise-Linear Calibration__:
- * [`tfl.input_calibration_layer`](../api_docs/python/tensorflow_lattice/input_calibration_layer.md):
- Calibrates the "input", provided either as `FeatureColumn`s or as a dict
- of columns to tensors, the typical object returned by an `input_fn`
- function. Includes support for monotonicity, regularization and special
- "missing" values.
- * [`tfl.input_calibration_layer_from_hparams`](../api_docs/python/tensorflow_lattice/input_calibration_layer_from_hparams.md):
- Calibrates the "input", provided either as `FeatureColumn`s or as a dict
- of columns to tensors, the typical object returned by an `input_fn`
- function. Includes support for monotonicity, regularization and special
- "missing" values. This version uses an `tfl.CalibrateHParams` to specify
- the hyperparameters.
- * [`tfl.calibration_layer`](../api_docs/python/tensorflow_lattice/calibration_layer.md):
- Calibrates a tensor of shape \[batch_size, ...\]. Each element (outside
- the batch-dimension) gets its own calibration. Includes support for
- monotonicity, regularization and special "missing" values.
-* __Lattice Layer__:
- * [`tfl.lattice_layer`](../api_docs/python/tensorflow_lattice/lattice_layer.md):
- Creates `output_dim` lattices that uses as input a tensor of shape
- \[batch_size, input_dim\]. Lattice size is defined for each dimension of
- `input_dim`. The total number of parameters is the product of all these
- lattice sizes times `output_dim`. Full support of monotonicity and
- regularization.
- * [`tfl.ensemble_lattices_layer`](../api_docs/python/tensorflow_lattice/ensemble_lattices_layer.md):
- Creates an ensemble of lattices connecting inputs as specified by the
- caller. Full support of monotonicity and regularization.
-
-Example *calibrated_dnn*, a custom estimator from our example
-[`uci_census`](https://github.com/tensorflow/lattice/blob/master/examples/uci_census.py)
-model:
-
-```python
-def create_calibrated_dnn(feature_columns, config, quantiles_dir):
- """Creates a calibrated DNN model."""
- # This is an example of a hybrid model that uses input calibration layer
- # offered by TensorFlow Lattice library and connects it to DNN.
- feature_names = [fc.name for fc in feature_columns]
- hparams = tfl.CalibratedHParams(
- feature_names=feature_names,
- num_keypoints=200,
- learning_rate=1.0e-3,
- calibration_output_min=-1.0,
- calibration_output_max=1.0,
- nodes_per_layer=10, # All layers have the same number of nodes.
- layers=2, # Includes output layer, therefore >= 1.
- )
- hparams.parse(FLAGS.hparams)
- _pprint_hparams(hparams)
-
- def _model_fn(features, labels, mode, params):
- """Model construction closure used when creating estimator."""
- del mode
- del params # They are read directly from the bound variable hparams
-
- # Calibrate: since there is no monotonicity, there are no projection ops.
- # We also discard the ordered names of the features.
- (output, _, _, regularization) = tfl.input_calibration_layer_from_hparams(
- features, feature_columns, hparams, quantiles_dir)
-
- # Hidden-layers.
- for _ in range(hparams.layers - 1):
- output = tf.layers.dense(
- inputs=output, units=hparams.nodes_per_layer, activation=tf.sigmoid)
-
- # Classifier logits and prediction.
- logits = tf.layers.dense(inputs=output, units=1)
- predictions = tf.reshape(tf.sigmoid(logits), [-1])
-
- # Notice loss doesn't include regularization, which is added separately
- # by means of tf.contrib.layers.apply_regularization().
- loss_no_regularization = tf.losses.log_loss(labels, predictions)
- loss = loss_no_regularization
- if regularization is not None:
- loss += regularization
- optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
- train_op = optimizer.minimize(
- loss,
- global_step=tf.train.get_global_step(),
- name="calibrated_dnn_minimize")
-
- eval_metric_ops = {
- "accuracy": tf.metrics.accuracy(labels, predictions),
-
- # We want to report the loss without the regularization, so metric is
- # comparable with different regularizations. FutureWork, list both.
- "average_loss": tf.metrics.mean(loss_no_regularization),
- }
-
- return tf.estimator.EstimatorSpec(mode, predictions, loss, train_op,
- eval_metric_ops)
-
- # Hyperparameters are passed directly to the model_fn closure by the context.
- return tf.estimator.Estimator(
- model_fn=_model_fn,
- model_dir=config.model_dir,
- config=config,
- params=None)
-```
-
-### Other potential use cases of these components
-
-* If integrating an embedding from another model (transfer-learning);
-* Use TensorFlow Lattice\'s calibration in a DNN: works much better than
- gaussian normalization of inputs. Something else that has been used with
- some success is the piecewise linear function as an activation function.
-* Use lattices on the "upper" (closer to output) layers of a DNN, for its
- regularization.
-* Use the piecewise-linear calibration as an activation function for neural
- networks.
-* Use piecewise-linear calibration as a probability distribution function for
- learning continuous values in a Reinforcement Learning set up (REINFORCE
- algorithm).
-
-## Papers
-
-* [Lattice Regression](https://papers.nips.cc/paper/3694-lattice-regression),
- Eric Garcia, Maya Gupta, Advances in Neural Information Processing Systems
- (NIPS), 2009
-* [Optimized Regression for Efficient Function
- Evaluation](http://ieeexplore.ieee.org/document/6203580/), Eric Garcia,
- Raman Arora, Maya R. Gupta, IEEE Transactions on Image Processing, 2012
-* [Monotonic Calibrated Interpolated Look-Up
- Tables](http://jmlr.org/papers/v17/15-243.html), Maya Gupta, Andrew Cotter,
- Jan Pfeifer, Konstantin Voevodski, Kevin Canini, Alexander Mangylov,
- Wojciech Moczydlowski, Alexander van Esbroeck, Journal of Machine Learning
- Research (JMLR), 2016
-* [Fast and Flexible Monotonic Functions with Ensembles of
- Lattices](https://papers.nips.cc/paper/6377-fast-and-flexible-monotonic-functions-with-ensembles-of-lattices),
- Mahdi Milani Fard, Kevin Canini, Andrew Cotter, Jan Pfeifer, Maya Gupta,
- Advances in Neural Information Processing Systems (NIPS), 2016
-* [Deep Lattice Networks and Partial Monotonic
- Functions](https://research.google.com/pubs/pub46327.html), Seungil You,
- Kevin Canini, David Ding, Jan Pfeifer, Maya R. Gupta, Advances in Neural
- Information Processing Systems (NIPS), 2017
diff --git a/pip_pkg.sh b/pip_pkg.sh
deleted file mode 100755
index dc483a0..0000000
--- a/pip_pkg.sh
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env bash
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-set -e
-
-PLATFORM="$(uname -s | tr 'A-Z' 'a-z')"
-
-function main() {
- if [ $# -lt 1 ] ; then
- echo "No destination dir provided"
- exit 1
- fi
-
- # Create the directory, then do dirname on a non-existent file inside it to
- # give us an absolute paths with tilde characters resolved to the destination
- # directory. Readlink -f is a cleaner way of doing this but is not available
- # on a fresh macOS install.
- mkdir -p "$1"
- DEST="$(dirname "${1}/does_not_exist")"
- echo "=== destination directory: ${DEST}"
-
- TMPDIR=$(mktemp -d -t tmp.XXXXXXXXXX)
-
- echo $(date) : "=== Using tmpdir: ${TMPDIR}"
-
- echo "=== Copy TensorFlow Lattice files"
- # Here are bazel-bin/pip_pkg.runfiles directory structure.
- # bazel-bin/pip_pkg.runfiles
- # |-
- # |- org_python_pypi_backports_weakref
- # |- org_tensorflow
- # |- protobuf
- # |- six_archive
- # |- tensorflow_lattice
- # |- external
- # |- pip_pkg
- # |- pip_pkg.sh
- # |- MANIFEST.in (needed)
- # |- setup.py (needed)
- # |- tensorflow_lattice (needed)
- #
- # To build tensorflow lattice wheel, we only need setup.py, MANIFEST.in, and
- # python and .so files under tensorflow_lattice/tensorflow_lattice.
- # So we extract those to ${TMPDIR}.
- cp bazel-bin/pip_pkg.runfiles/tensorflow_lattice/setup.py "${TMPDIR}"
- cp bazel-bin/pip_pkg.runfiles/tensorflow_lattice/MANIFEST.in "${TMPDIR}"
- cp -R \
- bazel-bin/pip_pkg.runfiles/tensorflow_lattice/tensorflow_lattice \
- "${TMPDIR}"
-
- echo "=== Copy TensorFlow Lattice root and cc files"
- cp README.md ${TMPDIR}
- cp LICENSE ${TMPDIR}
- cp -R \
- tensorflow_lattice/cc \
- "${TMPDIR}/tensorflow_lattice"
-
- pushd ${TMPDIR}
- if [ "${TFL_SDIST}" = true ]; then
- echo $(date) : "=== Building source distribution and wheel"
- else
- echo $(date) : "=== Building wheel"
- fi
-
- if [ -z "$2" ]; then
- if [ "${TFL_SDIST}" = true ]; then
- python setup.py sdist > /dev/null
- fi
- python setup.py bdist_wheel > /dev/null
- else
- if [ "${TFL_SDIST}" = true ]; then
- python setup.py "$2" sdist > /dev/null
- fi
- python setup.py "$2" bdist_wheel >/dev/null
- fi
-
- cp dist/* "${DEST}"
- popd
- rm -rf ${TMPDIR}
- echo $(date) : "=== Output tar ball and wheel file are in: ${DEST}"
-}
-
-main "$@"
diff --git a/setup.py b/setup.py
index 3e95688..cf46697 100644
--- a/setup.py
+++ b/setup.py
@@ -1,114 +1,105 @@
-# pylint: disable=g-bad-file-header
-# Copyright 2017 The TensorFlow Lattice Authors.
+# Copyright 2018 The TensorFlow Lattice Authors.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-"""Setup for pip package."""
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+# ==============================================================================
+"""Package setup script for TensorFlow Lattice library."""
+
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import datetime
import sys
-import warnings
from setuptools import find_packages
from setuptools import setup
-from setuptools.command.install import install as InstallCommandBase
-from setuptools.dist import Distribution
-
-
-__version__ = '0.9.9'
-
-
-REQUIRED_PACKAGES = [
- 'six >= 1.11.0',
- 'protobuf >= 3.6.1',
- 'numpy >= 1.14.5',
-]
+# This version number should always be that of the *next* (unreleased) version.
+# Immediately after uploading a package to PyPI, you should increment the
+# version number and push to GitHub.
+__version__ = "2.0"
-if '--gpu' in sys.argv:
- use_gpu = True
- sys.argv.remove('--gpu')
+if "--release" in sys.argv:
+ sys.argv.remove("--release")
+ _name = "tensorflow_lattice"
else:
- use_gpu = False
-
-
-if use_gpu:
- project_name = 'tensorflow-lattice-gpu'
- REQUIRED_PACKAGES.append('tensorflow-gpu==1.14.0')
-else:
- project_name = 'tensorflow-lattice'
- REQUIRED_PACKAGES.append('tensorflow==1.14.0')
-
-CONSOLE_SCRIPTS = [
- 'freeze_graph_wrapper = '
- 'tensorflow_lattice.cc.tflite.freeze_graph_wrapper:main',
- 'toco_wrapper = tensorflow_lattice.cc.tflite.toco_wrapper:main',
+ # Build a nightly package by default.
+ _name = "tensorflow_lattice_nightly"
+ __version__ += datetime.datetime.now().strftime(".dev%Y%m%d")
+
+_install_requires = [
+ "absl-py",
+ "numpy",
+ "pandas",
+ "six",
+ "sklearn",
+ "matplotlib",
+ "graphviz",
]
+# Part of the visualization code uses colabtools and IPython libraries. These
+# are not added as hard requirements as they are mainly used in jupyter/colabs.
+
+_extras_require = {
+ "tensorflow": "tensorflow>=1.15",
+ "tensorflow-gpu": "tensorflow-gpu>=1.15",
+}
+
+_classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Education",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 3",
+ "Topic :: Scientific/Engineering",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "Topic :: Scientific/Engineering :: Mathematics",
+ "Topic :: Software Development",
+ "Topic :: Software Development :: Libraries",
+ "Topic :: Software Development :: Libraries :: Python Modules",
+]
-class BinaryDistribution(Distribution):
- """This class is needed in order to create OS specific wheels."""
-
- def has_ext_modules(self):
- return True
-
-
-warnings.warn('tensorflow-lattice is likley to fail when building from a '
- 'source distribution (sdist). Please follow instructions in '
- '(https://github.com/tensorflow/lattice/INSTALL.md) '
- 'to build this from the source.')
-
+_description = (
+ "A library that implements optionally monotonic lattice based models.")
+_long_description = """\
+TensorFlow Lattice is a library that implements fast-to-evaluate and
+interpretable (optionally monotonic) lattice based models, which are also known
+as *interpolated look-up tables*. The library includes a collection of
+Estimators, which operate like any TensorFlow Estimator. It also includes
+Keras layers for lattices and feature calibration that can be composed
+into custom models.
+"""
setup(
- name=project_name,
+ name=_name,
version=__version__,
- description=('TensorFlow Lattice provides lattice models in TensorFlow'),
- long_description='',
- url='https://github.com/tensorflow/lattice',
- author='Google Inc.',
- author_email='tensorflow-lattice-releasing@google.com',
- # Contained modules and scripts.
+ author="Google Inc.",
+ author_email="no-reply@google.com",
+ license="Apache 2.0",
+ classifiers=_classifiers,
+ install_requires=_install_requires,
+ extras_require=_extras_require,
packages=find_packages(),
- install_requires=REQUIRED_PACKAGES,
- # Add in any packaged data.
include_package_data=True,
- package_data={'': ['*.so']},
- exclude_package_data={'': ['BUILD', '*.h', '*.cc']},
- zip_safe=False,
- distclass=BinaryDistribution,
- cmdclass={
- 'pip_pkg': InstallCommandBase,
- },
- entry_points={
- 'console_scripts': CONSOLE_SCRIPTS
- },
- # PyPI package information.
- classifiers=[
- 'Development Status :: 4 - Beta',
- 'Intended Audience :: Developers',
- 'Intended Audience :: Education',
- 'Intended Audience :: Science/Research',
- 'License :: OSI Approved :: Apache Software License',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Topic :: Scientific/Engineering :: Mathematics',
- 'Topic :: Software Development :: Libraries :: Python Modules',
- 'Topic :: Software Development :: Libraries',
- ],
- license='Apache 2.0',
- keywords='lattice tensorflow tensor machine learning',
+ description=_description,
+ long_description=_long_description,
+ long_description_content_type="text/markdown",
+ keywords="tensorflow lattice calibration machine learning",
+ url=(
+ "https://github.com/tensorflow/lattice"
+ ),
)
diff --git a/tensorflow b/tensorflow
deleted file mode 160000
index 456fbc0..0000000
--- a/tensorflow
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 456fbc0e498e3d10604973de9f46ca48d62267cc
diff --git a/tensorflow_lattice/BUILD b/tensorflow_lattice/BUILD
index 37af0c8..835d220 100644
--- a/tensorflow_lattice/BUILD
+++ b/tensorflow_lattice/BUILD
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-licenses(["notice"]) # Apache 2.0 License
package(
default_visibility = [
@@ -20,38 +19,31 @@ package(
],
)
+licenses(["notice"])
+
exports_files(["LICENSE"])
py_library(
name = "tensorflow_lattice",
- srcs = ["__init__.py"],
- srcs_version = "PY2AND3",
- deps = [
- "//tensorflow_lattice/python:keypoints_initialization",
- "//tensorflow_lattice/python:lattice_layers",
- "//tensorflow_lattice/python:lattice_ops_py",
- "//tensorflow_lattice/python:pwl_calibration_layers",
- "//tensorflow_lattice/python:pwl_calibration_ops_py",
- "//tensorflow_lattice/python:regularizers",
- "//tensorflow_lattice/python:tools",
- "//tensorflow_lattice/python/estimators:base",
- "//tensorflow_lattice/python/estimators:calibrated",
- "//tensorflow_lattice/python/estimators:calibrated_etl",
- "//tensorflow_lattice/python/estimators:calibrated_lattice",
- "//tensorflow_lattice/python/estimators:calibrated_linear",
- "//tensorflow_lattice/python/estimators:calibrated_rtl",
- "//tensorflow_lattice/python/estimators:hparams",
- "//tensorflow_lattice/python/estimators:separately_calibrated_rtl",
+ srcs = [
+ "__init__.py",
+ "layers/__init__.py",
],
-)
-
-# Depend on this if you have a C++ library or binary that uses TensorFlow
-# lattice ops.
-cc_library(
- name = "tensorflow_lattice_cc",
+ srcs_version = "PY2AND3",
deps = [
- "//tensorflow_lattice/cc:lattice_ops",
- "//tensorflow_lattice/cc:pwl_calibration_ops",
+ "//tensorflow_lattice/python:categorical_calibration_layer",
+ "//tensorflow_lattice/python:categorical_calibration_lib",
+ "//tensorflow_lattice/python:configs",
+ "//tensorflow_lattice/python:estimators",
+ "//tensorflow_lattice/python:lattice_layer",
+ "//tensorflow_lattice/python:lattice_lib",
+ "//tensorflow_lattice/python:linear_layer",
+ "//tensorflow_lattice/python:linear_lib",
+ "//tensorflow_lattice/python:model_info",
+ "//tensorflow_lattice/python:parallel_combination_layer",
+ "//tensorflow_lattice/python:pwl_calibration_layer",
+ "//tensorflow_lattice/python:pwl_calibration_lib",
+ "//tensorflow_lattice/python:test_utils",
+ "//tensorflow_lattice/python:visualization",
],
- alwayslink = 1,
)
diff --git a/tensorflow_lattice/__init__.py b/tensorflow_lattice/__init__.py
index 03fcc52..2aeca52 100644
--- a/tensorflow_lattice/__init__.py
+++ b/tensorflow_lattice/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
+# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,57 +11,27 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# ==============================================================================
-"""Lattice modeling.
-
-This package provides functions and classes for lattice modeling.
-
-See full description in `README.md` file.
+"""Tensorflow Lattice Library.
- use them.
+This package provides functions and classes for lattice modeling.
"""
-# pylint: disable=unused-import,wildcard-import, line-too-long
-
from __future__ import absolute_import
-# Dependency imports
-
-# Import all modules here, but only import functions and classes that are
-# more likely to be used directly by users.
-from tensorflow_lattice.python.estimators.calibrated import input_calibration_layer_from_hparams
-from tensorflow_lattice.python.estimators.calibrated_etl import calibrated_etl_classifier
-from tensorflow_lattice.python.estimators.calibrated_etl import calibrated_etl_regressor
-from tensorflow_lattice.python.estimators.calibrated_lattice import calibrated_lattice_classifier
-from tensorflow_lattice.python.estimators.calibrated_lattice import calibrated_lattice_regressor
-from tensorflow_lattice.python.estimators.calibrated_linear import calibrated_linear_classifier
-from tensorflow_lattice.python.estimators.calibrated_linear import calibrated_linear_regressor
-from tensorflow_lattice.python.estimators.calibrated_rtl import calibrated_rtl_classifier
-from tensorflow_lattice.python.estimators.calibrated_rtl import calibrated_rtl_regressor
-from tensorflow_lattice.python.estimators.hparams import CalibratedEtlHParams
-from tensorflow_lattice.python.estimators.hparams import CalibratedHParams
-from tensorflow_lattice.python.estimators.hparams import CalibratedLatticeHParams
-from tensorflow_lattice.python.estimators.hparams import CalibratedLinearHParams
-from tensorflow_lattice.python.estimators.hparams import CalibratedRtlHParams
-from tensorflow_lattice.python.estimators.hparams import PerFeatureHParams
-from tensorflow_lattice.python.estimators.separately_calibrated_rtl import separately_calibrated_rtl_classifier
-from tensorflow_lattice.python.estimators.separately_calibrated_rtl import separately_calibrated_rtl_regressor
-from tensorflow_lattice.python.lib.keypoints_initialization import load_keypoints_from_quantiles
-from tensorflow_lattice.python.lib.keypoints_initialization import save_quantiles_for_keypoints
-from tensorflow_lattice.python.lib.keypoints_initialization import save_quantiles_for_keypoints_once
-from tensorflow_lattice.python.lib.keypoints_initialization import uniform_keypoints_for_signal
-from tensorflow_lattice.python.lib.lattice_layers import ensemble_lattices_layer
-from tensorflow_lattice.python.lib.lattice_layers import lattice_layer
-from tensorflow_lattice.python.lib.lattice_layers import monotone_lattice
-from tensorflow_lattice.python.lib.monotone_linear_layers import monotone_linear_layer
-from tensorflow_lattice.python.lib.monotone_linear_layers import split_monotone_linear_layer
-from tensorflow_lattice.python.lib.pwl_calibration_layers import calibration_layer
-from tensorflow_lattice.python.lib.pwl_calibration_layers import input_calibration_layer
-from tensorflow_lattice.python.lib.regularizers import calibrator_regularization
-from tensorflow_lattice.python.lib.regularizers import lattice_regularization
-from tensorflow_lattice.python.lib.tools import DEFAULT_NAME
-from tensorflow_lattice.python.ops.gen_monotonic_projection import monotonic_projection
-from tensorflow_lattice.python.ops.gen_pwl_indexing_calibrator import pwl_indexing_calibrator
-from tensorflow_lattice.python.ops.lattice_ops import lattice
-# pylint: enable=unused-import,wildcard-import,line-too-long
+import tensorflow_lattice.layers
+
+from tensorflow_lattice.python import categorical_calibration_layer
+from tensorflow_lattice.python import categorical_calibration_lib
+from tensorflow_lattice.python import configs
+from tensorflow_lattice.python import estimators
+from tensorflow_lattice.python import lattice_layer
+from tensorflow_lattice.python import lattice_lib
+from tensorflow_lattice.python import linear_layer
+from tensorflow_lattice.python import linear_lib
+from tensorflow_lattice.python import model_info
+from tensorflow_lattice.python import parallel_combination_layer
+from tensorflow_lattice.python import pwl_calibration_layer
+from tensorflow_lattice.python import pwl_calibration_lib
+from tensorflow_lattice.python import test_utils
+from tensorflow_lattice.python import visualization
diff --git a/tensorflow_lattice/cc/BUILD b/tensorflow_lattice/cc/BUILD
deleted file mode 100644
index d656718..0000000
--- a/tensorflow_lattice/cc/BUILD
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-licenses(["notice"]) # Apache 2.0 License
-
-package(
- default_visibility = [
- "//tensorflow_lattice:__subpackages__",
- ],
-)
-
-exports_files(["LICENSE"])
-
-load(
- "//tensorflow_lattice:tensorflow_lattice.bzl",
- "rpath_linkopts",
-)
-load(
- "@org_tensorflow//tensorflow:tensorflow.bzl",
- "tf_cc_test",
- "tf_custom_op_library",
- "tf_gen_op_libs",
-)
-
-tf_custom_op_library(
- name = "ops/_lattice_ops.so",
- srcs = [
- ":ops/lattice_interpolation_ops.cc",
- ":ops/monotone_lattice_ops.cc",
- ],
- linkopts = rpath_linkopts("ops/_lattice_ops.so"),
- deps = [
- "//tensorflow_lattice/cc/kernels:lattice_kernels",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- ],
-)
-
-tf_custom_op_library(
- name = "ops/_pwl_calibration_ops.so",
- srcs = [
- ":ops/monotonic_projection_op.cc",
- ":ops/pwl_indexing_calibrator_ops.cc",
- ],
- linkopts = rpath_linkopts("ops/_pwl_calibration_ops.so"),
- deps = [
- "//tensorflow_lattice/cc/kernels:pwl_calibration_kernels",
- ],
-)
-
-cc_library(
- name = "lattice_ops",
- deps = [
- ":lattice_interpolation_ops_op_lib",
- ":monotone_lattice_ops_op_lib",
- ],
- alwayslink = 1,
-)
-
-cc_library(
- name = "pwl_calibration_ops",
- deps = [
- ":monotonic_projection_op_op_lib",
- ":pwl_indexing_calibrator_ops_op_lib",
- ],
- alwayslink = 1,
-)
-
-# Collection of operators.
-tf_gen_op_libs(
- op_lib_names = ["pwl_indexing_calibrator_ops"],
- deps = [
- "//tensorflow_lattice/cc/kernels:pwl_indexing_calibrator_kernels",
- "@org_tensorflow//tensorflow/core:lib",
- ],
-)
-
-tf_gen_op_libs(
- op_lib_names = ["monotonic_projection_op"],
- deps = [
- "//tensorflow_lattice/cc/kernels:monotonic_projection_kernel",
- "@org_tensorflow//tensorflow/core:lib",
- ],
-)
-
-tf_gen_op_libs(
- op_lib_names = ["lattice_interpolation_ops"],
- deps = [
- "//tensorflow_lattice/cc/kernels:hypercube_interpolation_kernels",
- "//tensorflow_lattice/cc/kernels:lattice_interpolation_base",
- "//tensorflow_lattice/cc/kernels:simplex_interpolation_kernels",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:lib",
- ],
-)
-
-tf_gen_op_libs(
- op_lib_names = ["monotone_lattice_ops"],
- deps = [
- "//tensorflow_lattice/cc/kernels:monotone_lattice_kernels",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:lib",
- ],
-)
-
-# C++ tests.
-cc_library(
- name = "test_main",
- testonly = 1,
- srcs = ["test_tools/test_main.cc"],
- deps = [
- "@org_tensorflow//tensorflow/core:test",
- ],
-)
-
-tf_cc_test(
- name = "pwl_indexing_calibrator_ops_test",
- size = "small",
- srcs = ["ops/pwl_indexing_calibrator_ops_test.cc"],
- linkopts = rpath_linkopts("pwl_indexing_calibrator_ops_test"),
- deps = [
- ":pwl_indexing_calibrator_ops_op_lib",
- ":test_main",
- "@org_tensorflow//tensorflow/core:framework",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:test",
- "@org_tensorflow//tensorflow/core:testlib",
- "@org_tensorflow//tensorflow/core/kernels:ops_testutil",
- ],
-)
-
-cc_library(
- name = "hypercube_interpolation_ops_test_lib",
- testonly = 1,
- srcs = ["ops/hypercube_interpolation_ops_test_p.cc"],
- hdrs = ["ops/hypercube_interpolation_ops_test.h"],
- linkopts = rpath_linkopts("hypercube_interpolation_ops_test"),
- deps = [
- ":lattice_interpolation_ops_op_lib",
- ":test_main",
- "@org_tensorflow//tensorflow/core:core_cpu",
- "@org_tensorflow//tensorflow/core:framework",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:protos_all_cc",
- "@org_tensorflow//tensorflow/core:test",
- "@org_tensorflow//tensorflow/core:testlib",
- "@org_tensorflow//tensorflow/core/kernels:ops_testutil",
- "@org_tensorflow//tensorflow/core/kernels:ops_util_hdrs",
- ],
-)
-
-tf_cc_test(
- name = "hypercube_interpolation_ops_test",
- size = "small",
- srcs = ["ops/hypercube_interpolation_ops_test.cc"],
- deps = [
- ":hypercube_interpolation_ops_test_lib",
- "@org_tensorflow//tensorflow/core:framework",
- "@org_tensorflow//tensorflow/core:test",
- "@org_tensorflow//tensorflow/core:testlib",
- ],
-)
-
-tf_cc_test(
- name = "simplex_interpolation_ops_test",
- size = "small",
- srcs = ["ops/simplex_interpolation_ops_test.cc"],
- linkopts = rpath_linkopts("simplex_interpolation_ops_test"),
- deps = [
- ":lattice_interpolation_ops_op_lib",
- ":test_main",
- "@org_tensorflow//tensorflow/core:framework",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:test",
- "@org_tensorflow//tensorflow/core:testlib",
- "@org_tensorflow//tensorflow/core/kernels:ops_testutil",
- ],
-)
-
-tf_cc_test(
- name = "monotonic_projection_op_test",
- size = "small",
- srcs = ["ops/monotonic_projection_op_test.cc"],
- linkopts = rpath_linkopts("monotonic_projection_op_test"),
- deps = [
- ":monotonic_projection_op_op_lib",
- ":test_main",
- "@org_tensorflow//tensorflow/core:framework",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:test",
- "@org_tensorflow//tensorflow/core:testlib",
- "@org_tensorflow//tensorflow/core/kernels:ops_testutil",
- ],
-)
diff --git a/tensorflow_lattice/cc/kernels/BUILD b/tensorflow_lattice/cc/kernels/BUILD
deleted file mode 100644
index ca9b499..0000000
--- a/tensorflow_lattice/cc/kernels/BUILD
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright 2017 The TensorFlow Lattice Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-licenses(["notice"]) # Apache 2.0
-
-package(
- default_visibility = [
- "//tensorflow_lattice:__subpackages__",
- ],
-)
-
-load(
- "//tensorflow_lattice:tensorflow_lattice.bzl",
- "rpath_linkopts",
-)
-load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_kernel_library")
-load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_test")
-
-# Piecewise-linear calibration kernels
-cc_library(
- name = "pwl_calibration_kernels",
- deps = [
- ":monotonic_projection_kernel",
- ":pwl_indexing_calibrator_kernels",
- ],
-)
-
-tf_kernel_library(
- name = "pwl_indexing_calibrator_kernels",
- srcs = ["pwl_indexing_calibrator_kernels.cc"],
- deps = [
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- "@org_tensorflow//tensorflow/core:framework_lite",
- "@protobuf_archive//:protobuf",
- ],
-)
-
-tf_kernel_library(
- name = "monotonic_projection_kernel",
- srcs = ["monotonic_projection_kernel.cc"],
- deps = [
- ":monotonic_projections",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- "@protobuf_archive//:protobuf",
- ],
-)
-
-# Lattice interpolation kernels
-cc_library(
- name = "lattice_kernels",
- deps = [
- ":hypercube_interpolation_kernels",
- ":monotone_lattice_kernels",
- ":simplex_interpolation_kernels",
- ],
-)
-
-cc_library(
- name = "lattice_interpolation_base",
- srcs = ["lattice_interpolation_base.cc"],
- hdrs = ["lattice_interpolation_base.h"],
- deps = [
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- ],
-)
-
-tf_kernel_library(
- name = "hypercube_interpolation_kernels",
- srcs = ["hypercube_interpolation_kernels.cc"],
- deps = [
- ":lattice_interpolation_base",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- ],
- alwayslink = 1,
-)
-
-tf_kernel_library(
- name = "simplex_interpolation_kernels",
- srcs = ["simplex_interpolation_kernels.cc"],
- deps = [
- ":lattice_interpolation_base",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- ],
- alwayslink = 1,
-)
-
-# Monotonic projections.
-cc_library(
- name = "monotonic_projections",
- hdrs = ["monotonic_projections.h"],
- deps = [
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- "@org_tensorflow//tensorflow/core:framework_lite",
- ],
-)
-
-cc_library(
- name = "lattice_raw_iterator",
- srcs = ["lattice_raw_iterator.cc"],
- hdrs = ["lattice_raw_iterator.h"],
- deps = ["//tensorflow_lattice/cc/lib:lattice_structure"],
-)
-
-tf_cc_test(
- name = "lattice_raw_iterator_test",
- srcs = ["lattice_raw_iterator_test.cc"],
- linkopts = rpath_linkopts("lattice_raw_iterator_test"),
- deps = [
- ":lattice_raw_iterator",
- "//tensorflow_lattice/cc:test_main",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:test",
- ],
-)
-
-cc_library(
- name = "monotonic_lattice_projections",
- hdrs = ["monotonic_lattice_projections.h"],
- deps = [
- ":lattice_raw_iterator",
- ":monotonic_projections",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- ],
-)
-
-tf_cc_test(
- name = "monotonic_lattice_projections_test",
- srcs = ["monotonic_lattice_projections_test.cc"],
- linkopts = rpath_linkopts("monotonic_lattice_projections_test"),
- deps = [
- ":monotonic_lattice_projections",
- "//tensorflow_lattice/cc:test_main",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:lib",
- "@org_tensorflow//tensorflow/core:test",
- ],
-)
-
-# Monotone lattice kernels.
-tf_kernel_library(
- name = "monotone_lattice_kernels",
- srcs = ["monotone_lattice_kernels.cc"],
- deps = [
- ":lattice_interpolation_base",
- ":monotonic_lattice_projections",
- "//tensorflow_lattice/cc/lib:lattice_structure",
- "@org_tensorflow//tensorflow/core:framework_headers_lib",
- ],
- alwayslink = 1,
-)
diff --git a/tensorflow_lattice/cc/kernels/hypercube_interpolation_kernels.cc b/tensorflow_lattice/cc/kernels/hypercube_interpolation_kernels.cc
deleted file mode 100644
index 7de1672..0000000
--- a/tensorflow_lattice/cc/kernels/hypercube_interpolation_kernels.cc
+++ /dev/null
@@ -1,348 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include
-#include
-
-#include "tensorflow_lattice/cc/kernels/lattice_interpolation_base.h"
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/platform/logging.h"
-
-namespace tensorflow {
-namespace lattice {
-
-// HypercubeInterpolationOpKernel returns interpolation weights.
-template
-class HypercubeInterpolationOpKernel
- : public LatticeInterpolationOpBase {
- public:
- explicit HypercubeInterpolationOpKernel(OpKernelConstruction* context)
- : LatticeInterpolationOpBase(context) {
-
- constexpr int64 kBaseCost = 20;
- constexpr int64 kCostPerCellVertex = 20;
- constexpr int64 kWeightInitializationCost = 1;
- this->SetCostPerExample(
- kCostPerCellVertex * this->GetLatticeStructure().NumVerticesPerCell() +
- kWeightInitializationCost * this->GetLatticeStructure().NumVertices() +
- kBaseCost);
- }
-
- private:
- InterpolationWeights ComputeInterpolationWeights(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input_vector) const final;
-
- TF_DISALLOW_COPY_AND_ASSIGN(HypercubeInterpolationOpKernel);
-};
-
-// HypercubeGradientOpKernel returns gradient with respect to the
-// input.
-template
-class HypercubeGradientOpKernel : public LatticeGradientOpBase {
- public:
- explicit HypercubeGradientOpKernel(OpKernelConstruction* context)
- : LatticeGradientOpBase(context) {
-
- constexpr int64 kBaseCost = 20;
- constexpr int64 kCostPerCellVertex = 20;
- this->SetCostPerExample(
- kCostPerCellVertex * this->GetLatticeStructure().Dimension() *
- this->GetLatticeStructure().NumVerticesPerCell() +
- kBaseCost);
- }
-
- private:
- std::vector ComputeGradWrtInput(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input_vector,
- typename TTypes::UnalignedConstFlat weight_vector,
- typename TTypes::UnalignedConstFlat grad_wrt_weight_vector)
- const final;
-
- TF_DISALLOW_COPY_AND_ASSIGN(HypercubeGradientOpKernel);
-};
-
-// Produces linear interpolation weights for an input that is in the unit
-// hypercube (the residual), as well as the corresponding indices in the lattice
-// (based on the bottom_corner). Both the weights and the indices are computed
-// during the same loop for efficiency, but we'll explain their computations
-// separately. Also returns the residual vector from the bottom corner of the
-// hypercube cell. See http://jmlr.org/papers/v17/15-243.html for more details.
-//
-// Calculating the linear interpolation weights
-// --------------------------------------------
-// The linear interpolation weights on each vertex are the volumes of the
-// hyperrectangles formed by partitioning the unit hypercube at the input.
-// Example: 2D case - Draw a unit square. Draw an input x in the square.
-// Draw horizontal and vertical lines through x. That forms 4 boxes - the
-// volume of these boxes are the weights. Note the boxes are a partition of
-// the unit square, so the sum of the areas (volumes) of the boxes sums to 1.
-// The linear interpolation weight on each vertex is the volume of the box in
-// the opposite corner (so that if you move x close to one corner, the weight
-// on that corner grows). Mathematically in the 2D case (and generalizes
-// directly to higher D) the weights are:
-// weight([0, 0]) = (1 - input[0]) * (1 - input[1])
-// weight([0, 1]) = (1 - input[0]) * input[1]
-// weight([1, 0]) = input[0] * (1 - input[1])
-// weight([1, 1]) = input[0] * input[1]
-//
-// Computing each of the 2^D weights directly using above formula would take
-// O(2^D * D) operations. Instead we take advantage of the many repeated
-// calculations to reduce this to a O(2^D) computation as follows:
-// Let's start by initializing weight to 1 for every vertex. Lets consider
-// current vertex. Suppose its bit representation is "0110". For every "0" we
-// should multiply its weight on (1 - input[i]), where i is a sequence number
-// of correspondent bit. And for each "1" we should multiply its weight on
-// input[i].
-// Let us iterate through all vertices in dfs (lexicographical) order. Let
-// current_highest_dimension be a sequence number of highest bit in binary
-// representation of current vertex. At this moment we multiplied
-// correspondent weights for all dimensions below current_highest_dimension.
-// Now, let us update current_highest_dimension.
-// Example:
-// If "ii" is iterating on "??x010" (the location of memory where finally the
-// weight for "00_1_010" will be stored), then we set the value for
-//
-// // Resetting bit x of ??x010.
-// earlier_ii = ii ^ (1 << current_highest_dimension)
-// // Now ii represents ?x1010.
-// weight[ii] = weight[earlier_ii] * input[current_highest_dimension]
-// // earlier_ii represents ?x0010.
-// weight[earlier_ii] *= (1 - input[current_highest_dimension])
-//
-// Example for 2x2 case:
-// weight[0] is weight on [0,0]
-// weight[1] is weight on [1,0]
-// weight[2] is weight on [0,1]
-// weight[3] is weight on [1,1]
-// Initialization: weight[0] = 1, no other weight set.
-// Loop: ii = 1. current_highest_dimension = 0
-// weight[1] = weight[0] * input[0];
-// weight[0] = weight[0] * (1 - input[0])
-// ii = 2. current_highest_dimension = 1. (highest bit of ii got index 1 at
-// this step, so update current_highest_dimension to reflect this)
-// weight[2] = weight[0] * input[1];
-// weight[0] = weight[0] * (1 - input[1])
-// ii = 3. current_highest_dimension = 1.
-// weight[3] = weight[1] * input[1];
-// weight[1] = weight[1] * (1 - input[1])
-//
-// Calculating the corresponding indices. Notice if the lattice sizes are larger
-// than 2, the indices of the wieghts will be adjusted according to the
-// LatticeStructure.strides.
-// -------------------------------------
-// The lattice index for the iith vertex in the cell is the same as the index
-// we computed for an earlier neighbor vertex, but offset by
-// lattice_strides[(dimensions - 1) - current_highest_dimension].
-// Example:
-// Suppose we have a 2x2 lattice. We should output vertices in the order:
-// [0,0], [1,0], [0,1], [1,1].
-// Bottom corner is [0,0], so vertices[0] = 0 already set.
-// let ii = 1. It corresponds to vertex [0,1]. current_highest_dimension = 0.
-// lattice index of vertices[1] is different from lattice index of
-// vertices[0] in dimension current_highest_dimension = 0 (counting from the
-// end).
-// vertices[1] = vertices[0] + lattice_strides[0] = [1, 0];
-// let ii = 2, it corresponds to [1,0]. current_highest_dimension becomes 1.
-// vertices[2] = vertices[0] + lattice_strides[1] = [0, 1];
-// vertices[3] = vertices[1] + lattice_strides[1] = [1, 1];
-//
-
-template
-InterpolationWeights
-HypercubeInterpolationOpKernel::ComputeInterpolationWeights(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input) const {
- const BottomCornerIndexAndResidual bottom_corner_index_and_residual =
- lattice_structure.GetBottomCornerIndexAndResidual(input);
- const std::vector& residual =
- bottom_corner_index_and_residual.residual;
- const int64 num_vertices_per_cell = lattice_structure.NumVerticesPerCell();
- // interpolation weight contains upto num_vertices_per_cell non-zero elements.
- InterpolationWeights interpolation_weights;
- std::vector& index = interpolation_weights.indices;
- std::vector& weight = interpolation_weights.weights;
-
- index.resize(num_vertices_per_cell);
- weight.resize(num_vertices_per_cell);
- index[0] = bottom_corner_index_and_residual.bottom_corner_index;
- weight[0] = 1.0;
-
- const int64 input_dim = lattice_structure.Dimension();
- const std::vector& strides = lattice_structure.Strides();
-
- int64 current_highest_dimension = 0;
- Dtype current_residual_value = residual[current_highest_dimension];
- for (int64 ii = 1; ii < num_vertices_per_cell; ++ii) {
- // Make sure that we're within the bounds of the unit hypercube.
- DCHECK_GE(current_residual_value, 0);
- DCHECK_LE(current_residual_value, 1);
- // Sanity check: current_highest_dimension has better respect the bounds.
- DCHECK_GE(current_highest_dimension, 0);
- DCHECK_LT(current_highest_dimension, input_dim);
-
- const int64 earlier_ii = ii ^ (1 << current_highest_dimension);
- index[ii] = index[earlier_ii] + strides[current_highest_dimension];
- weight[ii] = weight[earlier_ii] * current_residual_value;
- weight[earlier_ii] *= (1.0 - current_residual_value);
-
- if ((ii & (ii + 1)) == 0) {
- // If ii + 1 is power of 2, then current_highest_dimension has changed,
- // that means, that we are processing next dimension.
- ++current_highest_dimension;
- if (input_dim >= current_highest_dimension + 1) {
- current_residual_value = residual[current_highest_dimension];
- }
- }
- }
- return interpolation_weights;
-}
-
-// The goal of the gradient op is, given grad_wrt_weight:
-// (dy / dweight[0], dy / dweight[1], dy / dweight[2], dy / dweight[3]),
-// to compute the grad_wrt_input:
-// (dy / dx[0], ..., dy / dx[D-1]).
-//
-// We know that:
-// dy/dx[jj] = sum_{ii \in weights} dy/dweight[ii] * dweight[ii]/dx[jj]
-//
-// For dweight[ii]/dx[jj], we use the following observation.
-// For any 2 x ... x 2 lattices:
-// weight[ii] + weight[jj] == constant.
-// for all (ii, jj) pair such that ii ^ jj == 2 ** k and ii < jj. (This means
-// ii's kth vertex is 0, and jj's kth vertex is 1, and other vertices are same.)
-// Moreover, for such (ii, jj) pair, we have
-// dweight[ii] / dx[k] == -(weight[ii] + weight[jj])
-// dweight[jj] / dx[k] == (weight[ii] + weight[jj])
-//
-// To see this, let us consider 2 x 2 lattice case.
-//
-// Recall that
-// weight[0] = (1 - x[0]) * (1 - x[1])
-// weight[1] = x[0] * (1 - x[1])
-// weight[2] = (1 - x[0]) * x[1]
-// weight[3] = x[0] * x[1]
-//
-// Therefore,
-// dweight[0] / dx[0] = -(1 - x[1]) == -(weight[0] + weight[1])
-// dweight[1] / dx[0] = (1 - x[1]) == (weight[0] + weight[1])
-// dweight[2] / dx[0] = -x[1] == -(weight[2] + weight[3])
-// dweight[3] / dx[0] = x[1] == (weight[2] + weight[3]),
-// and
-// dweight[0] / dx[1] = -(1 - x[0]) == -(weight[0] + weight[2])
-// dweight[1] / dx[1] = -x[0] == -(weight[1] + weight[3])
-// dweight[2] / dx[1] = (1 - x[0]) == (weight[0] + weight[2])
-// dweight[3] / dx[1] = x[0] == (weight[1] + weight[3]).
-//
-// So the summation part marginalize the dependency of x[k], and the sign is
-// minus if the kth vertex is 0, and plus if the kth vertex is 1.
-// The following code computes the gradient using the (ii, jj) pair by
-// enumerating all indices whose kth vertex is 0.
-// In order to support the multi-cell lattice, the code constructs a list
-// (nnz_weight below) that maps the indices in the 2 x .... x 2 cell holding x
-// into indices in the multi-cell.
-//
-// Including this construction, the overall complexity is
-// O((input_dim + 2) * 2 ** (input_dim - 1)).
-//
-// Also when x[jj] < 0 or x[jj] > lattice_size[jj], the input is out of bound.
-// So the change in the input should not change the output, therefore the
-// gradient should be zero.
-//
-
-template
-std::vector HypercubeGradientOpKernel::ComputeGradWrtInput(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input,
- typename TTypes::UnalignedConstFlat weight,
- typename TTypes::UnalignedConstFlat grad_wrt_weight) const {
- const BottomCornerIndexAndResidual bottom_corner_index_and_residual =
- lattice_structure.GetBottomCornerIndexAndResidual(input);
- const int64 input_dim = lattice_structure.Dimension();
- std::vector grad_wrt_input(input_dim, 0.0);
-
- // There are at most 2 ** n number of non-zero elements in weight.
- // nnz_weight_index keeps the index of non-zero element in the weight.
- // The following loop enumerats all vertices in cell in the following order.
- // [0, 0, ..., 0], [1, 0, ...,0], [0, 1, ..., 0], ..., [1, 1, ..., 1].
- std::vector nnz_weight_index(lattice_structure.NumVerticesPerCell());
-
- int64 current_dim = 0;
- int64 current_bit = 1; // Always 1 << current_dim;
- nnz_weight_index[0] = bottom_corner_index_and_residual.bottom_corner_index;
- const std::vector& strides = lattice_structure.Strides();
- for (int64 ii = 1; ii < nnz_weight_index.size(); ++ii) {
- if ((ii & current_bit) == 0) {
- ++current_dim;
- current_bit <<= 1;
- }
- // ii - current_bit is the base.
- // ii is the current one, which is always an upper layer in the current
- // dimension.
- nnz_weight_index[ii] =
- nnz_weight_index[ii - current_bit] + strides[current_dim];
- }
-
- // Compute the gradient for each input.
- for (int64 ii = 0; ii < input_dim; ++ii) {
- // If out_of_bound, gradient is 0.
- if (bottom_corner_index_and_residual.out_of_bound[ii]) {
- continue;
- }
- // Only process the bottom faces.
- int64 bit = 1 << ii;
- int64 stride = strides[ii];
- Dtype grad_ii = 0.0;
- for (int64 index = 0; index < lattice_structure.NumVerticesPerCell();
- ++index) {
- // Upper face. Skip this index.
- if (index & bit) {
- continue;
- }
- // Bottom face.
- int64 lower_index = nnz_weight_index[index];
- int64 upper_index = lower_index + stride;
- grad_ii += (weight(lower_index) + weight(upper_index)) *
- (grad_wrt_weight(upper_index) - grad_wrt_weight(lower_index));
- }
- grad_wrt_input[ii] = grad_ii;
- }
-
- return grad_wrt_input;
-}
-// Register kernels for float and double.
-REGISTER_KERNEL_BUILDER(Name("HypercubeInterpolation")
- .Device(DEVICE_CPU)
- .TypeConstraint("Dtype"),
- HypercubeInterpolationOpKernel);
-
-REGISTER_KERNEL_BUILDER(Name("HypercubeInterpolation")
- .Device(DEVICE_CPU)
- .TypeConstraint("Dtype"),
- HypercubeInterpolationOpKernel);
-
-REGISTER_KERNEL_BUILDER(
- Name("HypercubeGradient").Device(DEVICE_CPU).TypeConstraint("Dtype"),
- HypercubeGradientOpKernel);
-
-REGISTER_KERNEL_BUILDER(Name("HypercubeGradient")
- .Device(DEVICE_CPU)
- .TypeConstraint("Dtype"),
- HypercubeGradientOpKernel);
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/lattice_interpolation_base.cc b/tensorflow_lattice/cc/kernels/lattice_interpolation_base.cc
deleted file mode 100644
index aceedb9..0000000
--- a/tensorflow_lattice/cc/kernels/lattice_interpolation_base.cc
+++ /dev/null
@@ -1,54 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow_lattice/cc/kernels/lattice_interpolation_base.h"
-
-#include
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-
-namespace tensorflow {
-namespace lattice {
-
-using errors::InvalidArgument;
-using str_util::Join;
-
-LatticeOpBase::LatticeOpBase(OpKernelConstruction* context)
- : OpKernel(context), cost_per_example_(1.0) {
- std::vector lattice_sizes;
- OP_REQUIRES_OK(context, context->GetAttr("lattice_sizes", &lattice_sizes));
- OP_REQUIRES(context, LatticeStructure::IsValidLatticeSizes(lattice_sizes),
- InvalidArgument(Join(lattice_sizes, ","),
- " is not a valid lattice size"));
- lattice_structure_ =
- std::unique_ptr(new LatticeStructure(lattice_sizes));
-}
-
-void LatticeOpBase::CheckShape(OpKernelContext* context, const Tensor& tensor,
- const std::vector& expected_shape) const {
- OP_REQUIRES(context, tensor.dims() == expected_shape.size(),
- InvalidArgument("expect rank ", expected_shape.size(), "but got ",
- tensor.DebugString()));
-
- for (int ii = 0; ii < expected_shape.size(); ++ii) {
- OP_REQUIRES(context, tensor.dim_size(ii) == expected_shape[ii],
- InvalidArgument("expect ", ii, "-dim: ", expected_shape[ii],
- "but got ", tensor.DebugString()));
- }
-}
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/lattice_interpolation_base.h b/tensorflow_lattice/cc/kernels/lattice_interpolation_base.h
deleted file mode 100644
index 19456bb..0000000
--- a/tensorflow_lattice/cc/kernels/lattice_interpolation_base.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Lattice interpolation base class.
-#ifndef TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_INTERPOLATION_BASE_H_
-#define TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_INTERPOLATION_BASE_H_
-
-#include
-#include
-#include
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/util/work_sharder.h"
-
-namespace tensorflow {
-namespace lattice {
-
-template
-struct InterpolationWeights {
- std::vector indices;
- std::vector weights;
-};
-
-// LatticeOpBase class contains common part of all lattice operators as lattice
-// structure initialization.
-class LatticeOpBase : public OpKernel {
- public:
- explicit LatticeOpBase(OpKernelConstruction* context);
-
- // Returns the lattice_structure.
- const LatticeStructure& GetLatticeStructure() const {
- return *lattice_structure_;
- }
-
- // Check whether the shape of tensor is same with expected_shape.
- void CheckShape(OpKernelContext* context, const Tensor& tensor,
- const std::vector& expected_shape) const;
-
- // Cost per example.
- const int64 CostPerExample() const { return cost_per_example_; }
- void SetCostPerExample(const int64 cost_per_example) {
- cost_per_example_ = cost_per_example;
- }
-
- private:
- std::unique_ptr lattice_structure_;
- int64 cost_per_example_;
-};
-
-// LatticeInterpolationOpBase is a base class for
-// HypercubeInterpolationOpKernel and SimplexInterpolationOpKernel.
-// The InterpolationWeights computation should be implemented in
-// ComputeInterpolationWeights method.
-template
-class LatticeInterpolationOpBase : public LatticeOpBase {
- public:
- explicit LatticeInterpolationOpBase(OpKernelConstruction* context)
- : LatticeOpBase(context) {}
-
- void Compute(OpKernelContext* context) override;
-
- protected:
- virtual InterpolationWeights ComputeInterpolationWeights(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input_vector) const = 0;
-
- private:
- // Apply InterpolationWeights to each slice of tensors.
- void BatchInterpolationWorker(const Tensor& input_tensor, const int start,
- const int limit,
- Tensor* interpolation_weights_tensor) const;
-};
-
-template
-void LatticeInterpolationOpBase::BatchInterpolationWorker(
- const Tensor& input_tensor, const int start, const int limit,
- Tensor* interpolation_weights_tensor) const {
- for (int ii = start; ii < limit; ++ii) {
- // Get iith input vector.
- const auto input_row_ii = input_tensor.Slice(ii, ii + 1);
-
- // Compute weight-index pairs.
- const InterpolationWeights interpolation_weights =
- ComputeInterpolationWeights(GetLatticeStructure(),
- input_row_ii.unaligned_flat());
-
- // Get iith interpolation weight vector (output).
- auto interpolation_weights_row_ii =
- interpolation_weights_tensor->Slice(ii, ii + 1).unaligned_flat();
-
- // Assign values to interpolation weight vector.
- interpolation_weights_row_ii.setZero();
- DCHECK_EQ(interpolation_weights.indices.size(),
- interpolation_weights.weights.size());
- for (int jj = 0; jj < interpolation_weights.indices.size(); ++jj) {
- interpolation_weights_row_ii(interpolation_weights.indices[jj]) =
- interpolation_weights.weights[jj];
- }
- }
-}
-
-template
-void LatticeInterpolationOpBase::Compute(OpKernelContext* context) {
- const LatticeStructure& lattice_structure = GetLatticeStructure();
- // Grab the input tensor.
- const Tensor& input_tensor = context->input(0);
- // Check the shapes.
- const int64 batch_dim = input_tensor.dim_size(0);
- const int64 input_dim = lattice_structure.Dimension();
- CheckShape(context, input_tensor, {batch_dim, input_dim});
-
- // Allocate interpolation_weights_tensor.
- Tensor* interpolation_weights_tensor = nullptr;
- OP_REQUIRES_OK(
- context,
- context->allocate_output(
- 0, TensorShape({batch_dim, lattice_structure.NumVertices()}),
- &interpolation_weights_tensor));
-
- auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
-
- // Launch threads.
- Shard(worker_threads.num_threads, worker_threads.workers, batch_dim,
- CostPerExample(), [&](int start, int limit) {
- BatchInterpolationWorker(input_tensor, start, limit,
- interpolation_weights_tensor);
- });
-}
-
-// LatticeGradientOpBase is a base class for HypercubeGradientOpKernel and
-// SimplexGradientOpKernel.
-// Computing Gradient with respect to input should be should be implemented in
-// ComputeGradWrtInput method.
-template
-class LatticeGradientOpBase : public LatticeOpBase {
- public:
- explicit LatticeGradientOpBase(OpKernelConstruction* context)
- : LatticeOpBase(context) {}
-
- void Compute(OpKernelContext* context) override;
-
- protected:
- virtual std::vector ComputeGradWrtInput(
- const LatticeStructure& lattice_structure,
- typename TTypes::UnalignedConstFlat input_vector,
- typename TTypes::UnalignedConstFlat weight_vector,
- typename TTypes::UnalignedConstFlat grad_wrt_weight_vector)
- const = 0;
-
- private:
- // Apply grad_wrt_input_fn_ to each slice of tensors.
- void BatchGradientWorker(const Tensor& input_tensor,
- const Tensor& weight_tensor,
- const Tensor& grad_wrt_weight_tensor,
- const int start, const int limit,
- Tensor* grad_wrt_input_tensor) const;
-};
-
-// BatchGradientWorker computes the gradient with respect to the input of each
-// row.
-template
-void LatticeGradientOpBase::BatchGradientWorker(
- const Tensor& input_tensor, const Tensor& weight_tensor,
- const Tensor& grad_wrt_weight_tensor, const int start, const int limit,
- Tensor* grad_wrt_input_tensor) const {
- auto grad_wrt_input_matrix = grad_wrt_input_tensor->matrix();
- for (int ii = start; ii < limit; ++ii) {
- const auto input_row_ii = input_tensor.Slice(ii, ii + 1);
- const auto weight_row_ii = weight_tensor.Slice(ii, ii + 1);
- const auto grad_wrt_weight_row_ii =
- grad_wrt_weight_tensor.Slice(ii, ii + 1);
-
- const std::vector grad_wrt_input = ComputeGradWrtInput(
- GetLatticeStructure(), input_row_ii.unaligned_flat(),
- weight_row_ii.unaligned_flat(),
- grad_wrt_weight_row_ii.unaligned_flat());
-
- for (int jj = 0; jj < grad_wrt_input.size(); ++jj) {
- grad_wrt_input_matrix(ii, jj) = grad_wrt_input[jj];
- }
- }
-}
-
-template
-void LatticeGradientOpBase::Compute(OpKernelContext* context) {
- const LatticeStructure& lattice_structure = this->GetLatticeStructure();
- const Tensor& input_tensor = context->input(0);
- const Tensor& weight_tensor = context->input(1);
- const Tensor& grad_wrt_weight_tensor = context->input(2);
- // Check the shapes.
- const int64 batch_dim = input_tensor.dim_size(0);
- const int64 input_dim = lattice_structure.Dimension();
- CheckShape(context, input_tensor, {batch_dim, input_dim});
- CheckShape(context, weight_tensor,
- {batch_dim, lattice_structure.NumVertices()});
- CheckShape(context, grad_wrt_weight_tensor,
- {batch_dim, lattice_structure.NumVertices()});
-
- // Dense implementation.
- Tensor* grad_wrt_input_tensor = nullptr;
- OP_REQUIRES_OK(
- context,
- context->allocate_output(0, TensorShape({batch_dim, input_dim}),
- &grad_wrt_input_tensor));
-
- auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
-
- // Launch threads.
- Shard(worker_threads.num_threads, worker_threads.workers, batch_dim,
- CostPerExample(), [&](int start, int limit) {
- BatchGradientWorker(input_tensor, weight_tensor,
- grad_wrt_weight_tensor, start, limit,
- grad_wrt_input_tensor);
- });
-}
-
-} // namespace lattice
-} // namespace tensorflow
-
-#endif // TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_INTERPOLATION_BASE_H_
diff --git a/tensorflow_lattice/cc/kernels/lattice_raw_iterator.cc b/tensorflow_lattice/cc/kernels/lattice_raw_iterator.cc
deleted file mode 100644
index 101dfc1..0000000
--- a/tensorflow_lattice/cc/kernels/lattice_raw_iterator.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow_lattice/cc/kernels/lattice_raw_iterator.h"
-
-#include
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-
-namespace tensorflow {
-namespace lattice {
-
-void LatticeRawIterator::Next() {
- ++index_;
- for (int64 dim = 0; dim < lattice_sizes_.size(); ++dim) {
- ++vertex_[dim];
- if (vertex_[dim] == lattice_sizes_[dim]) {
- vertex_[dim] = 0;
- } else {
- break;
- }
- }
-}
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/lattice_raw_iterator.h b/tensorflow_lattice/cc/kernels/lattice_raw_iterator.h
deleted file mode 100644
index 68e9fb9..0000000
--- a/tensorflow_lattice/cc/kernels/lattice_raw_iterator.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// LatticeRawIterator iterates all vertices in a multi-cell lattice in the
-// column-major order. Note that this indexing (column-major order) should be
-// consistent with LatticeStructure.
-//
-// Iteration example:
-// for (LatticeRawIterator iter(lattice_structure) ; !iter.IsDone();
-// iter.Next()) {
-// const int64 global_index = iter.Index();
-// const int64 vertex_first_dim = iter.VertexDim(0);
-// const int64 vertex_second_dim = iter.VertexDim(1);
-// }
-
-#ifndef TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_RAW_ITERATOR_H_
-#define TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_RAW_ITERATOR_H_
-
-#include
-#include
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-
-namespace tensorflow {
-namespace lattice {
-
-class LatticeRawIterator {
- public:
- explicit LatticeRawIterator(const LatticeStructure& lattice_structure)
- : lattice_sizes_(lattice_structure.LatticeSizes()),
- vertex_(lattice_structure.Dimension(), 0),
- index_(0),
- last_index_(lattice_structure.NumVertices()) {}
-
- // Forwards the iterator.
- void Next();
-
- bool IsDone() const { return index_ >= last_index_; }
- int64 Index() const { return index_; }
- const std::vector<int64>& Vertex() const { return vertex_; }
- int64 VertexDim(const int64 dim) const { return vertex_[dim]; }
-
- private:
- const std::vector<int64> lattice_sizes_;
- std::vector<int64> vertex_;
- int64 index_;
- const int64 last_index_;
-};
-
-} // namespace lattice
-} // namespace tensorflow
-#endif // TENSORFLOW_LATTICE_CC_KERNELS_LATTICE_RAW_ITERATOR_H_
diff --git a/tensorflow_lattice/cc/kernels/lattice_raw_iterator_test.cc b/tensorflow_lattice/cc/kernels/lattice_raw_iterator_test.cc
deleted file mode 100644
index 144825c..0000000
--- a/tensorflow_lattice/cc/kernels/lattice_raw_iterator_test.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow_lattice/cc/kernels/lattice_raw_iterator.h"
-
-#include <vector>
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace lattice {
-
-namespace {
-struct IndexVertexPair {
- int64 index;
- std::vector<int64> vertex;
-};
-} // namespace
-
-// The fixture for testing LatticeRawIteration.
-class LatticeRawIteratorTest : public ::testing::Test {
- protected:
- // Given the lattice sizes, iterate using RawIterator and check whether the
- // iterator visits all expected index_vertex_pairs.
- void CheckFullIteration(
- const std::vector<int64>& lattice_sizes,
- const std::vector<IndexVertexPair>& expected_index_vertex_pairs) {
- LatticeStructure lattice_structure(lattice_sizes);
-
- // Iterate and collect indices and vertices.
- std::vector<IndexVertexPair> visited_index_vertex_pairs;
- for (LatticeRawIterator iter(lattice_structure); !iter.IsDone();
- iter.Next()) {
- visited_index_vertex_pairs.push_back(
- IndexVertexPair{iter.Index(), iter.Vertex()});
- LOG(INFO) << "visited_index : " << iter.Index() << " visited_vertex: ["
- << str_util::Join(iter.Vertex(), ",") << "]";
- }
-
- // Check the result with the expected results.
- CompareIndexVertexPairs(expected_index_vertex_pairs,
- visited_index_vertex_pairs);
- }
-
- private:
- void CompareIndexVertexPairs(
- const std::vector<IndexVertexPair>& index_vertex_pairs1,
- const std::vector<IndexVertexPair>& index_vertex_pairs2) {
- ASSERT_EQ(index_vertex_pairs1.size(), index_vertex_pairs2.size());
- const int num_pairs = index_vertex_pairs1.size();
- std::vector<bool> visited(num_pairs, false);
- // n ** 2 comparison.
- for (const auto& index_vertex_pair2 : index_vertex_pairs2) {
- for (int ii = 0; ii < num_pairs; ++ii) {
- if (index_vertex_pair2.index == index_vertex_pairs1[ii].index &&
- index_vertex_pair2.vertex == index_vertex_pairs1[ii].vertex) {
- visited[ii] = true;
- break;
- }
- }
- }
- // Now check that we visited all index_vertex_pair in index_vertex_pairs1.
- for (const bool is_visited : visited) {
- EXPECT_TRUE(is_visited);
- }
- }
-};
-
-TEST_F(LatticeRawIteratorTest, FullIterationWithTwoByThree) {
- CheckFullIteration(
- /*lattice_sizes=*/{2, 3}, /*expected_index_vertex_pairs=*/{{0, {0, 0}},
- {1, {1, 0}},
- {2, {0, 1}},
- {3, {1, 1}},
- {4, {0, 2}},
- {5, {1, 2}}});
-}
-
-TEST_F(LatticeRawIteratorTest, FullIterationWithThreeByTwoByTwo) {
- CheckFullIteration(
- /*lattice_sizes=*/{3, 2, 2},
- /*expected_index_vertex_pairs=*/{{0, {0, 0, 0}},
- {1, {1, 0, 0}},
- {2, {2, 0, 0}},
- {3, {0, 1, 0}},
- {4, {1, 1, 0}},
- {5, {2, 1, 0}},
- {6, {0, 0, 1}},
- {7, {1, 0, 1}},
- {8, {2, 0, 1}},
- {9, {0, 1, 1}},
- {10, {1, 1, 1}},
- {11, {2, 1, 1}}});
-}
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/monotone_lattice_kernels.cc b/tensorflow_lattice/cc/kernels/monotone_lattice_kernels.cc
deleted file mode 100644
index 6ff06ea..0000000
--- a/tensorflow_lattice/cc/kernels/monotone_lattice_kernels.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include <memory>
-#include <vector>
-
-#include "tensorflow_lattice/cc/kernels/lattice_interpolation_base.h"
-#include "tensorflow_lattice/cc/kernels/monotonic_lattice_projections.h"
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/util/work_sharder.h"
-
-namespace tensorflow {
-namespace lattice {
-
-// MonotoneLatticeOp returns the projected lattice param vectors onto the
-// subspace that satisfies monotonicity constraints specified by is_monotone.
-// If is_monotone[k] == true, then kth input will have a non-decreasing
-// monotonicity constraint, and if is_monotone[k] == false, then the kth input has
-// no monotonicity constraints.
-//
-// Lattice param tensor is expected to be a 2d tensor, [num_outputs,
-// num_parameters], where each row represents a parameter from multi-cell
-// lattice.
-template <typename Dtype>
-class MonotoneLatticeOp : public LatticeOpBase<Dtype> {
- public:
- static_assert(std::is_floating_point<Dtype>::value,
- "Dtype needs to be a floating point");
-
- explicit MonotoneLatticeOp(OpKernelConstruction* context);
- void Compute(OpKernelContext* context) final;
-
- TF_DISALLOW_COPY_AND_ASSIGN(MonotoneLatticeOp);
-
- private:
- void ProjectionWorker(const Tensor& lattice_params_tensor, int start,
- int limit, int num_parameters,
- Tensor* projection_tensor,
- OpKernelContext* context) const;
-
- std::unique_ptr<MonotoneLatticeProjector<Dtype>> projector_;
-};
-
-template <typename Dtype>
-MonotoneLatticeOp<Dtype>::MonotoneLatticeOp(OpKernelConstruction* context)
- : LatticeOpBase<Dtype>(context) {
- std::vector<bool> is_monotone;
- float tolerance;
- int64 max_iter;
-
- OP_REQUIRES_OK(context, context->GetAttr("is_monotone", &is_monotone));
- OP_REQUIRES_OK(context, context->GetAttr("tolerance", &tolerance));
- OP_REQUIRES_OK(context, context->GetAttr("max_iter", &max_iter));
-
- const int64 lattice_dim = GetLatticeStructure().Dimension();
- OP_REQUIRES(context, (is_monotone.size() == lattice_dim),
- errors::InvalidArgument(
- "lattice dimension :", lattice_dim,
- " != ", "is_monotone dimension: ", is_monotone.size()));
-
- std::vector<int64> monotone_dims;
- for (int ii = 0; ii < lattice_dim; ++ii) {
- if (is_monotone[ii]) {
- monotone_dims.push_back(ii);
- }
- }
-
- projector_ = std::unique_ptr<MonotoneLatticeProjector<Dtype>>(
- new MonotoneLatticeProjector<Dtype>(GetLatticeStructure(), monotone_dims,
- tolerance, max_iter));
-
-
- constexpr int64 kInitCost = 20;
- constexpr int64 kBaseCost = 20;
- constexpr int64 kConstraintCost = 20;
- // For initialization: constant0 * GetLatticeStructure().NumVertices().
- // Each iteration in ADMM:
- // 1. Projection for each constraint: constant1 * NumVertices().
- // 2. Center variable update: constant2 * NumVertices()
- // 3. Dual variable update for each constraint: constant3 *
- // NumVertices().
- // Therefore, the total cost of each iteration is
- // ((constant1 + constant3) * number of monotone dimensions + constant2) *
- // NumVertices().
- // The number of iteration is bounded by min(max_iter, O(||true_projection -
- // initial_point||_2/epsilon)). But since the latter is hard to obtain, we use
- // max_iter as an upper bound.
- // So the total cost is given by
- //
- // ((kConstraintCost * monotone_dims.size() + kBaseCost) * max_iter +
- // kInitCost) * GetLatticeStructure().NumVertices()
- const int64 cost_per_example =
- ((kConstraintCost * monotone_dims.size() + kBaseCost) * max_iter +
- kInitCost) *
- GetLatticeStructure().NumVertices();
- SetCostPerExample(cost_per_example);
-}
-
-template <typename Dtype>
-void MonotoneLatticeOp<Dtype>::Compute(OpKernelContext* context) {
- // Grab the param tensor. Expect [num_outputs, num_parameters] tensor.
- const Tensor& lattice_params_tensor = context->input(0);
-
- OP_REQUIRES(context, lattice_params_tensor.dims() == 2,
- errors::InvalidArgument("expected a 2d tensor, got ",
- lattice_params_tensor.dims()));
- OP_REQUIRES(
- context,
- lattice_params_tensor.dim_size(1) == GetLatticeStructure().NumVertices(),
- errors::InvalidArgument(
- "expected parameter dimension: ", GetLatticeStructure().NumVertices(),
- "got: ", lattice_params_tensor.dim_size(1)));
- const int64 num_outputs = lattice_params_tensor.dim_size(0);
- const int64 num_parameters = lattice_params_tensor.dim_size(1);
-
- Tensor* projection_tensor = nullptr;
- OP_REQUIRES_OK(context, context->allocate_output(
- 0, TensorShape({num_outputs, num_parameters}),
- &projection_tensor));
-
- auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
-
- // A worker that projects lattice_params_tensor[start : start + limit - 1, :]
- // and saves the result to from the
- // projection_tensor[start : start + limit - 1, :].
- // This lambda captures everything including "this" to use ProjectionWorker
- // method and all of captured states' lifetime is longer than Shard operation.
- auto worker = [&](int start, int limit) {
- ProjectionWorker(lattice_params_tensor, start, limit, num_parameters,
- projection_tensor, context);
- };
- // Launch threads.
- Shard(worker_threads.num_threads, worker_threads.workers, num_outputs,
- CostPerExample(), worker);
-}
-
-template <typename Dtype>
-void MonotoneLatticeOp<Dtype>::ProjectionWorker(
- const Tensor& lattice_params_tensor, const int start, const int limit,
- const int num_parameters, Tensor* projection_tensor,
- OpKernelContext* context) const {
- auto lattice_params_matrix = lattice_params_tensor.matrix<Dtype>();
- auto projection_matrix = projection_tensor->matrix<Dtype>();
- for (int row = start; row < limit; ++row) {
- // Computing the projection per each row.
- std::vector<Dtype> lattice_params_vec(num_parameters);
- std::vector<Dtype> projected_lattice_params_vec(num_parameters, 0.0);
-
- // Fetching the lattice parameter.
- for (int ii = 0; ii < num_parameters; ++ii) {
- lattice_params_vec[ii] = lattice_params_matrix(row, ii);
- }
- OP_REQUIRES_OK(context, projector_->Project(lattice_params_vec,
- &projected_lattice_params_vec));
- // Fill-in projected params.
- for (int ii = 0; ii < num_parameters; ++ii) {
- projection_matrix(row, ii) = projected_lattice_params_vec[ii];
- }
- }
-}
-
-// Register kernels for float and double.
-REGISTER_KERNEL_BUILDER(
- Name("MonotoneLattice").Device(DEVICE_CPU).TypeConstraint<float>("Dtype"),
- MonotoneLatticeOp<float>);
-REGISTER_KERNEL_BUILDER(
- Name("MonotoneLattice").Device(DEVICE_CPU).TypeConstraint<double>("Dtype"),
- MonotoneLatticeOp<double>);
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/monotonic_lattice_projections.h b/tensorflow_lattice/cc/kernels/monotonic_lattice_projections.h
deleted file mode 100644
index 767272e..0000000
--- a/tensorflow_lattice/cc/kernels/monotonic_lattice_projections.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Project lattice parameter vector onto monotonicity constraints.
-#ifndef TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_LATTICE_PROJECTIONS_H_
-#define TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_LATTICE_PROJECTIONS_H_
-
-#include
-#include
-#include
-#include
-
-#include "tensorflow_lattice/cc/kernels/lattice_raw_iterator.h"
-#include "tensorflow_lattice/cc/kernels/monotonic_projections.h"
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/logging.h"
-
-namespace tensorflow {
-namespace lattice {
-
-// Monotone Lattice projector projects lattice parameter to monotonicity
-// constraints specified by monotone_dimensions.
-// monotone_dimensions contains a index of (increasing) monotonic dimension.
-// For example, if we want to impose the monotonicity constraint in the 0th and
-// 2th dimensions, then monotone_dimensions = {0, 2}.
-//
-// The implementation uses Alternating Direction Method of Multipliers (ADMM)
-// parallel projection. See Distributed Optimization and Statistical Learning
-// via the Alternating Direction Method of Multipliers
-// (http://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf) Section
- 5.1.2. Parallel projection and Chapter 7 for the theoretical background.
-//
-// Suppose we have K number of convex sets, C_1, ..., C_K, and we want to
-// project a variable x_0 in R^n to the intersection of C_1, ..., C_K.
-// Let x_1, ..., x_K in R^n, and d_1, ..., d_K in R^n.
-// The ADMM parallel projection works as follows.
-//
-// Step 0: Initialize x_center = x_0, and d_k = 0 for all k = 1, ..., K.
- Step 1: x_k <- Projection of (d_k + x_center) onto C_k for all k = 1, ...,
- K.
-// Step 2: x_center <- 0.5 * x_0 + 0.5 * 1/K * sum_k (x_k - d_k).
-// Step 3: d_k <- d_k + x_center - x_k.
-// Step 4: Go back to the Step 1 if sum_k ||x_center - x_k||_1 > eps.
-//
-// Step 1 generates x_k in C_k. However, x_k may not be in C_i where i \neq k.
-// However, the algorithm is guaranteed to converge, which implies d_k should
-// stop being updated after many iterations.
-// Therefore, x_center == x_k for all k eventually. Since x_center == x_1 == ...
-// == x_K, we can conclude that x_center is in the intersection of C_1, ...,
-// C_K.
-// Step 2 generates x_center that minimizes ||x_center - x_0||_2^2 + some
- regularization terms. Upon convergence, regularization terms are zero.
- Therefore, x_center == the projection of x_0 onto the intersection of C_1,
-// ..., C_K, when the algorithm converges.
-//
-// In the following implementation, we set each C_k to be the set of
-// lattice_param_vec that satisfies one dimensional monotonicity constraint.
-// Assuming we have K number of monotone dimensions, the ADMM algorithm
-// perform the projection for a given lattice_param_vec as follows:
-// Step 0: Initialize center = lattice_param_vec and duals[k] =
-// std::vector(param_size, 0.0) for k = 0, ..., K - 1.
-// Step 1: params[k] <- Projection of (duals[k] + center) onto the kth 1D
-// monotonicity constraint. (Here + means an elementwise summation.) for k =
-// 0, ..., K - 1.
-// Step 2: center <- 0.5 * lattice_param_vec + 0.5 * 1/K * sum_k (params[k] -
-// duals[k])
-// Step 3: duals[k] += (center - params[k]) for k = 0, ..., K - 1.
-// Step 4: Repeat Step 1 until sum_k ||center - params[k]||_1 < epsilon, or
-// Step 1 was repeated more than max_iter times.
-template <typename Dtype>
-class MonotoneLatticeProjector {
- public:
- static_assert(std::is_floating_point<Dtype>::value,
- "Dtype needs to be a floating point");
-
- explicit MonotoneLatticeProjector(const LatticeStructure& lattice_structure,
- const std::vector<int64>& monotone_dimensions,
- const Dtype epsilon = 1e-7,
- const int64 max_iter = 100000);
-
- // Apply ADMM projections, and save the result to the projected_param.
- Status Project(const std::vector<Dtype>& lattice_param_vec,
- std::vector<Dtype>* projected_lattice_param_vec) const;
-
- private:
- // This projector computes the projection of lattice parameter vector onto the
- // per dimension monotonicity constraints.
- //
- // For example, consider 3 x 3 lattice:
- //
- // 2---------5--------8
- // | | |
- // | | |
- // 1---------4--------7
- // | | |
- // | | |
- // 0---------3--------6
- //
- // For the 0th dimension, we have
- // weight[0] <= weight[3] <= weight[6]
- // weight[1] <= weight[4] <= weight[7]
- // weight[2] <= weight[5] <= weight[8].
- //
- // So PerDimensionProjector(lattice_structure, 0) will project the
- // lattice_param_vec onto the constraints of the given dimension.
- //
- // For the 1th dimension, we have
- // weight[0] <= weight[1] <= weight[2]
- // weight[3] <= weight[4] <= weight[5]
- // weight[6] <= weight[7] <= weight[8].
- //
- // So PerDimensionProjector(lattice_structure, 1) will project
- // lattice_param_vec onto the constraints of the given dimension.
- class PerDimensionProjector {
- public:
- explicit PerDimensionProjector(const LatticeStructure& lattice_structure,
- const int64 dimension);
-
- // Apply projection, and save the result to the lattice_param_vec.
- void Project(std::vector* lattice_param_vec) const;
-
- private:
- // Helper function that returns the base indices of a given LatticeStructure
- // and dimension.
- static std::vector<int64> BaseIndices(
- const LatticeStructure& lattice_structure, const int64 dimension);
-
- const int64 lattice_size_;
- const int64 stride_;
- const std::vector<int64> base_indices_;
- };
-
- const Dtype epsilon_;
- const int64 max_iter_;
- int64 param_size_;
- std::vector<PerDimensionProjector> projectors_;
-};
-
-// Implementation of PerDimensionProjector's methods.
-template <typename Dtype>
-MonotoneLatticeProjector<Dtype>::PerDimensionProjector::PerDimensionProjector(
- const LatticeStructure& lattice_structure, const int64 dimension)
- : lattice_size_(lattice_structure.LatticeSize(dimension)),
- stride_(lattice_structure.Stride(dimension)),
- base_indices_(BaseIndices(lattice_structure, dimension)) {}
-
-template <typename Dtype>
-std::vector<int64>
-MonotoneLatticeProjector<Dtype>::PerDimensionProjector::BaseIndices(
- const LatticeStructure& lattice_structure, const int64 dimension) {
- std::vector<int64> base_indices;
-
- for (LatticeRawIterator iter(lattice_structure); !iter.IsDone();
- iter.Next()) {
- if (iter.VertexDim(dimension) == 0) {
- base_indices.push_back(iter.Index());
- }
- }
- return base_indices;
-}
-
-
-template <typename Dtype>
-void MonotoneLatticeProjector<Dtype>::PerDimensionProjector::Project(
- std::vector<Dtype>* lattice_param_vec_ptr) const {
- DCHECK(lattice_param_vec_ptr);
-
- std::vector<Dtype>& lattice_param_vec = *lattice_param_vec_ptr;
- for (const int64 base_index : base_indices_) {
- std::vector<Dtype> lattice_slice(lattice_size_);
- // Find the slice of lattice parameter vector.
- int64 current_index = base_index;
- for (Dtype& value : lattice_slice) {
- value = lattice_param_vec[current_index];
- current_index += stride_;
- }
-
- // Make a projection.
- std::vector<Dtype> projected_slice =
- VectorMonotonicProjection(lattice_slice, std::less_equal<Dtype>());
-
- // Fill in the result.
- current_index = base_index;
- for (const Dtype value : projected_slice) {
- lattice_param_vec[current_index] = value;
- current_index += stride_;
- }
- }
-}
-
-// Implementation of MonotoneLatticeProjector's methods.
-template <typename Dtype>
-MonotoneLatticeProjector<Dtype>::MonotoneLatticeProjector(
- const LatticeStructure& lattice_structure,
- const std::vector<int64>& monotone_dimensions, const Dtype epsilon,
- const int64 max_iter)
- : epsilon_(epsilon),
- max_iter_(max_iter),
- param_size_(lattice_structure.NumVertices()) {
- for (const int dim : monotone_dimensions) {
- projectors_.push_back(PerDimensionProjector(lattice_structure, dim));
- }
-}
-
-// Apply ADMM projections.
-template <typename Dtype>
-Status MonotoneLatticeProjector<Dtype>::Project(
- const std::vector<Dtype>& lattice_param_vec,
- std::vector<Dtype>* projected_lattice_param_vec) const {
- if (lattice_param_vec.size() != param_size_) {
- return errors::InvalidArgument("lattice_param_vec's size (",
- lattice_param_vec.size(),
- ") != param_size (", param_size_, ")");
- }
-
- if (!projected_lattice_param_vec) {
- return errors::InvalidArgument("projected_lattice_param_vec is nullptr");
- }
- if (projected_lattice_param_vec->size() != param_size_) {
- return errors::InvalidArgument("projected_lattice_param_vec's size (",
- projected_lattice_param_vec->size(),
- ") != param_size (", param_size_, ")");
- }
-
- // No projection at all. Make a deep copy, then return.
- if (projectors_.empty()) {
- *projected_lattice_param_vec = lattice_param_vec;
- return Status::OK();
- }
-
- // Only one projection. No need for running a complicated projection.
- if (projectors_.size() == 1) {
- // Make a deep copy, then project.
- *projected_lattice_param_vec = lattice_param_vec;
- projectors_[0].Project(projected_lattice_param_vec);
- return Status::OK();
- }
-
- // Initialize all variables.
- // 1. Center: This contains a reference to the projected lattice parameter
- // vector.
- // 2. Param_per_cluster.
- // 3. Deviation_per_cluster.
- std::vector<Dtype>& center = *projected_lattice_param_vec;
- const int param_size = lattice_param_vec.size();
- const int num_clusters = projectors_.size();
-
- // Initial point is a deep copy of lattice_param_vec.
- center = lattice_param_vec;
- std::vector<std::vector<Dtype>> param_per_cluster(
- num_clusters, std::vector<Dtype>(param_size, 0.0));
- std::vector<std::vector<Dtype>> duals(num_clusters,
- std::vector<Dtype>(param_size, 0.0));
-
- Dtype residual = std::numeric_limits<Dtype>::max();
- int64 iter = 0;
- const Dtype average_scale = 0.5 / static_cast<Dtype>(num_clusters);
-
-
- while (residual > epsilon_) {
- // Step 1. Update parameter in each cluster by applying projections.
- for (int ii = 0; ii < num_clusters; ++ii) {
- // Step 1-1. Update param_per_cluster[ii] == center + duals[ii].
- const std::vector<Dtype>& duals_ii = duals[ii];
- std::vector<Dtype>& param_ii = param_per_cluster[ii];
- for (int jj = 0; jj < param_size; ++jj) {
- param_ii[jj] = duals_ii[jj] + center[jj];
- }
- // Step 1-2. Project onto the monotonicity constraint.
- projectors_[ii].Project(&param_ii);
- }
-
- // Step 2. Update the center.
- // center = 1/2 * lattice_param_vec + 1/2 * (Average(param_per_cluster) -
- // Average(dual))
- center.assign(param_size, 0);
- for (int ii = 0; ii < num_clusters; ++ii) {
- const std::vector<Dtype>& dual = duals[ii];
- const std::vector<Dtype>& param = param_per_cluster[ii];
- for (int jj = 0; jj < param_size; ++jj) {
- center[jj] += (param[jj] - dual[jj]);
- }
- }
- for (int ii = 0; ii < param_size; ++ii) {
- center[ii] *= average_scale;
- center[ii] += 0.5 * lattice_param_vec[ii];
- }
-
- // Step 3. Update the dual and residual
- residual = 0;
- for (int ii = 0; ii < num_clusters; ++ii) {
- std::vector<Dtype>& dual = duals[ii];
- const std::vector<Dtype>& param = param_per_cluster[ii];
- for (int jj = 0; jj < param_size; ++jj) {
- const Dtype diff = center[jj] - param[jj];
- dual[jj] += diff;
- residual += std::abs(diff);
- }
- }
-
- ++iter;
- if (iter > max_iter_) {
- break;
- }
- }
- return Status::OK();
-}
-
-} // namespace lattice
-} // namespace tensorflow
-
-#endif // TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_LATTICE_PROJECTIONS_H_
diff --git a/tensorflow_lattice/cc/kernels/monotonic_lattice_projections_test.cc b/tensorflow_lattice/cc/kernels/monotonic_lattice_projections_test.cc
deleted file mode 100644
index 18e8525..0000000
--- a/tensorflow_lattice/cc/kernels/monotonic_lattice_projections_test.cc
+++ /dev/null
@@ -1,135 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include "tensorflow_lattice/cc/kernels/monotonic_lattice_projections.h"
-
-#include <vector>
-
-#include "tensorflow_lattice/cc/lib/lattice_structure.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/strings/str_util.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-namespace lattice {
-namespace {
-TEST(MonotoneLatticeProjectorErrorTest, ProjectionWithNullptr) {
- LatticeStructure lattice_structure(/*lattice_sizes=*/{2, 2});
- MonotoneLatticeProjector<float> projector(lattice_structure,
- /*monotone_dimensions=*/{});
- const Status s =
- projector.Project(/*lattice_param_vec=*/{0, 1, 2, 3}, nullptr);
- EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
-}
-
-TEST(MonotoneLatticeProjectorErrorTest, ProjectionWithWrongInputDimension) {
- LatticeStructure lattice_structure(/*lattice_sizes=*/{2, 2});
- MonotoneLatticeProjector<float> projector(lattice_structure,
- /*monotone_dimensions=*/{});
- std::vector<float> output(4, 0.0);
- const Status s = projector.Project(/*lattice_param_vec=*/{0, 1, 2}, &output);
- EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
-}
-
-TEST(MonotoneLatticeProjectorErrorTest, ProjectionWithWrongOutputDimension) {
- LatticeStructure lattice_structure(/*lattice_sizes=*/{2, 2});
- MonotoneLatticeProjector<float> projector(lattice_structure,
- /*monotone_dimensions=*/{});
- std::vector<float> output(3, 0.0);
- const Status s =
- projector.Project(/*lattice_param_vec=*/{0, 1, 2, 3}, &output);
- EXPECT_EQ(s.code(), error::INVALID_ARGUMENT);
-}
-
-// The fixture for testing MonotoneLatticeProjector.
-class MonotoneLatticeProjectorTest : public ::testing::Test {
- protected:
- void CheckProjection(
- const std::vector<int64>& lattice_sizes,
- const std::vector<int64>& monotone_dimensions,
- const std::vector<float>& lattice_param_vec,
- const std::vector<float>& expected_projected_lattice_param_vec) {
- LatticeStructure lattice_structure(lattice_sizes);
- MonotoneLatticeProjector<float> projector(lattice_structure,
- monotone_dimensions, kEpsilon);
- std::vector<float> projected_lattice_param_vec(lattice_param_vec.size());
- TF_ASSERT_OK(
- projector.Project(lattice_param_vec, &projected_lattice_param_vec));
- LOG(INFO) << "lattice param: " << str_util::Join(lattice_param_vec, ",");
- LOG(INFO) << "Expected projected lattice param: "
- << str_util::Join(expected_projected_lattice_param_vec, ",");
- LOG(INFO) << "Projected lattice param: "
- << str_util::Join(projected_lattice_param_vec, ",");
-
- ASSERT_EQ(projected_lattice_param_vec.size(),
- expected_projected_lattice_param_vec.size());
- for (int ii = 0; ii < expected_projected_lattice_param_vec.size(); ++ii) {
- EXPECT_NEAR(expected_projected_lattice_param_vec[ii],
- projected_lattice_param_vec[ii], kEpsilon);
- }
- }
-
- private:
- const float kEpsilon = 1e-5;
-};
-
-TEST_F(MonotoneLatticeProjectorTest, ProjectToNothing) {
- CheckProjection(
- /*lattice_sizes=*/{2, 2}, /*monotone_dimensions=*/{},
- /*lattice_param_vec=*/{3.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{3.0, 0.0, 2.0, 5.0});
-}
-
-TEST_F(MonotoneLatticeProjectorTest, ProjectTo0thDimension) {
- CheckProjection(
- /*lattice_sizes=*/{2, 2}, /*monotone_dimensions=*/{0},
- /*lattice_param_vec=*/{3.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{1.5, 1.5, 2.0, 5.0});
-}
-
-TEST_F(MonotoneLatticeProjectorTest, ProjectTo1stDimension) {
- CheckProjection(
- /*lattice_sizes=*/{2, 2}, /*monotone_dimensions=*/{1},
- /*lattice_param_vec=*/{3.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{2.5, 0.0, 2.5, 5.0});
-}
-
-TEST_F(MonotoneLatticeProjectorTest, ProjectToAllDimensions) {
- CheckProjection(
- /*lattice_sizes=*/{2, 2}, /*monotone_dimensions=*/{0, 1},
- /*lattice_param_vec=*/{3.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{1.5, 1.5, 2.0, 5.0});
-}
-
-TEST_F(MonotoneLatticeProjectorTest, ProjectThreeByTwoLatticeToAllDimensions) {
- CheckProjection(
- /*lattice_sizes=*/{3, 2}, /*monotone_dimensions=*/{0, 1},
- /*lattice_param_vec=*/{3.0, 1.0, 0.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{1.0, 1.0, 1.0, 1.0, 2.0, 5.0});
-}
-
-TEST_F(MonotoneLatticeProjectorTest,
- ProjectTwoByTwoByTwoLatticeToAllDimensions) {
- CheckProjection(
- /*lattice_sizes=*/{2, 2, 2},
- /*monotone_dimensions=*/{0, 1, 2},
- /*lattice_param_vec=*/{0.44, 0.3, 0.12, 3.33, 3.0, 0.0, 2.0, 5.0},
- /*expected_projected_lattice_param_vec=*/{0.28, 0.3, 0.28, 3.33, 1.5, 1.5,
- 2.0, 5.0});
-}
-
-} // namespace
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/monotonic_projection_kernel.cc b/tensorflow_lattice/cc/kernels/monotonic_projection_kernel.cc
deleted file mode 100644
index 018d9b9..0000000
--- a/tensorflow_lattice/cc/kernels/monotonic_projection_kernel.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include
-#include
-#include
-
-#include "tensorflow_lattice/cc/kernels/monotonic_projections.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/framework/types.pb.h"
-
-namespace tensorflow {
-namespace lattice {
-
-namespace {
-
-template
-bool CmpLesserOrEqual(const Dtype a, const Dtype b) {
- return a <= b;
-}
-
-template
-bool CmpGreaterOrEqual(const Dtype a, const Dtype b) {
- return a >= b;
-}
-
-} // namespace
-
-template
-class MonotonicProjectionOpKernel : public OpKernel {
- public:
- explicit MonotonicProjectionOpKernel(OpKernelConstruction* context)
- : OpKernel(context) {}
-
- void Compute(OpKernelContext* context) override {
- const Tensor& values_tensor = context->input(0);
- const Tensor& increasing_tensor = context->input(1);
-
- OP_REQUIRES(
- context, values_tensor.dims() == 1,
- errors::InvalidArgument("values must have dims=1, got values.dims=",
- values_tensor.dims()));
- OP_REQUIRES(context, increasing_tensor.dims() == 0,
- errors::InvalidArgument(
- "increasing must be a boolean scalar, got increasing.dims=",
- increasing_tensor.dims()));
- OP_REQUIRES(
- context, increasing_tensor.dtype() == DT_BOOL,
- errors::InvalidArgument(
- "increasing must be a boolean scalar, got increasing.dtype=",
- DataType_Name(increasing_tensor.dtype())));
-
- Tensor* monotonic_tensor = nullptr;
- OP_REQUIRES_OK(
- context,
- context->allocate_output(0, values_tensor.shape(), &monotonic_tensor));
-
- // Copy the current non-monotonic values and project them to monotonicity.
- *monotonic_tensor = values_tensor;
- if (increasing_tensor.scalar()()) {
- TensorVectorMonotonicProjection(monotonic_tensor->vec(),
- CmpLesserOrEqual);
- } else {
- TensorVectorMonotonicProjection(monotonic_tensor->vec(),
- CmpGreaterOrEqual);
- }
- }
-};
-
-REGISTER_KERNEL_BUILDER(Name("MonotonicProjection")
- .Device(DEVICE_CPU)
- .TypeConstraint("Dtype"),
- MonotonicProjectionOpKernel);
-REGISTER_KERNEL_BUILDER(Name("MonotonicProjection")
- .Device(DEVICE_CPU)
- .TypeConstraint("Dtype"),
- MonotonicProjectionOpKernel);
-
-} // namespace lattice
-} // namespace tensorflow
diff --git a/tensorflow_lattice/cc/kernels/monotonic_projections.h b/tensorflow_lattice/cc/kernels/monotonic_projections.h
deleted file mode 100644
index 545ae6d..0000000
--- a/tensorflow_lattice/cc/kernels/monotonic_projections.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Functions that calculate monotonic projections.
-#ifndef TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_PROJECTIONS_H_
-#define TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_PROJECTIONS_H_
-
-#include
-
-#include "tensorflow/core/framework/tensor.h"
-
-namespace tensorflow {
-namespace lattice {
-
-// Converts a vector to a non-strictly monotonic vector that minimizes squared
-// distance to original vector values.
-//
-// monotonic_cmp is the comparison function that defines the direction of
-// the monotonicity. monotonic_cmp(a,b) should return true if a followed
-// by b is considered monotonic (equal values should always be considered
-// monotonic). monotonic_cmp should be transitive and
-// monotonic_cmp(a,b) && monotonic_cmp(b,a) iff a == b.
-template
-std::vector VectorMonotonicProjection(const std::vector& input,
- const CmpFn monotonic_cmp);
-
-// Converts a Tensor vector to a non-strictly monotonic vector that minimizes
-// squared distance to original vector values.
-//
-// monotonic_cmp is the comparison function that defines the direction of
-// the monotonicity. monotonic_cmp(a,b) should return true if a followed
-// by b is considered monotonic (equal values should always be considered
-// monotonic). monotonic_cmp should be transitive and
-// monotonic_cmp(a,b) && monotonic_cmp(b,a) iff a == b.
-template
-void TensorVectorMonotonicProjection(typename TTypes::Vec values,
- const CmpFn monotonic_cmp);
-
-// Converts a vector to a non-strictly monotonic vector that minimizes squared
-// distance to original vector values.
-//
-// Given a vector, input, it finds a non-strictly monotonic vector, output, such
-// that:
-//
-// 1. cmp_fn(output[i], output[i + 1]) == true for all 0 <= i < n -1
-// (e.g., output[0] <= output[1] <= ... <= output[n -1])
-// 2. minimizes || input - output ||_2
-//
-// This is a implementation special case of pool adjacent violators (PAV)
-// algorithm.
-//
-// To use it one provides a comparison function (that defines the desired
-// monotonicity direction) and Insert() one value at a time, in order.
-//
-// In the end one can project the monotonic vector into a std::vector or
-// directly into a Tensor vector.
-template
-class MonotonicProjector {
- public:
- // size is the size of the vector to be projected to monotonicity.
- // monotonic_cmp is the comparison function that defines the direction of
- // the monotonicity. monotonic_cmp(a,b) should return true if a followed
- // by b is considered monotonic (equal values should always be considered
- // monotonic). monotonic_cmp should be transitive and
- // monotonic_cmp(a,b) && monotonic_cmp(b,a) iff a == b.
- explicit MonotonicProjector(const int size, const CmpFn monotonic_cmp)
- : size_(size), monotonic_cmp_(monotonic_cmp) {
- pool_list_.reserve(size);
- }
-
- // Insert value to end of pool list keeping list monotonic according to
- // monotonic_cmp_.
- void Insert(Dtype value) {
- Pool new_pool{1, value, value};
- // While new_pool wouldn't be properly monotonic, merge the pool with the
- // previous one.
- while (!pool_list_.empty() &&
- !monotonic_cmp_(pool_list_.back().mean, new_pool.mean)) {
- // If last pool would break monotonicity,
- new_pool.size += pool_list_.back().size;
- new_pool.sum += pool_list_.back().sum;
- new_pool.mean = new_pool.sum / new_pool.size;
- pool_list_.pop_back();
- }
- pool_list_.push_back(new_pool);
- }
-
- // Copies monotonic projection to Tensor vector.
- void ProjectToTensorVector(typename TTypes::Vec output) {
- int output_index = 0;
- for (const auto& pool : pool_list_) {
- for (const int limit = output_index + pool.size; output_index < limit;
- ++output_index) {
- output(output_index) = pool.mean;
- }
- }
- }
-
- // Returns monotonic projection as vector.
- std::vector ProjectToVector() {
- std::vector output(size_);
- int output_index = 0;
- for (const auto& pool : pool_list_) {
- for (const int limit = output_index + pool.size; output_index < limit;
- ++output_index) {
- output[output_index] = pool.mean;
- }
- }
- return output;
- }
-
- private:
- struct Pool {
- int size; // Number of elements in pool.
- Dtype sum, mean; // Sum and mean of all values in pool.
- };
-
- const int size_;
- std::vector pool_list_;
- const CmpFn monotonic_cmp_;
-};
-
-// Implementation details
-
-// START_SKIP_DOXYGEN
-template
-std::vector VectorMonotonicProjection(const std::vector& input,
- const CmpFn monotonic_cmp) {
- MonotonicProjector projector(input.size(), monotonic_cmp);
- for (const Dtype value : input) {
- projector.Insert(value);
- }
- return projector.ProjectToVector();
-}
-
-template
-void TensorVectorMonotonicProjection(typename TTypes::Vec values,
- const CmpFn monotonic_cmp) {
- MonotonicProjector projector(values.size(), monotonic_cmp);
- for (int i = 0; i < values.size(); ++i) {
- projector.Insert(values(i));
- }
- projector.ProjectToTensorVector(values);
-}
-// END_SKIP_DOXYGEN
-
-} // namespace lattice
-} // namespace tensorflow
-
-#endif // TENSORFLOW_LATTICE_CC_KERNELS_MONOTONIC_PROJECTIONS_H_
diff --git a/tensorflow_lattice/cc/kernels/pwl_indexing_calibrator_kernels.cc b/tensorflow_lattice/cc/kernels/pwl_indexing_calibrator_kernels.cc
deleted file mode 100644
index aab1669..0000000
--- a/tensorflow_lattice/cc/kernels/pwl_indexing_calibrator_kernels.cc
+++ /dev/null
@@ -1,759 +0,0 @@
-/* Copyright 2017 The TensorFlow Lattice Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-#include
-#include
-#include
-#include
-
-#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/util/sparse/sparse_tensor.h"
-#include "tensorflow/core/util/work_sharder.h"
-
-namespace tensorflow {
-namespace lattice {
-
-namespace {
-
-// Maximum number of points used by interpolation. It may use up to 3 when it's
-// exactly on top of a keypoint input -- it returns also the left and right
-// keypoints inputs (indices). See explanation below on
-// FindExpandedInterpolation.
-constexpr int kMaxNumInterpolationPoints = 3;
-
-// Changed with PwlSetDebugMode function. This variable forces each row of a
-// batch to be processed by a separate worker, only used for testing.
-bool test_force_split = false;
-
-} // namespace
-
-extern void PwlSetTestMode(bool split_batches);
-void PwlSetTestMode(const bool split_batches) {
- test_force_split = split_batches;
-}
-
-// Helper struct that holds all information needed to resolve one interpolation:
-// the number of consecutive points used (num_points), the index of the first
-// one (lower_index) the associated weights -- not used in every case.
-template
-struct InterpolationPoints {
- int num_points;
- int64_t lower_index;
- Dtype weights[kMaxNumInterpolationPoints];
-};
-
-namespace {
-
-// Find the interpolation points, but _not the weights_, for the given
-// uncalibrated value and keypoints inputs (kp_inputs).
-// The interpolation will be between kp_inputs[lower_index] and
-// kp_inputs[lower_index + 1]. Except outside the edges or if x (uncalibrated)
-// is exactly on top of a keypoint, in which case the function returns 1 point.
-// It uses a simple binary-search, so it is O(log(|kp_inputs|)).
-template
-InterpolationPoints FindInterpolationPoints(
- const Dtype uncalibrated,
- const typename TTypes::Vec& kp_inputs) {
- if (uncalibrated <= kp_inputs(0)) {
- return InterpolationPoints{1, 0};
- }
- const int64_t kp_inputs_last_idx = static_cast(kp_inputs.size() - 1);
- if (uncalibrated >= kp_inputs(kp_inputs_last_idx)) {
- return InterpolationPoints{1, kp_inputs_last_idx};
- }
-
- // Binary search the keypoints inputs.
- int64_t min_idx = 0, max_idx = kp_inputs.size();
- while (max_idx > min_idx + 1) {
- const int64_t idx = (max_idx + min_idx) / 2;
- const Dtype value = kp_inputs(idx);
- if (uncalibrated == value) {
- return InterpolationPoints{1, idx};
- }
- if (uncalibrated > value) {
- min_idx = idx;
- } else {
- max_idx = idx;
- }
- }
-
- // Two points, where lower_index is min_idx.
- return InterpolationPoints{2, min_idx};
-}
-
-// Find interpolations points and associated weights for the given
-// uncalibrated value and keypoints inputs (kp_inputs).
-// Returns 1 interpolation point if uncalibrated is exactly on top of an
-// input keypoint (or if beyond the edges), or 2 if in between two
-// keypoints.
-// See FindInterpolationPoints.
-template
-InterpolationPoints FindInterpolationPointsWithWeights(
- const Dtype uncalibrated,
- const typename TTypes::Vec& kp_inputs) {
- // Get points an calculates weights.
- InterpolationPoints