From f25f44efeb0d544411eb863d141ac51bfcc3767d Mon Sep 17 00:00:00 2001 From: belyaeva Date: Wed, 29 Jun 2022 11:51:02 -0700 Subject: [PATCH] Update tf version to >= 2.9 and remove remaining dependencies on tensorflow_models.official.legacy.transformer PiperOrigin-RevId: 458027974 --- deepconsensus/models/legacy_networks.py | 393 ------------------ deepconsensus/models/legacy_networks_test.py | 118 ------ .../models/losses_and_metrics_test.py | 2 +- deepconsensus/models/model_configs.py | 78 +--- deepconsensus/models/model_distillation.py | 4 +- .../models/model_distillation_test.py | 2 +- deepconsensus/models/model_inference_test.py | 2 +- .../models/model_train_custom_loop.py | 2 +- deepconsensus/models/model_utils.py | 39 +- deepconsensus/models/model_utils_test.py | 2 +- deepconsensus/models/networks_test.py | 4 +- .../models/transformer_basic_params.py | 109 +++++ deepconsensus/testdata/README.md | 2 +- deepconsensus/testdata/model/params.json | 4 +- install-gpu.sh | 2 +- install.sh | 2 +- requirements.txt | 11 +- setup.py | 4 +- 18 files changed, 168 insertions(+), 612 deletions(-) delete mode 100644 deepconsensus/models/legacy_networks.py delete mode 100644 deepconsensus/models/legacy_networks_test.py create mode 100644 deepconsensus/models/transformer_basic_params.py diff --git a/deepconsensus/models/legacy_networks.py b/deepconsensus/models/legacy_networks.py deleted file mode 100644 index 2e9c358..0000000 --- a/deepconsensus/models/legacy_networks.py +++ /dev/null @@ -1,393 +0,0 @@ -# Copyright (c) 2021, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of Google Inc. nor the names of its contributors -# may be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
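
For orientation before the file-by-file changes: the legacy network wrappers deleted below pulled the encoder stack and parameter helpers from official.nlp.transformer, and the one remaining runtime dependency in model_utils.py was misc.get_model_params; this patch deletes the former and replaces the latter with a locally vendored parameter table. A minimal, illustrative sketch of that swap (the exact call sites are in the model_utils.py hunk further down):

    # Before (removed in the model_utils.py hunk below):
    #   from official.nlp.transformer import misc
    #   transformer_params = misc.get_model_params(
    #       params.transformer_model_size, num_gpus=num_gpus)
    #
    # After: the same predefined tables are vendored locally.
    from deepconsensus.models import transformer_basic_params

    params = transformer_basic_params.BASE_PARAMS.copy()
    assert params['hidden_size'] == 512 and params['num_heads'] == 8
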
-"""TF2 + tf.keras implementations of legacy_networks for DeepConsensus.""" - -import logging -from typing import Callable, Optional, Tuple - -import ml_collections -import tensorflow as tf - -from deepconsensus.models import data_providers -from official.nlp.transformer import embedding_layer -from official.nlp.transformer import model_utils -from official.nlp.transformer import transformer -from official.nlp import modeling -from official.nlp.bert import bert_models -from official.nlp.bert import configs - - -class EmbeddingSharedWeights(embedding_layer.EmbeddingSharedWeights): - - def call(self, inputs): - # make sure 0 ids match to zero emebeddings. - embeddings = super().call(inputs) - mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype) - embeddings *= tf.expand_dims(mask, -1) - return embeddings - - -# pylint: disable=invalid-name -def FullyConnectedNet(params: ml_collections.ConfigDict) -> tf.keras.Model: - """Fully connected neural network architecture.""" - - inputs = tf.keras.Input( - shape=(params.hidden_size, params.max_length, params.num_channels)) - l2_reg = tf.keras.regularizers.l2 - net = inputs - net = tf.keras.layers.Flatten()(net) - for i in range(len(params.fc_size)): - net = tf.keras.layers.Dense( - units=params.fc_size[i], - activation=tf.nn.relu, - kernel_regularizer=l2_reg(params.l2))( - net) - net = tf.keras.layers.Dropout(rate=params.fc_dropout)(net) - - net = tf.keras.layers.Dense(units=params.max_length * params.num_classes)(net) - net = tf.keras.layers.Reshape((params.max_length, params.num_classes))(net) - net = tf.keras.layers.Softmax(axis=-1)(net) - outputs = net - return tf.keras.Model(inputs=inputs, outputs=outputs) - - -def get_conv_sub_model( - conv_model -) -> Tuple[Callable[..., tf.Tensor], Callable[[tf.keras.Model], - tf.keras.Model]]: - """Returns a predefined convolutional architecture.""" - if conv_model == 'resnet50': - return tf.keras.applications.ResNet50V2, tf.keras.applications.resnet_v2.preprocess_input - elif conv_model == 'resnet101': - return tf.keras.applications.ResNet101V2, tf.keras.applications.resnet_v2.preprocess_input - elif conv_model == 'resnet152': - return tf.keras.applications.ResNet152V2, tf.keras.applications.resnet_v2.preprocess_input - else: - raise NotImplementedError(f'conv model "{conv_model}" not found') - - -# pylint: disable=invalid-name -class ConvNet(tf.keras.Model): - """Convolutional neural network architecture.""" - - def __init__(self, params: ml_collections.ConfigDict, **kwargs): - super(ConvNet, self).__init__(params, **kwargs) - # Most conv models only accept 3 channels. - self.resnet_input_shape = (params.hidden_size, params.max_length, 3) - self.dimensions = params.max_length * params.num_classes - - model, self.conv_preprocess = get_conv_sub_model(params.conv_model) - self.model = model( - include_top=False, - weights=None, - input_shape=self.resnet_input_shape, - pooling='avg') - self.use_sn = params.use_sn - self.max_length = params.max_length - self.num_classes = params.num_classes - - # Define layers - self.layer_dense = tf.keras.layers.Dense(units=self.dimensions) - - def call(self, inputs: tf.Tensor, training: bool) -> tf.Tensor: - # Most conv models only accept 3 channels; - # The sn channel must be removed and optionally - # added back at the end. CCS rows not being used currently for this model. 
- input_rows, _, sn_rows = tf.split(inputs, [3, 1, 1], 3) - - cn_input = self.conv_preprocess(input_rows) - net = self.model(cn_input, training=training) - - if self.use_sn: - logging.info('Using SN Values') - # sn_rows was padded previously to match the input dimensions - # Crop it here back to 4 rows. - sn_rows = tf.image.crop_to_bounding_box(sn_rows, 0, 0, 4, self.max_length) - sn_rows = tf.keras.layers.Flatten()(sn_rows) - net = tf.keras.layers.Flatten()(net) - net = tf.concat([net, sn_rows], 1) - else: - net = tf.keras.layers.Flatten()(net) - - net = self.layer_dense(net) - net = tf.keras.layers.Reshape((self.max_length, self.num_classes))(net) - net = tf.keras.layers.Softmax(axis=-1)(net) - output = net - return output - - -class EncoderOnlyTransformer(transformer.Transformer): - """Modified encoder-only transformer model for DeepConsensus. - - This implementation extends the one in - //third_party/tensorflow_models/official/legacy/transformer/transformer.py. - The main changes are: - - * Removing logic relating to converting tokens to embeddings, since the - DeepConsensus is already in the form of vectors for each position. - - * Removing the decoder, since we only want to run the encoder. - - * Adding additional layers on top of the encoder for the per-position - classification task. - """ - - def __init__(self, - params: ml_collections.ConfigDict, - name: Optional[str] = None): - # Call grandparent super since we don't want to initialize embeddings. - super(transformer.Transformer, self).__init__(params, name=name) - self.params = params - if self.params.add_pos_encoding and self.params.use_relative_pos_enc: - self.position_embedding = modeling.layers.position_embedding.RelativePositionEmbedding( - hidden_size=self.params['hidden_size']) - self.encoder_stack = transformer.EncoderStack(params) - self.fc1 = tf.keras.layers.Dense( - units=(params['vocab_size']), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros') - self.softmax = tf.keras.layers.Softmax() - - def call(self, inputs: tf.Tensor, training: bool) -> tf.Tensor: - """Runs a forward pass of the model. - - Args: - inputs: tensor of shape (batch_size, hidden_size, input_length - num_channels). - training: boolean, whether in training mode or not. - - Returns: - Output from softmax layer, which is a distribution over the vocabular at - each position in the sequence. - """ - - with tf.name_scope('Transformer'): - - # Get rid of the channel dimension as we only have one channel. - inputs = tf.squeeze(inputs, -1) - - # `inputs` is of shape (batch_size, hidden_size, input_length). For the - # Transformer, we need to change the format to be the following: - # (batch_size, input_length, hidden_size). - inputs = tf.transpose(inputs, [0, 2, 1]) - - # Attention_bias for our model should be all 0s with shape - # (batch_size, 1, 1, input_length). See model_utils.get_padding_bias - # to see how this is calculated in the base model. - all_zeros = tf.reduce_sum(tf.zeros_like(inputs), -1) - attention_bias = tf.expand_dims(tf.expand_dims(all_zeros, 1), 1) - - # Run the inputs through the encoder. Encoder returns the softmax output. 
- encoder_outputs = self.encode(inputs, attention_bias, training) - logits = encoder_outputs - return logits - - def encode(self, inputs: tf.Tensor, attention_bias: tf.Tensor, - training: bool) -> tf.Tensor: - """Runs the input through Encoder stack and problem-specific layers.""" - - with tf.name_scope('encode'): - - # The input for each position is already a vector, so we do not use - # embeddings here, unlike the base model. Base model input is a token at - # each position, which must first be embedded as a vector. In the future, - # we may want to use embeddings for part of the input, such as the bases, - # so that we can learn the scale of values. - encoder_inputs = inputs - - # Positional embedding only works when we have an even value for the - # hidden_size. If hidden_size is odd, add an empty row to make it even. - if self.params.add_pos_encoding and encoder_inputs.shape[2] % 2 != 0: - empty_row = tf.zeros( - shape=(encoder_inputs.shape[0], encoder_inputs.shape[1], 1)) - encoder_inputs = tf.concat([encoder_inputs, empty_row], axis=-1) - assert self.params.hidden_size == encoder_inputs.shape[2] - - # All values in `input_padding` should be 0 and shape should be - # (batch_size, input_length). See model_utils.get_padding to see how this - # is computed for the base model. - inputs_padding = tf.reduce_sum(tf.zeros_like(encoder_inputs), -1) - - # Cast input `attention_bias` to correct type, as done in the base model. - attention_bias = tf.cast(attention_bias, self.params['dtype']) - - # Add positional encoding to the input. The scale of the positional - # encoding relative to the input values will matter since we are not - # learning the input embedding. - if self.params['add_pos_encoding']: - with tf.name_scope('add_pos_encoding'): - if self.params['use_relative_pos_enc']: - pos_encoding = self.position_embedding(inputs=encoder_inputs) - else: - pos_encoding = model_utils.get_position_encoding( - self.params['max_length'], self.params['hidden_size']) - pos_encoding = tf.cast(pos_encoding, self.params['dtype']) - encoder_inputs += pos_encoding - - # Add dropout when training. - if training: - encoder_inputs = tf.nn.dropout( - encoder_inputs, rate=self.params['layer_postprocess_dropout']) - - # Pass inputs through the encoder. As mentioned above, `inputs_padding` is - # not actually used by EncoderStack.call. Encoder stack output has shape - # (batch_size, input_length, hidden_size). - encoder_outputs = self.encoder_stack( - encoder_inputs, attention_bias, inputs_padding, training=training) - - # Pass through dense layer, and output a distribution. - encoder_outputs = self.fc1(encoder_outputs) - encoder_outputs = self.softmax(encoder_outputs) - return encoder_outputs - - def decode(self, encoder_outputs: tf.Tensor, attention_bias: tf.Tensor, - training: bool) -> tf.Tensor: - """Returns the outputs from the encoder.""" - - raise NotImplementedError - - def predict(self, encoder_inputs: tf.Tensor) -> tf.Tensor: - """Returns the argmax of the decoder output, which comes from a softmax.""" - - # The base model also has a predict method that behaves differently. This - # predict function is consistent with how predict behaves for other - # DeepConsensus models (conv, FC), but we may want to change this in the - # future to match the transformer base class. 
For more details, see: - # https://github.com/tensorflow/models/blob/bc71d8e9e155d34a38af8489ad4cbb2fde6fa152/official/nlp/transformer/transformer.py#L279 - return self.call(encoder_inputs, training=False) - - -class EncoderOnlyLearnedValuesTransformer(EncoderOnlyTransformer): - """Modified transformer that learns embeddings for the bases.""" - - def __init__(self, - params: ml_collections.ConfigDict, - name: Optional[str] = None): - super(EncoderOnlyLearnedValuesTransformer, self).__init__(params, name=name) - if params.use_bases: - self.bases_embedding_layer = EmbeddingSharedWeights( - params['vocab_size'], params['per_base_hidden_size']) - if params.use_pw: - pw_vocab_size = params.PW_MAX + 1 - self.pw_embedding_layer = EmbeddingSharedWeights(pw_vocab_size, - params['pw_hidden_size']) - if params.use_ip: - ip_vocab_size = params.IP_MAX + 1 - self.ip_embedding_layer = EmbeddingSharedWeights(ip_vocab_size, - params['ip_hidden_size']) - - - if params.use_sn: - sn_vocab_size = params.SN_MAX + 1 - self.sn_embedding_layer = EmbeddingSharedWeights(sn_vocab_size, - params['sn_hidden_size']) - - if params.use_strand: - strand_vocab_size = params.STRAND_MAX + 1 - self.strand_embedding_layer = EmbeddingSharedWeights( - strand_vocab_size, params['strand_hidden_size']) - - # Define a dense layer to linearly map the concatenated embeddings of - # all subreads at a given position to a smaller dimension - # (transformer_input_size) in order to keep the transformer layers small. - if self.params.condense_transformer_input: - logging.info('Condensing input.') - self.transformer_input_condenser = tf.keras.layers.Dense( - units=(params.transformer_input_size), - activation=None, - use_bias=False, - kernel_initializer='glorot_uniform', - bias_initializer='zeros') - - def encode(self, inputs: tf.Tensor, attention_bias: tf.Tensor, - training: bool) -> tf.Tensor: - """Runs the input through Encoder stack and problem-specific layers.""" - - # Input to embedding layer is [batch_size, length] and output will be - # [batch_size, length, embedding_size]. Embed each row of the input - # separately and then concatenate. - embedded_inputs = [] - base_indices, pw_indices, ip_indices, strand_indices, ccs_indices, sn_indices = data_providers.get_indices( - self.params['max_passes']) - if self.params.use_bases: - for i in range(*base_indices): - # Shape: [batch_size, length, per_base_hidden_size] - embedded = self.bases_embedding_layer( - tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - - if self.params.use_pw: - for i in range(*pw_indices): - # Shape: [batch_size, length, pw_hidden_size] - embedded = self.pw_embedding_layer(tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - if self.params.use_ip: - for i in range(*ip_indices): - # Shape: [batch_size, length, ip_hidden_size] - embedded = self.ip_embedding_layer(tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - if self.params.use_strand: - for i in range(*strand_indices): - embedded = self.strand_embedding_layer( - tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - if self.params.use_ccs: - for i in range(*ccs_indices): - embedded = self.bases_embedding_layer( - tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - # TODO: experiment with computing a weighted average using snr as - # weights to aggregate subread-level embeddings (instead of concatenating). 
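
    # Hypothetical sketch of the TODO above (not existing code): aggregate the
    # per-subread embeddings with an SN-weighted average instead of
    # concatenating them. Names and shapes are assumptions for illustration:
    # `embeddings` is (batch, length, num_subreads, hidden), `sn` is
    # (batch, num_subreads).
    import tensorflow as tf

    def sn_weighted_average(embeddings: tf.Tensor, sn: tf.Tensor) -> tf.Tensor:
      weights = tf.nn.softmax(sn, axis=-1)             # (batch, num_subreads)
      weights = weights[:, tf.newaxis, :, tf.newaxis]  # broadcast over length/hidden
      # Weighted sum over the subread axis -> (batch, length, hidden).
      return tf.reduce_sum(embeddings * weights, axis=2)
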
- if self.params.use_sn: - # The last four elements in the last dimension in the inputs tensor - # correspond to the four signal-to-noise ratio scores for A, G, C, T. - for i in range(*sn_indices): - embedded = self.sn_embedding_layer(tf.cast(inputs[:, :, i], tf.int32)) - embedded_inputs.append(embedded) - - embedded_inputs = tf.concat(embedded_inputs, axis=-1) - embedded_inputs = tf.cast(embedded_inputs, self.params['dtype']) - - if self.params.condense_transformer_input: - # Condense the transformer input at each position to a smaller vector to - # reduce the transformer hidden size, since the transformer model size is - # quadratic in its hidden size. - # Shape: [batch_size, length, transformer_input_size] - transformer_input = self.transformer_input_condenser(embedded_inputs) - else: - transformer_input = embedded_inputs - - return super(EncoderOnlyLearnedValuesTransformer, - self).encode(transformer_input, attention_bias, training) diff --git a/deepconsensus/models/legacy_networks_test.py b/deepconsensus/models/legacy_networks_test.py deleted file mode 100644 index e92fd2c..0000000 --- a/deepconsensus/models/legacy_networks_test.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) 2021, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of Google Inc. nor the names of its contributors -# may be used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
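
The learned-embeddings scheme removed above is kept, conceptually unchanged, in networks.py (which model_utils.get_model now uses for transformer_learn_values): each feature row is cast to int, embedded with its own layer, the embeddings are concatenated per position, and optionally condensed to transformer_input_size with a bias-free dense layer. A self-contained sketch with only two features and illustrative shapes; the real layers are EmbeddingSharedWeights with zero-id masking, with tf.keras.layers.Embedding standing in here:

    import tensorflow as tf

    batch, length = 2, 120
    bases = tf.random.uniform((batch, length), maxval=5, dtype=tf.int32)
    pw = tf.random.uniform((batch, length), maxval=10, dtype=tf.int32)

    base_emb = tf.keras.layers.Embedding(input_dim=5, output_dim=8)
    pw_emb = tf.keras.layers.Embedding(input_dim=10, output_dim=8)

    # Embed each feature row separately, then concatenate per position.
    embedded = tf.concat([base_emb(bases), pw_emb(pw)], axis=-1)    # (2, 120, 16)
    # Optionally condense to the transformer input size (280 in the configs).
    condense = tf.keras.layers.Dense(280, activation=None, use_bias=False)
    print(condense(embedded).shape)                                 # (2, 120, 280)
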
-"""Tests for deepconsensus.models.legacy_networks.""" - -import itertools - -from absl.testing import absltest -from absl.testing import parameterized - -import ml_collections -import numpy as np -import tensorflow as tf - -from deepconsensus.models import data_providers -from deepconsensus.models import model_configs -from deepconsensus.models import model_utils - - -def get_tf_example_rows(params: ml_collections.ConfigDict, - inference: bool) -> np.ndarray: - """Returns one example from the training dataset for given params.""" - dataset = data_providers.get_dataset( - file_pattern=params.train_path, - num_epochs=params.num_epochs, - batch_size=params.batch_size, - params=params, - inference=inference) - tf_example = next(dataset.as_numpy_iterator()) - return tf_example['rows'] - - -class ModelsTest(parameterized.TestCase): - - @parameterized.parameters( - itertools.product( - [True, False], - [ - 'fc+test', - 'transformer+test', - 'transformer_learn_values+test', - ], - [True, False])) - def test_outputs(self, training, config_name, use_predict): - """Checks that softmax distribution and final predictions are valid. - - This test is only checking the output format and does not train the model. - Args: - training: whether we are in training or eval/test mode. - config_name: config to test. - use_predict: whether to use model.predict or call model as a function. - """ - params = model_configs.get_config(config_name) - model_utils.modify_params(params) - model = model_utils.get_model(params) - inference = not training - rows = get_tf_example_rows(params, inference=inference) - if use_predict: - softmax_output = model.predict(rows) - else: - softmax_output = model(rows, training=training).numpy() - predictions = tf.argmax(softmax_output, -1) - - # First dimension will always be equal to batch_size because test config - # uses a batch size of 1. - self.assertEqual(softmax_output.shape, - (params.batch_size, params.max_length, params.num_classes)) - self.assertTrue( - np.allclose( - np.sum(softmax_output, axis=-1), - np.ones(shape=[params.batch_size, params.max_length]))) - self.assertEqual(predictions.shape, (params.batch_size, params.max_length)) - - @parameterized.parameters( - itertools.product( - [ - 'fc+test', - 'transformer+test', - 'transformer_learn_values+test', - ], - [True, False])) - def test_predict_and_model_fn_equal(self, config_name, inference): - """Checks that model.predict and calling model as a function are equal.""" - config = model_configs.get_config(config_name) - model_utils.modify_params(config) - model = model_utils.get_model(config) - rows = get_tf_example_rows(config, inference=inference) - softmax_output_predict = model.predict(rows) - softmax_output = model(rows, training=False).numpy() - self.assertTrue( - np.allclose(softmax_output_predict, softmax_output, rtol=1e-05)) - -if __name__ == '__main__': - absltest.main() diff --git a/deepconsensus/models/losses_and_metrics_test.py b/deepconsensus/models/losses_and_metrics_test.py index efe9868..a626bb7 100644 --- a/deepconsensus/models/losses_and_metrics_test.py +++ b/deepconsensus/models/losses_and_metrics_test.py @@ -614,7 +614,7 @@ def test_distillation_loss_fn(self, batch_size, window_length, temperature, distill_loss = distill_loss + kl_ij # Get the distillation loss over the whole window. 
distill_loss = distill_loss / window_length - self.assertAlmostEqual(distill_loss, expected_loss[example_ind]) + self.assertAlmostEqual(distill_loss, expected_loss[example_ind], places=6) if __name__ == '__main__': diff --git a/deepconsensus/models/model_configs.py b/deepconsensus/models/model_configs.py index 0b175d9..590d0c4 100644 --- a/deepconsensus/models/model_configs.py +++ b/deepconsensus/models/model_configs.py @@ -65,46 +65,11 @@ def _set_base_fc_hparams(params): params.buffer_size = 1000 -def _set_base_transformer_v2_hparams(params): - """Updates given config with base values for the Transformer model.""" - # Architecture - params.model_name = 'transformer_v2' - params.add_pos_encoding = True - # Num heads should be divisible by hidden size. This value should be tuned for - # the production setting. TODO: update this parameter after - # tuning. - params.num_heads = 2 - params.layer_norm = False - params.dtype = dc_constants.TF_DATA_TYPE - params.condense_transformer_input = False - params.transformer_model_size = 'base' - - params.num_channels = 1 - params.use_bases = True - params.use_pw = True - params.use_ip = True - params.use_ccs = True - params.use_strand = True - params.use_sn = True - params.per_base_hidden_size = 1 - params.pw_hidden_size = 1 - params.ip_hidden_size = 1 - params.sn_hidden_size = 1 - params.strand_hidden_size = 1 - - # Training - params.batch_size = 256 - params.num_epochs = 50 - params.learning_rate = 1e-4 - params.buffer_size = 1000 - - def _set_base_transformer_hparams(params): """Updates given config with base values for the Transformer model.""" # Architecture params.model_name = 'transformer' params.add_pos_encoding = True - params.use_relative_pos_enc = True # Num heads should be divisible by hidden size. This value should be tuned for # the production setting. TODO: update this parameter after # tuning. @@ -135,28 +100,11 @@ def _set_base_transformer_hparams(params): def _set_transformer_learned_embeddings_hparams(params): - """Updates given config with values for the learned embeddings transformer.""" - _set_base_transformer_hparams(params) - params.model_name = 'transformer_learn_values' - params.PW_MAX = dc_constants.PW_MAX - params.IP_MAX = dc_constants.IP_MAX - params.STRAND_MAX = dc_constants.STRAND_MAX - params.SN_MAX = dc_constants.SN_MAX - params.per_base_hidden_size = 8 - params.pw_hidden_size = 8 - params.ip_hidden_size = 8 - params.strand_hidden_size = 2 - params.sn_hidden_size = 8 - params.condense_transformer_input = True - params.transformer_input_size = 280 - - -def _set_transformer_learned_embeddings_v2_hparams(params): """Updates given config with values for the learned embeddings transformer.""" # TODO: As we migrate off the legacy code, we might need to # adjust the params below. For now just making a copy of the previous params. 
- _set_base_transformer_v2_hparams(params) - params.model_name = 'transformer_learn_values_v2' + _set_base_transformer_hparams(params) + params.model_name = 'transformer_learn_values' params.PW_MAX = dc_constants.PW_MAX params.IP_MAX = dc_constants.IP_MAX params.STRAND_MAX = dc_constants.STRAND_MAX @@ -170,10 +118,10 @@ def _set_transformer_learned_embeddings_v2_hparams(params): params.transformer_input_size = 280 -def _set_transformer_learned_embeddings_v2_distill_hparams(params): +def _set_transformer_learned_embeddings_distill_hparams(params): """Updates given config with values for the distilled transformer.""" - _set_transformer_learned_embeddings_v2_hparams(params) - params.model_name = 'transformer_learn_values_v2_distill' + _set_transformer_learned_embeddings_hparams(params) + params.model_name = 'transformer_learn_values_distill' # Student architecture parameters. params.num_hidden_layers = 4 @@ -239,14 +187,12 @@ def get_config(config_name: str) -> ml_collections.ConfigDict: Valid config names must consist of two parts: {model_name}+{dataset_name}. The "+" must be present as a separator between the two parts. For example, - transformer_learn_bases+ccs is a valid name. + transformer_learn_values+ccs is a valid name. Valid model names include: * fc - * transformer (TODO: legacy codebase) - * transformer_learn_values (TODO: legacy codebase) - * transformer_v2 - * transformer_learn_values_v2 + * transformer + * transformer_learn_values Valid dataset names include: * ecoli @@ -289,16 +235,12 @@ def get_config(config_name: str) -> ml_collections.ConfigDict: params.limit = -1 if model_config_name == 'fc': _set_base_fc_hparams(params) - elif model_config_name == 'transformer_v2': - _set_base_transformer_v2_hparams(params) elif model_config_name == 'transformer': _set_base_transformer_hparams(params) - elif model_config_name == 'transformer_learn_values_v2': - _set_transformer_learned_embeddings_v2_hparams(params) elif model_config_name == 'transformer_learn_values': _set_transformer_learned_embeddings_hparams(params) - elif model_config_name == 'transformer_learn_values_v2_distill': - _set_transformer_learned_embeddings_v2_distill_hparams(params) + elif model_config_name == 'transformer_learn_values_distill': + _set_transformer_learned_embeddings_distill_hparams(params) else: raise ValueError('Unknown model_config_name: %s' % model_config_name) diff --git a/deepconsensus/models/model_distillation.py b/deepconsensus/models/model_distillation.py index 2addcaa..afef76d 100644 --- a/deepconsensus/models/model_distillation.py +++ b/deepconsensus/models/model_distillation.py @@ -30,12 +30,12 @@ Distillation attempts to train a smaller student model that mimics the larger teacher model. -Currently only transformer_learn_values_v2_distill config is +Currently only transformer_learn_values_distill config is supported for model training. 
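
A minimal sketch of the per-window distillation term exercised by the losses_and_metrics_test change above: KL divergence between temperature-softened teacher and student distributions, accumulated per position and divided by the window length. Function and argument names here are illustrative, not the exact library code:

    import tensorflow as tf

    def window_distillation_loss(teacher_logits, student_logits, temperature=1.0):
      # Shapes assumed: (batch, window_length, num_classes).
      teacher_probs = tf.nn.softmax(teacher_logits / temperature, axis=-1)
      student_probs = tf.nn.softmax(student_logits / temperature, axis=-1)
      kl = tf.keras.losses.KLDivergence(reduction=tf.keras.losses.Reduction.NONE)
      per_position = kl(teacher_probs, student_probs)   # (batch, window_length)
      # Average over the window, matching `distill_loss / window_length` above.
      return tf.reduce_mean(per_position, axis=-1)      # (batch,)
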
Example usage: -CONFIG="//learning/genomics/deepconsensus/models/model_configs.py:transformer_learn_values_v2_distill+ccs" +CONFIG="//learning/genomics/deepconsensus/models/model_configs.py:transformer_learn_values_distill+ccs" TEACHER_MODEL_DIR="" OUT_DIR=/tmp diff --git a/deepconsensus/models/model_distillation_test.py b/deepconsensus/models/model_distillation_test.py index 5281800..1becdf2 100644 --- a/deepconsensus/models/model_distillation_test.py +++ b/deepconsensus/models/model_distillation_test.py @@ -41,7 +41,7 @@ class ModelTrainTest(parameterized.TestCase): - @parameterized.parameters(['transformer_learn_values_v2_distill+test']) + @parameterized.parameters(['transformer_learn_values_distill+test']) def test_train_e2e(self, config_name): """Tests that training completes and output files written.""" diff --git a/deepconsensus/models/model_inference_test.py b/deepconsensus/models/model_inference_test.py index d3e1530..149eeba 100644 --- a/deepconsensus/models/model_inference_test.py +++ b/deepconsensus/models/model_inference_test.py @@ -43,7 +43,7 @@ class ModelInferenceTest(absltest.TestCase): def test_inference_e2e(self): """Tests that inference finishes running and an output file is created.""" - config_name = 'transformer_learn_values_v2+test' + config_name = 'transformer_learn_values+test' out_dir = self.create_tempdir().full_path checkpoint_path = test_utils.deepconsensus_testdata('model/checkpoint-1') params = model_configs.get_config(config_name) diff --git a/deepconsensus/models/model_train_custom_loop.py b/deepconsensus/models/model_train_custom_loop.py index f388c10..ad02d3c 100644 --- a/deepconsensus/models/model_train_custom_loop.py +++ b/deepconsensus/models/model_train_custom_loop.py @@ -30,7 +30,7 @@ To use this binary for training a specific model, the corresponding config file should be specified as input. Example usage: -CONFIG="//learning/genomics/deepconsensus/models/model_configs.py:transformer_learn_values_v2+ccs" +CONFIG="//learning/genomics/deepconsensus/models/model_configs.py:transformer_learn_values+ccs" OUT_DIR=/tmp time blaze run -c opt \ diff --git a/deepconsensus/models/model_utils.py b/deepconsensus/models/model_utils.py index c26df06..2b8f142 100644 --- a/deepconsensus/models/model_utils.py +++ b/deepconsensus/models/model_utils.py @@ -31,18 +31,17 @@ import json import logging import os -from typing import List, Optional, Tuple, Any, Union, Dict +from typing import Any, Dict, List, Optional, Tuple, Union import ml_collections import numpy as np import tensorflow as tf from deepconsensus.models import data_providers -from deepconsensus.models import legacy_networks from deepconsensus.models import losses_and_metrics from deepconsensus.models import networks +from deepconsensus.models import transformer_basic_params from deepconsensus.utils import dc_constants -from official.nlp.transformer import misc def get_deepconsensus_loss( @@ -119,16 +118,8 @@ def get_model(params: ml_collections.ConfigDict) -> tf.keras.Model: if params.model_name == 'fc': model = networks.FullyConnectedNet(params) elif params.model_name == 'transformer': - model = legacy_networks.EncoderOnlyTransformer(params) - # I'm using "_v2" suffix for the new code migrated out of legacy. Feel free - # to suggest more informative names. 
- elif params.model_name == 'transformer_v2': model = networks.EncoderOnlyTransformer(params) - elif params.model_name == 'transformer_learn_values': - model = legacy_networks.EncoderOnlyLearnedValuesTransformer(params) - # I'm using "_v2" suffix for the new code migrated out of legacy. Feel free - # to suggest more informative names. - elif 'transformer_learn_values_v2' in params.model_name: + elif 'transformer_learn_values' in params.model_name: model = networks.EncoderOnlyLearnedValuesTransformer(params) else: raise ValueError('Unknown model name: %s' % params.model_name) @@ -228,8 +219,7 @@ def modify_params(params: ml_collections.ConfigDict, params.hidden_size += 1 # Set model-specific parameters - if (params.model_name == 'transformer' or - params.model_name == 'transformer_v2'): + if params.model_name == 'transformer': # Transformer code uses default_batch_size, whereas my code uses # batch_size, so make sure both are the same. params.default_batch_size = params.batch_size @@ -241,7 +231,7 @@ def modify_params(params: ml_collections.ConfigDict, logging.info('Setting hidden size to transformer_input_size.') params.hidden_size = params.transformer_input_size if 'transformer' in params.model_name: - transformer_params = misc.get_model_params( + transformer_params = get_transformer_model_params( params.transformer_model_size, num_gpus=num_gpus) # Only add hyperparameters that don't already exist. for param_name, param_value in transformer_params.items(): @@ -249,6 +239,25 @@ def modify_params(params: ml_collections.ConfigDict, params[param_name] = param_value +def get_transformer_model_params(param_set, num_gpus): + """Gets predefined transformer model params.""" + params_map = { + 'tiny': transformer_basic_params.TINY_PARAMS, + 'base': transformer_basic_params.BASE_PARAMS, + 'big': transformer_basic_params.BIG_PARAMS, + } + if num_gpus > 1: + if param_set == 'big': + return transformer_basic_params.BIG_MULTI_GPU_PARAMS.copy() + elif param_set == 'base': + return transformer_basic_params.BASE_MULTI_GPU_PARAMS.copy() + else: + raise ValueError('Not valid params: param_set={} num_gpus={}'.format( + param_set, num_gpus)) + + return params_map[param_set].copy() + + def run_inference_and_write_results(model: tf.keras.Model, out_dir: str, params: ml_collections.ConfigDict, diff --git a/deepconsensus/models/model_utils_test.py b/deepconsensus/models/model_utils_test.py index e477fc4..251b3e2 100644 --- a/deepconsensus/models/model_utils_test.py +++ b/deepconsensus/models/model_utils_test.py @@ -88,7 +88,7 @@ def test_output_dir_created(self): out_dir = f'/tmp/output_dir/{uuid.uuid1()}' self.assertFalse(tf.io.gfile.isdir(out_dir)) - params = model_configs.get_config('transformer_learn_values_v2+test') + params = model_configs.get_config('transformer_learn_values+test') model_utils.modify_params(params) model = model_utils.get_model(params) checkpoint_path = test_utils.deepconsensus_testdata('model/checkpoint-1') diff --git a/deepconsensus/models/networks_test.py b/deepconsensus/models/networks_test.py index 09e2bd3..c3cca97 100644 --- a/deepconsensus/models/networks_test.py +++ b/deepconsensus/models/networks_test.py @@ -62,7 +62,7 @@ class ModelsTest(parameterized.TestCase): [ 'fc+test', 'transformer+test', - 'transformer_learn_values_v2+test', + 'transformer_learn_values+test', ], [True, False])) def test_outputs(self, training, config_name, use_predict): @@ -100,7 +100,7 @@ def test_outputs(self, training, config_name, use_predict): [ 'fc+test', 'transformer+test', - 
'transformer_learn_values_v2+test', + 'transformer_learn_values+test', ], [True, False])) def test_predict_and_model_fn_equal(self, config_name, inference): diff --git a/deepconsensus/models/transformer_basic_params.py b/deepconsensus/models/transformer_basic_params.py new file mode 100644 index 0000000..a513a43 --- /dev/null +++ b/deepconsensus/models/transformer_basic_params.py @@ -0,0 +1,109 @@ +# Copyright (c) 2021, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of Google Inc. nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +"""Defines Transformer basic model parameters for each model size.""" + +import collections + + +BASE_PARAMS = collections.defaultdict( + lambda: None, # Set default value to None. + + # Input params + default_batch_size=2048, # Maximum number of tokens per batch of examples. + default_batch_size_tpu=32768, + max_length=256, # Maximum number of tokens per example. + + # Model params + initializer_gain=1.0, # Used in trainable variable initialization. + vocab_size=33708, # Number of tokens defined in the vocabulary file. + hidden_size=512, # Model dimension in the hidden layers. + num_hidden_layers=6, # Number of layers in the encoder and decoder stacks. + num_heads=8, # Number of heads to use in multi-headed attention. + filter_size=2048, # Inner layer dimension in the feedforward network. + + # Dropout values (only used when training) + layer_postprocess_dropout=0.1, + attention_dropout=0.1, + relu_dropout=0.1, + + # Training params + label_smoothing=0.1, + learning_rate=2.0, + learning_rate_decay_rate=1.0, + learning_rate_warmup_steps=16000, + + # Optimizer params + optimizer_adam_beta1=0.9, + optimizer_adam_beta2=0.997, + optimizer_adam_epsilon=1e-09, + + # Default prediction params + extra_decode_length=50, + beam_size=4, + alpha=0.6, # used to calculate length normalization in beam search + + # TPU specific parameters + use_tpu=False, + static_batch=False, + allow_ffn_pad=True, +) + +BIG_PARAMS = BASE_PARAMS.copy() +BIG_PARAMS.update( + default_batch_size=4096, + + # default batch size is smaller than for BASE_PARAMS due to memory limits. 
+ default_batch_size_tpu=16384, + + hidden_size=1024, + filter_size=4096, + num_heads=16, +) + +# Parameters for running the model in multi gpu. These should not change the +# params that modify the model shape (such as the hidden_size or num_heads). +BASE_MULTI_GPU_PARAMS = BASE_PARAMS.copy() +BASE_MULTI_GPU_PARAMS.update( + learning_rate_warmup_steps=8000 +) + +BIG_MULTI_GPU_PARAMS = BIG_PARAMS.copy() +BIG_MULTI_GPU_PARAMS.update( + layer_postprocess_dropout=0.3, + learning_rate_warmup_steps=8000 +) + +# Parameters for testing the model +TINY_PARAMS = BASE_PARAMS.copy() +TINY_PARAMS.update( + default_batch_size=1024, + default_batch_size_tpu=1024, + hidden_size=32, + num_heads=4, + filter_size=256, +) diff --git a/deepconsensus/testdata/README.md b/deepconsensus/testdata/README.md index 33d78bc..8045751 100644 --- a/deepconsensus/testdata/README.md +++ b/deepconsensus/testdata/README.md @@ -27,7 +27,7 @@ This command should take ~6 min to complete. Generated with: ```bash -MODEL=transformer_learn_values_v2 +MODEL=transformer_learn_values CONFIG="//learning/genomics/deepconsensus/models/model_configs.py:${MODEL}+test" TEMP_MODEL_DIR="/tmp/deepconsensus/model/$(TZ=US/Pacific date '+%Y%m%d%H%M%S')" MODEL_TRAIN_COMMAND="time blaze run -c opt \\ diff --git a/deepconsensus/testdata/model/params.json b/deepconsensus/testdata/model/params.json index 12bd971..2b4e1d3 100644 --- a/deepconsensus/testdata/model/params.json +++ b/deepconsensus/testdata/model/params.json @@ -35,8 +35,8 @@ "max_length": 120, "max_passes": 20, "model_checkpoint_freq": "epoch", - "model_config_name": "transformer_learn_values_v2", - "model_name": "transformer_learn_values_v2", + "model_config_name": "transformer_learn_values", + "model_name": "transformer_learn_values", "num_channels": 1, "num_classes": 5, "num_epochs": 50, diff --git a/install-gpu.sh b/install-gpu.sh index 2f1e84e..2d9acb8 100755 --- a/install-gpu.sh +++ b/install-gpu.sh @@ -57,4 +57,4 @@ echo "$(pip --version)" # Install python packages used by DeepConsensus. ################################################################################ python3 -m pip install --user -r requirements.txt -python3 -m pip install --user "tensorflow-gpu>=2.4.0,<=2.7.0" +python3 -m pip install --user "tensorflow-gpu>=2.9.0" diff --git a/install.sh b/install.sh index caec79f..717f209 100755 --- a/install.sh +++ b/install.sh @@ -57,4 +57,4 @@ echo "$(pip --version)" # Install python packages used by DeepConsensus. 
################################################################################ python3 -m pip install --user -r requirements.txt -python3 -m pip install --user "intel-tensorflow>=2.4.0,<=2.7.0" +python3 -m pip install --user "intel-tensorflow>=2.9.0" diff --git a/requirements.txt b/requirements.txt index 7980264..f52fe42 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,14 @@ numpy>=1.19 pandas>=1.1 -tf-models-official>=2.4.0,<=2.7.0 +tf-models-official>=2.9.0 ml_collections>=0.1.0 absl-py>=0.13.0 - +protobuf<3.20,>=3.9.2,<4,>=3.13 +flatbuffers<2,>=1.12 +keras<2.10.0,>=2.9.0rc0 +tensorflow-estimator<2.10.0,>=2.9.0rc0 +zipp>=3.1.0 +httplib2>=0.15.0 +httplib2<1dev,>=0.15.0 pysam==0.19.0 +testresources diff --git a/setup.py b/setup.py index 1b53029..6ea20dd 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,8 @@ REQUIREMENTS = (here / 'requirements.txt').read_text().splitlines() EXTRA_REQUIREMENTS = { - 'cpu': ['intel-tensorflow>=2.4.0,<=2.7.0'], - 'gpu': ['tensorflow-gpu>=2.4.0,<=2.7.0'] + 'cpu': ['intel-tensorflow>=2.9.0'], + 'gpu': ['tensorflow-gpu>=2.9.0'] }
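
A minimal usage sketch of the get_transformer_model_params helper added in model_utils.py above, with values that follow from the tables in transformer_basic_params.py:

    from deepconsensus.models import model_utils

    # Single GPU: returns a copy of the named table ('tiny', 'base' or 'big').
    params = model_utils.get_transformer_model_params('base', num_gpus=1)
    print(params['hidden_size'], params['num_heads'])    # 512 8

    # More than one GPU: the *_MULTI_GPU variants are returned; only 'base'
    # and 'big' have them, anything else raises ValueError.
    multi = model_utils.get_transformer_model_params('big', num_gpus=2)
    print(multi['learning_rate_warmup_steps'])           # 8000
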