# Patch summary: wires a placeholder "slide" optimizer into the optimization
# package across four files:
#   * configs/optimization_config.py: adds a `slide` entry (docstring line and
#     field defaulting to opt_cfg.SLIDEConfig()) to OptimizerConfig.
#   * configs/optimizer_config.py: adds the SLIDEConfig dataclass, a
#     BaseOptimizerConfig subclass holding the optimizer's default settings.
#   * optimizer_factory.py: imports slide_optimizer and maps the key 'slide'
#     to slide_optimizer.SLIDE.
#   * slide_optimizer.py (new file): defines SLIDE as the string
#     "Unimplemented"; no optimizer class is provided yet.
# NOTE(review): because 'slide' maps to a string rather than a class, choosing
# this optimizer presumably fails wherever the factory instantiates the mapped
# value -- confirm against the factory's build path, which this patch does not
# show.
# NOTE(review): the slide_optimizer import is added ahead of ema_optimizer,
# breaking the alphabetical order of the neighbouring imports, and the new
# 'slide' mapping entry has no trailing comma unlike the entries above it.
# NOTE(review): the hunks below appear to have had their line breaks collapsed;
# the @@ line counts presumably describe the original multi-line layout.
diff --git a/official/modeling/optimization/configs/optimization_config.py b/official/modeling/optimization/configs/optimization_config.py index 4b6e400b61e..49a4db624d9 100644 --- a/official/modeling/optimization/configs/optimization_config.py +++ b/official/modeling/optimization/configs/optimization_config.py @@ -41,6 +41,7 @@ class OptimizerConfig(oneof.OneOfConfig): rmsprop: rmsprop optimizer. lars: lars optimizer. adagrad: adagrad optimizer. + slide: slide optimizer. """ type: Optional[str] = None sgd: opt_cfg.SGDConfig = opt_cfg.SGDConfig() @@ -50,6 +51,7 @@ class OptimizerConfig(oneof.OneOfConfig): rmsprop: opt_cfg.RMSPropConfig = opt_cfg.RMSPropConfig() lars: opt_cfg.LARSConfig = opt_cfg.LARSConfig() adagrad: opt_cfg.AdagradConfig = opt_cfg.AdagradConfig() + slide: opt_cfg.SLIDEConfig = opt_cfg.SLIDEConfig() @dataclasses.dataclass diff --git a/official/modeling/optimization/configs/optimizer_config.py b/official/modeling/optimization/configs/optimizer_config.py index 7b4de948248..1d9570e21a5 100644 --- a/official/modeling/optimization/configs/optimizer_config.py +++ b/official/modeling/optimization/configs/optimizer_config.py @@ -226,3 +226,24 @@ class LARSConfig(BaseOptimizerConfig): classic_momentum: bool = True exclude_from_weight_decay: Optional[List[str]] = None exclude_from_layer_adaptation: Optional[List[str]] = None + + +@dataclasses.dataclass +class SLIDEConfig(BaseOptimizerConfig): + """Configuration for SLIDE optimizer. + + Details coming soon. 
+ """ + name: str = "SLIDE" + beta_1: float = 0.9 + beta_2: float = 0.999 + epsilon: float = 1e-6 + weight_decay_rate: float = 0.0 + weight_decay_type: str = "inner" + exclude_from_weight_decay: Optional[List[str]] = None + exclude_from_layer_adaptation: Optional[List[str]] = None + include_in_sparse_layer_adaptation: Optional[List[str]] = None + sparse_layer_learning_rate: float = 0.1 + do_gradient_rescaling: bool = True + norm_type: str = "layer" + ratio_clip_norm: float = 1e5 diff --git a/official/modeling/optimization/optimizer_factory.py b/official/modeling/optimization/optimizer_factory.py index 09bb5deda6f..c41d98fb607 100644 --- a/official/modeling/optimization/optimizer_factory.py +++ b/official/modeling/optimization/optimizer_factory.py @@ -19,6 +19,7 @@ import tensorflow as tf import tensorflow_addons.optimizers as tfa_optimizers +from official.modeling.optimization import slide_optimizer from official.modeling.optimization import ema_optimizer from official.modeling.optimization import lars_optimizer from official.modeling.optimization import lr_schedule @@ -33,6 +34,7 @@ 'rmsprop': tf.keras.optimizers.RMSprop, 'lars': lars_optimizer.LARS, 'adagrad': tf.keras.optimizers.Adagrad, + 'slide': slide_optimizer.SLIDE } LR_CLS = { diff --git a/official/modeling/optimization/slide_optimizer.py b/official/modeling/optimization/slide_optimizer.py new file mode 100644 index 00000000000..c1975a3111e --- /dev/null +++ b/official/modeling/optimization/slide_optimizer.py @@ -0,0 +1,20 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""SLIDE optimizer. + +A new optimizer that will be open sourced soon. +""" + +SLIDE = "Unimplemented"