# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for LAMB Optimizer."""
import numpy as np
from numpy import linalg
import tensorflow as tf, tf_keras
from official.modeling.optimization import lamb
def lamb_update_numpy(param,
                      g_t,
                      t,
                      m,
                      v,
                      lr=0.001,
                      lamb_wd=0.0,
                      beta1=0.9,
                      beta2=0.999,
                      epsilon=1e-6):
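  """Reference LAMB update computed with numpy.

  Applies the Adam-style first/second moment updates with bias correction,
  adds decoupled weight decay, and scales the step by the trust ratio
  ||param|| / ||update|| before subtracting it from the parameter.
  """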
  m_t = beta1 * m + (1 - beta1) * g_t
  v_t = beta2 * v + (1 - beta2) * g_t * g_t
  m_t_hat = m_t / (1 - beta1**(t + 1))
  v_t_hat = v_t / (1 - beta2**(t + 1))
  update = m_t_hat / (np.sqrt(v_t_hat) + epsilon)
  update += lamb_wd * param
  w_norm = linalg.norm(param, ord=2)
  g_norm = linalg.norm(update, ord=2)
  ratio = np.where(w_norm > 0, np.where(g_norm > 0, (w_norm / g_norm), 1.0),
                   1.0)
  param_t = param - ratio * lr * update
  return param_t, m_t, v_t


def get_beta_accumulators(opt, dtype):
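  """Returns (beta_1**step, beta_2**step) for the optimizer's next step."""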
  local_step = tf.cast(opt.iterations + 1, dtype)
  beta_1_t = tf.cast(opt._get_hyper("beta_1"), dtype)
  beta_1_power = tf.math.pow(beta_1_t, local_step)
  beta_2_t = tf.cast(opt._get_hyper("beta_2"), dtype)
  beta_2_power = tf.math.pow(beta_2_t, local_step)
  return (beta_1_power, beta_2_power)


class LAMBTest(tf.test.TestCase):

  def test_sparse(self):
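    """Checks LAMB with tf.IndexedSlices gradients against the numpy update."""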
    dtype = tf.float32
    # Initialize variables for numpy implementation.
    m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
    var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype)
    grads0_np = np.array([0.1, 0.0, 0.1], dtype=dtype.as_numpy_dtype)
    var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype)
    grads1_np = np.array([0.01, 0.0, 0.01], dtype=dtype.as_numpy_dtype)
    var0 = tf.Variable(var0_np)
    var1 = tf.Variable(var1_np)
    grads0_np_indices = np.array([0, 2], dtype=np.int32)
    grads0 = tf.IndexedSlices(
        tf.constant(grads0_np[grads0_np_indices]),
        tf.constant(grads0_np_indices),
        tf.constant([3]),
    )
    grads1_np_indices = np.array([0, 2], dtype=np.int32)
    grads1 = tf.IndexedSlices(
        tf.constant(grads1_np[grads1_np_indices]),
        tf.constant(grads1_np_indices),
        tf.constant([3]),
    )
    opt = lamb.LAMB()
    # Fetch params to validate initial values.
    np.testing.assert_allclose(np.asanyarray([1.0, 1.0, 2.0]), var0.numpy())
    np.testing.assert_allclose(np.asanyarray([3.0, 3.0, 4.0]), var1.numpy())
    # Run 3 steps of LAMB.
    for t in range(3):
      beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
      self.assertAllClose(0.9 ** (t + 1), beta_1_power)
      self.assertAllClose(0.999 ** (t + 1), beta_2_power)
      opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      var0_np, m0, v0 = lamb_update_numpy(var0_np, grads0_np, t, m0, v0)
      var1_np, m1, v1 = lamb_update_numpy(var1_np, grads1_np, t, m1, v1)
      # Validate updated params.
      self.assertAllClose(var0_np, var0.numpy())
      self.assertAllClose(var1_np, var1.numpy())

  def test_basic_with_learning_rate_decay(self):
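    """Checks dense LAMB updates with weight decay and lr decay against numpy."""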
    dtype = tf.float32
    # Initialize variables for numpy implementation.
    m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
    var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
    grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
    var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
    grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
    var0 = tf.Variable(var0_np, name="var0")
    var1 = tf.Variable(var1_np, name="var1")
    grads0 = tf.constant(grads0_np)
    grads1 = tf.constant(grads1_np)
    learning_rate = 0.001
    beta_1 = 0.9
    beta_2 = 0.999
    epsilon = 1e-7
    decay = 0.5
    lamb_wd = 0.01
    opt = lamb.LAMB(
        learning_rate=learning_rate,
        beta_1=beta_1,
        beta_2=beta_2,
        epsilon=epsilon,
        weight_decay_rate=lamb_wd,
        decay=decay,
    )
    # Run 3 steps of LAMB.
    for t in range(3):
      opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
      lr_np = learning_rate / (1 + decay * t)
      var0_np, m0, v0 = lamb_update_numpy(
          var0_np, grads0_np, t, m0, v0, lr=lr_np, lamb_wd=lamb_wd)
      var1_np, m1, v1 = lamb_update_numpy(
          var1_np, grads1_np, t, m1, v1, lr=lr_np, lamb_wd=lamb_wd)
      # Validate updated params.
      self.assertAllClose(var0_np, var0.numpy())
      self.assertAllClose(var1_np, var1.numpy())

  def test_exclude_weight_decay(self):
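    """Weight decay is skipped for variable names matching the exclude list."""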
    opt = lamb.LAMB(
        0.01, weight_decay_rate=0.01, exclude_from_weight_decay=["var1"]
    )
    assert opt._do_use_weight_decay("var0")
    assert not opt._do_use_weight_decay("var1")
    assert not opt._do_use_weight_decay("var1_weight")

  def test_exclude_layer_adaptation(self):
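    """Layer adaptation is skipped for variable names matching the exclude list."""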
    opt = lamb.LAMB(0.01, exclude_from_layer_adaptation=["var1"])
    assert opt._do_layer_adaptation("var0")
    assert not opt._do_layer_adaptation("var1")
    assert not opt._do_layer_adaptation("var1_weight")

  def test_serialization(self):
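    """A LAMB instance round-trips through legacy-format (de)serialization."""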
    optimizer = lamb.LAMB(1e-4)
    config = tf_keras.optimizers.serialize(optimizer, use_legacy_format=True)
    new_optimizer = tf_keras.optimizers.deserialize(
        config, use_legacy_format=True
    )
    assert new_optimizer.get_config() == optimizer.get_config()


if __name__ == "__main__":
  tf.test.main()