Optimize torch tests #3112

Merged 4 commits on Jun 28, 2024
@@ -144,6 +144,15 @@ def forward(self, *inputs):
         x = self.fc2(x)
         return self.log_softmax(x)
 
+
+class NetSmall(Net):
+
+    def __init__(self):
+        """ Constructor """
+
+        super(NetSmall, self).__init__()
+        self.fc1 = nn.Linear(7 * 7 * 64, 128)
+        self.fc2 = nn.Linear(128, 10)
 
 class ExtendedNet(nn.Module):
     """ Mnist Model """
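The NetSmall class added above keeps Net's convolutional stem but swaps in a much narrower classifier head. The widths of the parent Net's head are not visible in this hunk; judging from the old assertion in test_weight_svd.py further down (out_features of 1024), the shrink is roughly 8x. A minimal sketch of that size difference, assuming Net uses a 3136 -> 1024 -> 10 head (an assumption, not taken from this diff):

import torch.nn as nn

def count(m):
    return sum(p.numel() for p in m.parameters())

# Assumed head of the original Net (1024 is inferred from the old test_weight_svd assertion)
big_head = nn.Sequential(nn.Linear(7 * 7 * 64, 1024), nn.Linear(1024, 10))
# Head of the new NetSmall, exactly as added in this hunk
small_head = nn.Sequential(nn.Linear(7 * 7 * 64, 128), nn.Linear(128, 10))

print(count(big_head), count(small_head))  # ~3.2M vs ~0.4M weights to prune or quantize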
@@ -989,7 +989,7 @@ def test_apply_adaround_using_gpu(self, dtype):
         dummy_input = [x.to(dtype=dtype) for x in dummy_input]
         out_before_ada = model(*dummy_input)
 
-        params = AdaroundParameters(data_loader=data_loader, num_batches=4, default_num_iterations=1000)
+        params = AdaroundParameters(data_loader=data_loader, num_batches=4, default_num_iterations=50)
         ada_rounded_model = Adaround.apply_adaround(model, dummy_input, params, './', 'dummy')
         out_after_ada = ada_rounded_model(*dummy_input)
 
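default_num_iterations bounds the rounding-optimization loop AdaRound runs for each quantized module, so this one-line change cuts that loop 20x for the GPU test while the calibration data and the apply_adaround call stay the same. A hedged sketch of the updated parameters, reusing the names visible in the hunk (data_loader, model and dummy_input come from the surrounding test; the import path is the usual aimet_torch one and is an assumption here, since this file's imports are outside the hunk):

from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters

# num_batches: batches drawn from data_loader for the rounding optimization
# default_num_iterations: optimization steps spent on each quantized module (1000 -> 50 here)
params = AdaroundParameters(data_loader=data_loader, num_batches=4, default_num_iterations=50)
ada_rounded_model = Adaround.apply_adaround(model, dummy_input, params, './', 'dummy')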
TrainingExtensions/torch/test/python/test_model_preparer.py (11 additions, 10 deletions)
@@ -53,6 +53,7 @@
 from torchvision import models
 from math import sqrt
 from torch.utils.data import DataLoader
+from models.test_models import SingleResidual
 
 from aimet_common.defs import QuantScheme
 from aimet_torch import elementwise_ops
@@ -407,9 +408,9 @@ def test_fx_with_batch_norm_folding(self):
         """
         test torch fx with torchvision Resnet18 - BN fold
         """
-        input_shape = (1, 3, 224, 224)
+        input_shape = (1, 3, 32, 32)
         input_tensor = torch.randn(*input_shape)
-        model = models.resnet18().eval()
+        model = SingleResidual().eval()
         model_copy = copy.deepcopy(model)
         folded_pairs_for_original_model = fold_all_batch_norms(model, input_shape)
 
@@ -444,9 +445,9 @@ def test_fx_with_cle(self):
         """
         test torch fx with torchvision Resnet18 - Cross layer equalization
         """
-        input_shape = (1, 3, 224, 224)
+        input_shape = (1, 3, 32, 32)
         input_tensor = torch.randn(*input_shape).cuda()
-        model = models.resnet18().cuda().eval()
+        model = SingleResidual().cuda().eval()
         model_copy = copy.deepcopy(model)
 
         # Perform CLE - (BN fold, ReLU6 -> ReLU replacement, CLS, HBF)
@@ -480,20 +481,20 @@ def test_fx_with_adaround(self):
         test torch fx with torchvision Resnet18 - adaround
         """
         seed_all(1)
-        input_shape = (1, 3, 224, 224)
+        input_shape = (1, 3, 32, 32)
         dummy_input = torch.randn(*input_shape).cuda()
-        model = models.resnet18().cuda().eval()
+        model = SingleResidual().cuda().eval()
         model_copy = copy.deepcopy(model)
 
         # create fake data loader with image size (3, 224, 224)
         data_loader = create_fake_data_loader(dataset_size=16, batch_size=16, image_size=input_shape[1:])
         params = AdaroundParameters(data_loader=data_loader, num_batches=1, default_num_iterations=5)
         adarounded_original_model = Adaround.apply_adaround(model, dummy_input, params, path='./',
-                                                            filename_prefix='resnet18')
+                                                            filename_prefix='resnet')
         # Apply Adaround for transformed model
         model_transformed = prepare_model(model_copy)
         adarounded_transformed_model = Adaround.apply_adaround(model_transformed, dummy_input, params, path='./',
-                                                               filename_prefix='resnet18')
+                                                               filename_prefix='resnet')
         # compare weights for very first layer
         # Weights should be same
         original_model_conv1_weight = adarounded_original_model.conv1.weight.clone()
@@ -514,9 +515,9 @@ def test_fx_with_bias_correction(self):
         test torch fx with torchvision Resnet18 - bias correction
         """
         seed_all(1)
-        input_shape = (1, 3, 224, 224)
+        input_shape = (1, 3, 32, 32)
         dummy_input = torch.randn(*input_shape).cuda()
-        model = models.resnet18().cuda().eval()
+        model = SingleResidual().cuda().eval()
         model_copy = copy.deepcopy(model)
 
         # create fake data loader with image size (3, 224, 224)
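Every test in this file that previously instantiated torchvision's resnet18 (roughly 11.7M parameters at 224x224) now runs SingleResidual from models/test_models.py at 32x32. That class is defined elsewhere in the repo and is not part of this diff; the sketch below is a hypothetical stand-in only, showing the shape of model these tests now expect: a small residual CNN with a conv1 attribute (the adaround test compares conv1.weight) and a BatchNorm for the BN-fold test. The real definition will differ.

import torch
import torch.nn as nn

class SingleResidualSketch(nn.Module):
    """Hypothetical stand-in for models.test_models.SingleResidual, not the real definition."""
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)   # tests inspect conv1.weight
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(16)                               # gives fold_all_batch_norms something to fold
        self.relu = nn.ReLU()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(16, num_classes)

    def forward(self, x):
        residual = self.relu(self.conv1(x))
        x = self.bn(self.conv2(residual)) + residual               # the single residual connection
        x = self.avgpool(self.relu(x))
        return self.fc(torch.flatten(x, 1))

# 32x32 input matches the new input_shape used throughout this file
print(SingleResidualSketch().eval()(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 10])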
@@ -530,19 +530,20 @@ def test_prepare_model_with_pytorch_transformer_layer_after_act_replacement(self
         torch.cuda.manual_seed(seed)
         torch.cuda.manual_seed_all(seed)
 
-        src = torch.rand(10, 32, 512)
-        dummy_input = torch.rand(10, 32, 512)
+        src = torch.rand(1, 32, 128)
+        dummy_input = torch.rand(1, 32, 128)
 
         def forward_pass(model, args):
             model.eval()
             with torch.no_grad():
                 model(dummy_input, dummy_input)
 
-        num_encoder_layers = 12
-        default_num_decoder_layers = 6
+        num_encoder_layers = 2
+        default_num_decoder_layers = 2
 
         # start with a vanilla PyTorch transformer layer
-        transformer_model = nn.Transformer(nhead=16, num_encoder_layers=num_encoder_layers)
+        transformer_model = nn.Transformer(d_model=128, dim_feedforward=256, nhead=16, num_encoder_layers=num_encoder_layers,
+                                           num_decoder_layers=default_num_decoder_layers)
         transformer_model.eval()
 
         from torch import fx
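The transformer test shrinks both the tensors (10x32x512 down to 1x32x128) and the model itself: 12 encoder plus 6 decoder layers at PyTorch's defaults (d_model=512, dim_feedforward=2048) become 2+2 layers at d_model=128, dim_feedforward=256. A rough sketch of why that matters, using only stock torch.nn (the parameter counts in the comment are approximate):

import torch.nn as nn

def param_count(model: nn.Module) -> int:
    return sum(p.numel() for p in model.parameters())

# Old test configuration: defaults d_model=512, dim_feedforward=2048, num_decoder_layers=6
old_cfg = nn.Transformer(nhead=16, num_encoder_layers=12)
# New test configuration from this hunk
new_cfg = nn.Transformer(d_model=128, dim_feedforward=256, nhead=16,
                         num_encoder_layers=2, num_decoder_layers=2)
print(param_count(old_cfg), param_count(new_cfg))  # tens of millions vs. well under a million

Keeping nhead=16 requires d_model to stay divisible by 16, which 128 satisfies.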
TrainingExtensions/torch/test/python/test_weight_pad_utils.py (6 additions, 4 deletions)
@@ -48,6 +48,8 @@
 from aimet_torch.tensor_quantizer import StaticGridPerTensorQuantizer, StaticGridPerChannelQuantizer
 from aimet_torch.weight_padding_utils import recompute_scale, recompute_encodings, weight_pad, WeightPaddingParams
 
+from models.test_models import TinyModel
+
 
 def evaluate(model: torch.nn.Module, dummy_input: torch.Tensor):
     """
@@ -186,8 +188,8 @@ def test_weight_pad_in_place_per_channel(self):
             assert val % 16 == 0
 
     def test_weight_pad_in_place_per_tensor(self):
-        model = models.resnet50(pretrained=True)
-        dummy_input = torch.randn(1, 3, 224, 224)
+        model = TinyModel()
+        dummy_input = torch.randn(1, 3, 32, 32)
         sim = QuantizationSimModel(model, dummy_input, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                    default_param_bw=16, default_output_bw=16)
         sim.compute_encodings(evaluate, dummy_input)
@@ -215,8 +217,8 @@ def test_weight_pad_in_place_per_tensor(self):
             assert val % 16 == 0
 
     def test_weight_pad_export_per_tensor(self):
-        model = models.resnet50(pretrained=True)
-        dummy_input = torch.randn(1, 3, 224, 224)
+        model = TinyModel()
+        dummy_input = torch.randn(1, 3, 32, 32)
         sim = QuantizationSimModel(model, dummy_input, quant_scheme=QuantScheme.post_training_tf_enhanced,
                                    default_param_bw=16, default_output_bw=16)
         sim.compute_encodings(evaluate, dummy_input)
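Both per-tensor tests previously built resnet50(pretrained=True), which pulls the pretrained checkpoint (around 100 MB) and pushes 224x224 inputs through a roughly 25M-parameter network just to compute encodings; TinyModel from models/test_models.py keeps the same flow at 32x32. A hedged sketch of the new setup (TinyModel's definition and this file's QuantizationSimModel import are outside this diff; the import path shown is the usual aimet_torch one and is assumed here):

import torch
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel
from models.test_models import TinyModel

model = TinyModel()                      # no pretrained-weight download
dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR-sized input instead of 224x224
sim = QuantizationSimModel(model, dummy_input, quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_param_bw=16, default_output_bw=16)
sim.compute_encodings(lambda m, _: m(dummy_input), dummy_input)  # same role as this file's evaluate()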
TrainingExtensions/torch/test/python/test_weight_svd.py (2 additions, 2 deletions)
@@ -874,7 +874,7 @@ def test_prune_layer(self):
 
     def test_prune_model_2_layers(self):
 
-        model = mnist_model.Net()
+        model = mnist_model.NetSmall()
 
         # Create a layer database
         input_shape = (1, 1, 28, 28)
@@ -893,7 +893,7 @@ def test_prune_model_2_layers(self):
         fc1_b = layer_db.find_layer_by_name('fc1.1')
 
         self.assertEqual(3136, fc1_a.module.in_features)
-        self.assertEqual(1024, fc1_b.module.out_features)
+        self.assertEqual(128, fc1_b.module.out_features)
 
         conv2_a = layer_db.find_layer_by_name('conv2.0')
         conv2_b = layer_db.find_layer_by_name('conv2.1')
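The assertion change falls straight out of NetSmall: per the test's own checks, weight SVD splits fc1 into the pair fc1.0/fc1.1, where fc1.0 keeps the original in_features (7 * 7 * 64 = 3136) and fc1.1 keeps the original out_features, now 128 instead of 1024, so the matrix being factorized is 8x smaller. A quick illustration with plain torch (the timing comparison is illustrative only, not taken from the test):

import time
import torch

def svd_seconds(out_features, in_features):
    weight = torch.randn(out_features, in_features)  # nn.Linear stores weight as (out, in)
    start = time.perf_counter()
    torch.linalg.svd(weight, full_matrices=False)
    return time.perf_counter() - start

print(svd_seconds(1024, 3136))  # original Net fc1 weight
print(svd_seconds(128, 3136))   # NetSmall fc1 weight: far less work to factorize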
TrainingExtensions/torch/test/python/v2/test_adaround.py (1 addition, 1 deletion)
@@ -982,7 +982,7 @@ def test_apply_adaround_using_gpu(self, dtype):
         dummy_input = [x.to(dtype=dtype) for x in dummy_input]
         out_before_ada = model(*dummy_input)
 
-        params = AdaroundParameters(data_loader=data_loader, num_batches=4, default_num_iterations=1000)
+        params = AdaroundParameters(data_loader=data_loader, num_batches=4, default_num_iterations=50)
         ada_rounded_model = Adaround.apply_adaround(model, dummy_input, params, './', 'dummy')
         out_after_ada = ada_rounded_model(*dummy_input)
 