evaluate.py
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import os
import argparse
import joblib
import pandas as pd
# from matplotlib import pyplot as plt
from azureml.core import Run
from azureml.core.run import _OfflineRun

DIR_FIGURES = 'figures/'


def main(model_path, dataset_path, output_dir):
    """Evaluate the model.

    Args:
        model_path (str): The path of the model file
        dataset_path (str): The path of the dataset to use for evaluation
        output_dir (str): The path of the output directory

    Returns:
        None
    """
    step_run = Run.get_context()
    pipeline_run = step_run.parent

    print("Loading model...")
    model = joblib.load(model_path)

    print("Reading test data...")
    data = pd.read_csv(dataset_path)

    print("Evaluating model...")
    y_test, X_test = split_data_features(data)
    metrics, plots = get_model_evaluation(model, X_test, y_test)
    print(metrics)

    # Save metrics in eval run and also parent run
    print("Saving metrics...")
    if not isinstance(step_run, _OfflineRun):
        for k, v in metrics.items():
            step_run.log(k, v)
            if pipeline_run is not None:
                pipeline_run.log(k, v)

    # Save figures in run outputs
    print(f"Saving figures in folder {DIR_FIGURES}...")
    os.makedirs(DIR_FIGURES, exist_ok=True)
    for fig_name, fig in plots.items():
        file_path = os.path.join(DIR_FIGURES, f'{fig_name}.png')
        fig.savefig(file_path)
        step_run.log_image(fig_name, file_path)
        if pipeline_run is not None:
            pipeline_run.log_image(fig_name, file_path)

    print('Finished.')


def split_data_features(data):
    # Do your X/y features split here (this template assumes the target is the first column)
    y_test, X_test = data.iloc[:, 0], data.iloc[:, 1:]
    return y_test, X_test
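
# If the prepared dataset identifies the target by name rather than by position, the
# split above could instead be written as shown below (an illustrative sketch only;
# 'target' is not a column name defined anywhere in this template):
#
#     y_test, X_test = data['target'], data.drop(columns=['target'])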


def get_model_evaluation(model, X_test, y_test):
    # Evaluate your model here
    metrics = {'examplemetric1': 0.1, 'examplemetric2': 2.2}
    plots = {
        'scatter': pd.DataFrame({'pred': [1, 0.2, 0.3], 'real': [0.9, 0.15, 0.5]})
                     .plot(x='real', y='pred', kind='scatter', figsize=(5, 5))
                     .get_figure()
    }
    return metrics, plots
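
# A concrete replacement for the placeholder above, for a regression model, might look
# like the sketch below. It assumes scikit-learn is installed and that the model exposes
# the usual predict() interface; neither assumption comes from this template.
#
#     from sklearn.metrics import mean_absolute_error, r2_score
#
#     def get_model_evaluation(model, X_test, y_test):
#         y_pred = model.predict(X_test)
#         metrics = {'mae': mean_absolute_error(y_test, y_pred),
#                    'r2': r2_score(y_test, y_pred)}
#         # Scatter plot of predicted vs. real values, reusing the pandas plotting
#         # pattern from the placeholder implementation above
#         fig = (pd.DataFrame({'pred': y_pred, 'real': y_test.to_numpy()})
#                .plot(x='real', y='pred', kind='scatter', figsize=(5, 5))
#                .get_figure())
#         return metrics, {'scatter': fig}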


def parse_args(args_list=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-dir', type=str, required=True)
    parser.add_argument('--model-name', type=str, required=True)
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--output-dir', type=str, default='./outputs')
    args_parsed = parser.parse_args(args_list)
    return args_parsed


if __name__ == '__main__':
    args = parse_args()
    main(
        model_path=os.path.join(args.model_dir, f'{args.model_name}.pkl'),  # Path as defined in train.py
        dataset_path=os.path.join(args.dataset, 'dataset.csv'),  # Path as defined in dataprep.py
        output_dir=args.output_dir
    )
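
# Example local invocation (a sketch; the directory names are illustrative placeholders,
# not paths defined by this repository):
#
#     python evaluate.py --model-dir ./outputs/models --model-name model \
#         --dataset ./outputs/data --output-dir ./outputs
#
# With these arguments the script would load ./outputs/models/model.pkl and
# ./outputs/data/dataset.csv, matching the path construction in the __main__ block above.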