[go: nahoru, domu]

Skip to content

Commit

Permalink
updated examples
Browse files Browse the repository at this point in the history
  • Loading branch information
cfregly committed Nov 20, 2022
1 parent 729b7b9 commit 3b926fd
Show file tree
Hide file tree
Showing 28 changed files with 1,303 additions and 799 deletions.
27 changes: 20 additions & 7 deletions 03_automl/generated_module/candidate_data_processors/dpp0.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,46 @@

# Given a list of column names and target column name, Header can return the index
# for given column name
HEADER = Header(column_names=["star_rating", "review_body"], target_column_name="star_rating")
HEADER = Header(
column_names=['star_rating', 'review_body'],
target_column_name='star_rating'
)


def build_feature_transform():
""" Returns the model definition representing feature processing."""

# These features can be parsed as natural language.

text = HEADER.as_feature_indices(["review_body"])
text = HEADER.as_feature_indices(['review_body'])

text_processors = Pipeline(
steps=[
(
"multicolumntfidfvectorizer",
MultiColumnTfidfVectorizer(max_df=0.9941, min_df=0.0007, analyzer="word", max_features=10000),
'multicolumntfidfvectorizer',
MultiColumnTfidfVectorizer(
max_df=0.9941,
min_df=0.0007,
analyzer='word',
max_features=10000
)
)
]
)

column_transformer = ColumnTransformer(transformers=[("text_processing", text_processors, text)])
column_transformer = ColumnTransformer(
transformers=[('text_processing', text_processors, text)]
)

return Pipeline(
steps=[("column_transformer", column_transformer), ("robuststandardscaler", RobustStandardScaler())]
steps=[
('column_transformer', column_transformer
), ('robuststandardscaler', RobustStandardScaler())
]
)


def build_label_transform():
"""Returns the model definition representing label processing."""

return RobustLabelEncoder(labels=["1", "2", "3", "4", "5"])
return RobustLabelEncoder(labels=['1', '2', '3', '4', '5'])
28 changes: 19 additions & 9 deletions 03_automl/generated_module/candidate_data_processors/dpp1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,47 @@

# Given a list of column names and target column name, Header can return the index
# for given column name
HEADER = Header(column_names=["star_rating", "review_body"], target_column_name="star_rating")
HEADER = Header(
column_names=['star_rating', 'review_body'],
target_column_name='star_rating'
)


def build_feature_transform():
""" Returns the model definition representing feature processing."""

# These features can be parsed as natural language.

text = HEADER.as_feature_indices(["review_body"])
text = HEADER.as_feature_indices(['review_body'])

text_processors = Pipeline(
steps=[
(
"multicolumntfidfvectorizer",
MultiColumnTfidfVectorizer(max_df=0.99, min_df=0.0021, analyzer="char_wb", max_features=10000),
'multicolumntfidfvectorizer',
MultiColumnTfidfVectorizer(
max_df=0.99,
min_df=0.0021,
analyzer='char_wb',
max_features=10000
)
)
]
)

column_transformer = ColumnTransformer(transformers=[("text_processing", text_processors, text)])
column_transformer = ColumnTransformer(
transformers=[('text_processing', text_processors, text)]
)

return Pipeline(
steps=[
("column_transformer", column_transformer),
("robustpca", RobustPCA(n_components=5)),
("robuststandardscaler", RobustStandardScaler()),
('column_transformer',
column_transformer), ('robustpca', RobustPCA(n_components=5)),
('robuststandardscaler', RobustStandardScaler())
]
)


def build_label_transform():
"""Returns the model definition representing label processing."""

return RobustLabelEncoder(labels=["1", "2", "3", "4", "5"])
return RobustLabelEncoder(labels=['1', '2', '3', '4', '5'])
27 changes: 20 additions & 7 deletions 03_automl/generated_module/candidate_data_processors/dpp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,46 @@

# Given a list of column names and target column name, Header can return the index
# for given column name
HEADER = Header(column_names=["star_rating", "review_body"], target_column_name="star_rating")
HEADER = Header(
column_names=['star_rating', 'review_body'],
target_column_name='star_rating'
)


def build_feature_transform():
""" Returns the model definition representing feature processing."""

# These features can be parsed as natural language.

text = HEADER.as_feature_indices(["review_body"])
text = HEADER.as_feature_indices(['review_body'])

text_processors = Pipeline(
steps=[
(
"multicolumntfidfvectorizer",
MultiColumnTfidfVectorizer(max_df=0.9983, min_df=0.0005, analyzer="word", max_features=10000),
'multicolumntfidfvectorizer',
MultiColumnTfidfVectorizer(
max_df=0.9983,
min_df=0.0005,
analyzer='word',
max_features=10000
)
)
]
)

column_transformer = ColumnTransformer(transformers=[("text_processing", text_processors, text)])
column_transformer = ColumnTransformer(
transformers=[('text_processing', text_processors, text)]
)

return Pipeline(
steps=[("column_transformer", column_transformer), ("robuststandardscaler", RobustStandardScaler())]
steps=[
('column_transformer', column_transformer
), ('robuststandardscaler', RobustStandardScaler())
]
)


def build_label_transform():
"""Returns the model definition representing label processing."""

return RobustLabelEncoder(labels=["1", "2", "3", "4", "5"])
return RobustLabelEncoder(labels=['1', '2', '3', '4', '5'])
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,35 @@

def _is_inverse_label_transform():
"""Returns True if it's running in inverse label transform mode."""
return os.getenv("AUTOML_TRANSFORM_MODE") == "inverse-label-transform"
return os.getenv('AUTOML_TRANSFORM_MODE') == 'inverse-label-transform'


def _is_feature_transform():
"""Returns True if it's running in feature transform mode."""
return os.getenv("AUTOML_TRANSFORM_MODE") == "feature-transform"
return os.getenv('AUTOML_TRANSFORM_MODE') == 'feature-transform'


def _get_selected_input_keys():
"""Returns a list of ordered content keys for container's input."""
return [key.strip().lower() for key in os.environ["SAGEMAKER_INFERENCE_INPUT"].split(",")]
return [key.strip().lower() for key in os.environ['SAGEMAKER_INFERENCE_INPUT'].split(',')]


def _get_selected_output_keys():
"""Returns a list of ordered content keys for container's output."""
return [key.strip().lower() for key in os.environ["SAGEMAKER_INFERENCE_OUTPUT"].split(",")]
return [key.strip().lower() for key in os.environ['SAGEMAKER_INFERENCE_OUTPUT'].split(',')]


def _sparsify_if_needed(x):
"""Returns a sparse matrix if needed for encoding to sparse recordio protobuf."""
if os.getenv("AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF") == "1" and not sparse.issparse(x):
if os.getenv('AUTOML_SPARSE_ENCODE_RECORDIO_PROTOBUF') == '1' \
and not sparse.issparse(x):
return sparse.csr_matrix(x)
return x


def _split_features_target(x):
"""Returns the features and target by splitting the input array."""
if os.getenv("AUTOML_TRANSFORM_MODE") == "feature-transform":
if os.getenv('AUTOML_TRANSFORM_MODE') == 'feature-transform':
return _sparsify_if_needed(x), None

if sparse.issparse(x):
Expand All @@ -67,7 +68,7 @@ def model_fn(model_dir):
deserialized model object that can be used for model serving
"""
return load(filename=os.path.join(model_dir, "model.joblib"))
return load(filename=os.path.join(model_dir, 'model.joblib'))


def predict_fn(input_object, model):
Expand Down Expand Up @@ -100,7 +101,10 @@ def predict_fn(input_object, model):
try:
return model.transform(input_object)
except ValueError as e:
return worker.Response(response="{}".format(str(e) or "Unknown error."), status=http_client.BAD_REQUEST)
return worker.Response(
response='{}'.format(str(e) or 'Unknown error.'),
status=http_client.BAD_REQUEST
)


def _generate_post_processed_response(array, model):
Expand Down Expand Up @@ -133,9 +137,8 @@ def _generate_post_processed_response(array, model):
for output_key_idx, output_key in enumerate(output_keys):
if output_key == "predicted_label" and output_key in input_keys:
input_key_idx = input_keys.index(output_key)
output_array[:, output_key_idx] = model.inverse_label_transform(
array[:, input_key_idx].ravel().astype(np.float).astype(np.int)
)
output_array[:, output_key_idx] = model.inverse_label_transform(array[:, input_key_idx]
.ravel().astype(np.float).astype(np.int))
elif output_key == "labels":
output_array[:, output_key_idx][:] = str(list(model.target_transformer.get_classes()))
elif output_key in input_keys:
Expand Down Expand Up @@ -165,10 +168,11 @@ def input_fn(request_body, request_content_type):
decoded data as 2D numpy array
"""
content_type = request_content_type.lower() if request_content_type else "text/csv"
content_type = request_content_type.lower(
) if request_content_type else "text/csv"
content_type = content_type.split(";")[0].strip()

if content_type == "text/csv":
if content_type == 'text/csv':
if isinstance(request_body, str):
byte_buffer = request_body.encode()
else:
Expand All @@ -178,7 +182,8 @@ def input_fn(request_body, request_content_type):
return val

return worker.Response(
response=f"'{request_content_type}' is an unsupported content type.", status=http_client.UNSUPPORTED_MEDIA_TYPE
response=f"'{request_content_type}' is an unsupported content type.",
status=http_client.UNSUPPORTED_MEDIA_TYPE
)


Expand Down Expand Up @@ -212,40 +217,51 @@ def output_fn(prediction, accept_type):
return worker.Response(
response=encoder_factory[accept_type](prediction, output_keys),
status=http_client.OK,
mimetype=accept_type,
mimetype=accept_type
)
except KeyError:
# Selectable inference is not turned on
if accept_type == "text/csv":
if accept_type == 'text/csv':
return worker.Response(
response=encoders.encode(prediction, accept_type), status=http_client.OK, mimetype=accept_type
response=encoders.encode(prediction, accept_type),
status=http_client.OK,
mimetype=accept_type
)
return worker.Response(
response=f"Accept type '{accept_type}' is not supported " f"during inverse label transformation.",
status=http_client.NOT_ACCEPTABLE,
response=f"Accept type '{accept_type}' is not supported "
f"during inverse label transformation.",
status=http_client.NOT_ACCEPTABLE
)

if isinstance(prediction, tuple):
X, y = prediction
else:
X, y = _split_features_target(prediction)

if accept_type == "application/x-recordio-protobuf":
if accept_type == 'application/x-recordio-protobuf':
return worker.Response(
response=encoders.array_to_recordio_protobuf(
_sparsify_if_needed(X).astype("float32"), y.astype("float32") if y is not None else y
_sparsify_if_needed(X).astype('float32'),
y.astype('float32') if y is not None else y
),
status=http_client.OK,
mimetype=accept_type,
mimetype=accept_type
)

if accept_type == "text/csv":
if accept_type == 'text/csv':
if y is not None:
X = np.column_stack((np.ravel(y), X.todense() if sparse.issparse(X) else X))
X = np.column_stack(
(np.ravel(y), X.todense() if sparse.issparse(X) else X)
)

return worker.Response(response=encoders.encode(X, accept_type), status=http_client.OK, mimetype=accept_type)
return worker.Response(
response=encoders.encode(X, accept_type),
status=http_client.OK,
mimetype=accept_type
)
return worker.Response(
response=f"Accept type '{accept_type}' is not supported.", status=http_client.NOT_ACCEPTABLE
response=f"Accept type '{accept_type}' is not supported.",
status=http_client.NOT_ACCEPTABLE
)


Expand All @@ -257,8 +273,16 @@ def execution_parameters_fn():
used during inference and defaults to 6MB otherwise.
"""
if _is_feature_transform():
return worker.Response(response='{"MaxPayloadInMB":1}', status=http_client.OK, mimetype="application/json")
return worker.Response(response='{"MaxPayloadInMB":6}', status=http_client.OK, mimetype="application/json")
return worker.Response(
response='{"MaxPayloadInMB":1}',
status=http_client.OK,
mimetype="application/json"
)
return worker.Response(
response='{"MaxPayloadInMB":6}',
status=http_client.OK,
mimetype="application/json"
)


def numpy_array_to_csv(array, output_keys):
Expand Down Expand Up @@ -334,7 +358,7 @@ def numpy_array_to_jsonlines(array, output_keys):


encoder_factory = {
"text/csv": numpy_array_to_csv,
"application/json": numpy_array_to_json,
"application/jsonlines": numpy_array_to_jsonlines,
'text/csv': numpy_array_to_csv,
'application/json': numpy_array_to_json,
'application/jsonlines': numpy_array_to_jsonlines
}
16 changes: 8 additions & 8 deletions 03_automl/generated_module/setup.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from setuptools import setup

setup(
packages=["candidate_data_processors/"],
name="candidate_data_processors",
version="1.0.0",
description="This module is auto-generated by SageMaker AutoML. "
"It contains candidate data processing code and the "
"scaffolding to run them in SageMaker.",
author="Amazon Web Services",
license="Apache License 2.0",
packages=['candidate_data_processors/'],
name='candidate_data_processors',
version='1.0.0',
description='This module is auto-generated by SageMaker AutoML. '
'It contains candidate data processing code and the '
'scaffolding to run them in SageMaker.',
author='Amazon Web Services',
license='Apache License 2.0',
include_package_data=True,
)
Loading

0 comments on commit 3b926fd

Please sign in to comment.