[go: nahoru, domu]

Skip to content

Commit

Permalink
updated bias and explainability flow
Browse files Browse the repository at this point in the history
  • Loading branch information
cfregly committed Jan 25, 2021
1 parent 7f3449d commit 2c8fce0
Show file tree
Hide file tree
Showing 13 changed files with 13,195 additions and 404 deletions.
149 changes: 62 additions & 87 deletions 05_explore/02_Prepare_Dataset_Bias_Analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1488,11 +1488,11 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"path = './amazon_reviews_us_giftcards_software_videogames.csv'\n",
"path = './data-clarify/amazon_reviews_us_giftcards_software_videogames.csv'\n",
"df.to_csv(path, index=False, header=True)"
]
},
Expand All @@ -1505,7 +1505,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand All @@ -1514,7 +1514,7 @@
"(81040, 15)"
]
},
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1525,11 +1525,11 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"path_balanced = './amazon_reviews_us_giftcards_software_videogames_balanced.csv'\n",
"path_balanced = './data-clarify/amazon_reviews_us_giftcards_software_videogames_balanced.csv'\n",
"df_balanced.to_csv(path_balanced, index=False, header=True)"
]
},
Expand All @@ -1542,36 +1542,38 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'s3://sagemaker-us-east-1-231218423789/data/amazon_reviews_us_giftcards_software_videogames.csv'"
"'s3://sagemaker-us-east-1-835319576252/bias-detection-1611529352/amazon_reviews_us_giftcards_software_videogames.csv'"
]
},
"execution_count": 27,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s3_prefix = 'data'\n",
"bias_data_s3_uri = sess.upload_data(path=path, key_prefix=s3_prefix)\n",
"import time\n",
"timestamp = int(time.time())\n",
"\n",
"bias_data_s3_uri = sess.upload_data(bucket=bucket, key_prefix='bias-detection-{}'.format(timestamp), path=path)\n",
"bias_data_s3_uri"
]
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021-01-17 04:02:43 167441142 amazon_reviews_us_giftcards_software_videogames.csv\n"
"2021-01-24 23:02:33 167441142 amazon_reviews_us_giftcards_software_videogames.csv\n"
]
}
],
Expand All @@ -1581,36 +1583,35 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'s3://sagemaker-us-east-1-231218423789/data/amazon_reviews_us_giftcards_software_videogames_balanced.csv'"
"'s3://sagemaker-us-east-1-835319576252/bias-detection-1611529352/amazon_reviews_us_giftcards_software_videogames_balanced.csv'"
]
},
"execution_count": 29,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s3_prefix = 'data'\n",
"balanced_bias_data_s3_uri = sess.upload_data(path=path_balanced, key_prefix=s3_prefix)\n",
"balanced_bias_data_s3_uri = sess.upload_data(bucket=bucket, key_prefix='bias-detection-{}'.format(timestamp), path=path_balanced)\n",
"balanced_bias_data_s3_uri"
]
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2021-01-17 04:02:45 45067840 amazon_reviews_us_giftcards_software_videogames_balanced.csv\n"
"2021-01-24 23:16:25 45067840 amazon_reviews_us_giftcards_software_videogames_balanced.csv\n"
]
}
],
Expand All @@ -1627,7 +1628,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 33,
"metadata": {},
"outputs": [
{
Expand All @@ -1644,7 +1645,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 34,
"metadata": {},
"outputs": [
{
Expand All @@ -1661,83 +1662,57 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Stored variables and their in-db values:\n",
"balance_dataset -> True\n",
"balanced_bias_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/data/amazon\n",
"bias_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/data/amazon\n",
"experiment_name -> 'Amazon-Customer-Reviews-BERT-Experiment-161076485\n",
"feature_group_name -> 'reviews-feature-group-1610764855'\n",
"feature_store_offline_prefix -> 'reviews-feature-store-1610764855'\n",
"firehose_arn -> 'arn:aws:firehose:us-east-1:231218423789:deliverys\n",
"firehose_name -> 'dsoaws-kinesis-data-firehose'\n",
"iam_kinesis_role_name -> 'DSOAWS_Kinesis'\n",
"iam_kinesis_role_passed -> True\n",
"iam_lambda_role_name -> 'DSOAWS_Lambda'\n",
"iam_lambda_role_passed -> True\n",
"iam_role_kinesis_arn -> 'arn:aws:iam::231218423789:role/DSOAWS_Kinesis'\n",
"iam_role_lambda_arn -> 'arn:aws:iam::231218423789:role/DSOAWS_Lambda'\n",
"ingest_create_athena_db_passed -> True\n",
"ingest_create_athena_table_parquet_passed -> True\n",
"ingest_create_athena_table_tsv_passed -> True\n",
"kinesis_data_analytics_app_name -> 'dsoaws-kinesis-data-analytics-sql-app'\n",
"lambda_fn_arn_cloudwatch -> 'arn:aws:lambda:us-east-1:231218423789:function:De\n",
"lambda_fn_arn_invoke_ep -> 'arn:aws:lambda:us-east-1:231218423789:function:In\n",
"lambda_fn_arn_sns -> 'arn:aws:lambda:us-east-1:231218423789:function:Pu\n",
"lambda_fn_name_cloudwatch -> 'DeliverKinesisAnalyticsToCloudWatch'\n",
"lambda_fn_name_invoke_ep -> 'InvokeSageMakerEndpointFromKinesis'\n",
"lambda_fn_name_invoke_sm_endpoint -> 'InvokeSageMakerEndpointFromKinesis'\n",
"lambda_fn_name_sns -> 'PushNotificationToSNS'\n",
"max_seq_length -> 64\n",
"model_ab_endpoint_name -> 'tensorflow-training-2021-01-16-02-58-36-786-abtes\n",
"processed_metrics_s3_uri -> 's3://sagemaker-us-east-1-231218423789/sagemaker-s\n",
"processed_test_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/sagemaker-s\n",
"processed_train_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/sagemaker-s\n",
"processed_validation_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/sagemaker-s\n",
"processing_evaluation_metrics_job_name -> 'sagemaker-scikit-learn-2021-01-16-03-54-29-363'\n",
"pytorch_endpoint_name -> 'tensorflow-training-2021-01-16-02-58-36-786-pt-16\n",
"raw_input_data_s3_uri -> 's3://sagemaker-us-east-1-231218423789/amazon-revi\n",
"s3_private_path_tsv -> 's3://sagemaker-us-east-1-231218423789/amazon-revi\n",
"s3_public_path_tsv -> 's3://amazon-reviews-pds/tsv'\n",
"setup_dependencies_passed -> True\n",
"setup_iam_roles_passed -> True\n",
"setup_s3_bucket_passed -> True\n",
"sns_topic_arn -> 'arn:aws:sns:us-east-1:231218423789:review_anomaly\n",
"stream_arn -> 'arn:aws:kinesis:us-east-1:231218423789:stream/dso\n",
"stream_name -> 'dsoaws-kinesis-data-stream'\n",
"tensorflow_endpoint_arn -> 'arn:aws:sagemaker:us-east-1:231218423789:endpoint\n",
"tensorflow_endpoint_name -> 'tensorflow-training-2021-01-16-02-58-36-786-tf-16\n",
"tensorflow_model_name -> 'tensorflow-training-2021-01-16-02-58-36-786-tf-16\n",
"test_split_percentage -> 0.05\n",
"train_split_percentage -> 0.9\n",
"training_job_debugger_artifacts_path -> 's3://sagemaker-us-east-1-231218423789/tensorflow-\n",
"training_job_name -> 'tensorflow-training-2021-01-16-02-58-36-786'\n",
"transformer_pytorch_model_dir_s3_uri -> 's3://sagemaker-us-east-1-231218423789/models/tens\n",
"trial_name -> 'trial-1610764855'\n",
"tuning_job_name -> 'tensorflow-training-210116-0304'\n",
"validation_split_percentage -> 0.05\n"
]
}
],
"outputs": [],
"source": [
"%store"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Release Resources\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%html\n",
"\n",
"<p><b>Shutting down your kernel for this notebook to release resources.</b></p>\n",
"<button class=\"sm-command-button\" data-commandlinker-command=\"kernelmenu:shutdown\" style=\"display:none;\">Shutdown Kernel</button>\n",
" \n",
"<script>\n",
"try {\n",
" els = document.getElementsByClassName(\"sm-command-button\");\n",
" els[0].click();\n",
"}\n",
"catch(err) {\n",
" // NoOp\n",
"} \n",
"</script>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%javascript\n",
"Jupyter.notebook.save_checkpoint();\n",
"Jupyter.notebook.session.delete();"
"\n",
"try {\n",
" Jupyter.notebook.save_checkpoint();\n",
" Jupyter.notebook.session.delete();\n",
"}\n",
"catch(err) {\n",
" // NoOp\n",
"}"
]
}
],
Expand Down
Loading

0 comments on commit 2c8fce0

Please sign in to comment.