Add some docs

rrupam · Jul 28, 2019 · ad961f7 · ad961f7
1 parent 1e768fe
commit ad961f7
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 20 deletions.
diff --git a/task1_machine_learning_backend/classifcation_backend/Dockerfile b/task1_machine_learning_backend/classifcation_backend/Dockerfile
@@ -1,7 +1,6 @@
 FROM continuumio/miniconda:4.5.4
 
-RUN pip install mlflow>=1.0 \
-    && pip install azure-storage==0.36.0 \
+RUN pip install mlflow>=1.1 \
     && pip install numpy==1.14.3 \
     && pip install scipy \
     && pip install pandas==0.22.0 \

diff --git a/task1_machine_learning_backend/classifcation_backend/Readme.md b/task1_machine_learning_backend/classifcation_backend/Readme.md
@@ -2,13 +2,41 @@
 
 Version : 0.1
 
-The classifier backend is based in the tool MLFlow. 
+The classifier backend leverages the open-source tool MLflow.
 
-## Structure
+## Files and directory structure
 
+*./data/transcripts/PTSD_data.csv* : The initial annotated dataset. Export the relevant spreadsheet as CSV.
+
+*./Dockerfile* : Initial Dockerfile of the solution.
+
+*./multi_label_lg_data_pipeline.ipynb* : A very simple multilabel reference pipeline.
+
+*./MLProject* : MLflow reference project.
+
+*./train.py* : The training source code file.
 
 ## How to run it
 
+### Download the annotation data spreadsheet and export it as it is. 
+
+
+### To run training, use the command below from within the project directory:
+
+```
+╰─$ python ./train.py
+
+... Processing A1
+
+Test accuracy is 0.9375
+
+... Processing B1
+Test accuracy is 1.0
 
-## How to add my model
+... Processing B2
+Test accuracy is 1.0
+...
 
+... Processing G1
+Test accuracy is 1.0
+```
diff --git a/task1_machine_learning_backend/classifcation_backend/train.py b/task1_machine_learning_backend/classifcation_backend/train.py
@@ -1,24 +1,17 @@
-# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
-# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
-# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
-
 import os
 import warnings
 import sys
 
 import pandas as pd
 import numpy as np
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import ElasticNet
 from sklearn.pipeline import Pipeline
 from sklearn.linear_model import LogisticRegression
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
-import mlflow
 import mlflow.sklearn
 
 
@@ -67,14 +60,6 @@ def eval_metrics(actual, pred):
     X_train = train.text
     X_test = test.text
 
-    #print(X_train.shape)
-    #print(X_test.shape)
-    # The predicted column is "quality" which is a scalar from [3, 9]
-    #train_x = train.drop(["A1"], axis=1)
-    #test_x = test.drop(["A1"], axis=1)
-    #train_y = train[["A1"]]
-    #test_y = test[["A1"]]
-
     alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
     l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5