Friday, April 8, 2022

Save and Load Machine Learning Models in Python with scikit-learn

*********** 

Code 1

***********

(.env) [boris@fedora34server SAVEMODEL]$ cat savePikle.py

# Save Model Using Pickle

import pandas

from sklearn import model_selection

from sklearn.linear_model import LogisticRegression

import pickle

# Pima Indians diabetes dataset; the column names are supplied explicitly
# through `names` when the CSV is read.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

# The first eight columns are the input features; the ninth ('class') is the target.
X = array[:,0:8]
Y = array[:,8]

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)

# Fit the model on training set
model = LogisticRegression(solver='lbfgs', max_iter=300)
model.fit(X_train, Y_train)

# save the model to disk
# The context manager guarantees the file is flushed and closed before it is
# read back below, even if pickle.dump raises.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# some time later...

# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Mean accuracy of the restored model on the held-out test split.
result = loaded_model.score(X_test, Y_test)
print(result)


(.env) [boris@fedora34server SAVEMODEL]$ python savePikle.py

0.7874015748031497


**********

Code 2

**********
(.env) [boris@fedora34server SAVEMODEL]$ cat saveJoblib.py

# Save Model Using joblib
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import joblib
# Source CSV, the column labels applied to it on read, and the model file name.
DATA_URL = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
COLUMNS = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
MODEL_PATH = 'finalized_model.sav'

# Read the data and separate the eight feature columns from the 'class' column.
values = pandas.read_csv(DATA_URL, names=COLUMNS).values
features = values[:, :8]
target = values[:, 8]

# Keep 33% of the rows aside for scoring; random_state pins the shuffle.
features_train, features_test, target_train, target_test = model_selection.train_test_split(
    features, target, test_size=0.33, random_state=7
)

# Train the classifier on the training portion only.
classifier = LogisticRegression(solver='lbfgs', max_iter=300)
classifier.fit(features_train, target_train)

# Persist the fitted classifier with joblib.
joblib.dump(classifier, MODEL_PATH)

# some time later...

# Restore the classifier from disk and report its score on the held-out rows.
restored = joblib.load(MODEL_PATH)
print(restored.score(features_test, target_test))

(.env) [boris@fedora34server SAVEMODEL]$ python saveJoblib.py
0.7874015748031497

*********************************
Loading from another Python script
*********************************
(.env) [boris@fedora34server SAVEMODEL]$ cat loadPikle.py
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pickle
# Rebuild the same train/test split that was used when the model was saved:
# same data, same test_size and same seed.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
# The first eight columns are the input features; the ninth ('class') is the target.
X = array[:,0:8]
Y = array[:,8]
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y, test_size=test_size, random_state=seed)

#################################################
# Model creation and training are skipped here:
# the fitted model has already been dumped to file
#################################################

filename = 'finalized_model.sav'
# load the model from disk
# The context manager closes the file handle once the model has been read.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Mean accuracy of the restored model on the held-out test split.
result = loaded_model.score(X_test, Y_test)
print(result)

(.env) [boris@fedora34server SAVEMODEL]$ python loadPikle.py
0.7874015748031497




























No comments:

Post a Comment