In this post we apply the LogitBoost algorithm to a toy dataset for handwritten digit recognition.
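Before running the full multiclass example, it helps to see what a single LogitBoost round does in the two-class case. The sketch below is illustrative only: it follows Algorithm 3 of Friedman, Hastie & Tibshirani (2000) with labels in {0, 1}, while the logitboost package used in Code 1 implements the multiclass generalization for us.

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def binary_logitboost(X, y, n_rounds=30, max_depth=3):
    """Minimal sketch of binary LogitBoost; y must contain 0/1 labels."""
    F = np.zeros(len(y))                       # additive model F(x), starts at 0
    trees = []
    for _ in range(n_rounds):
        p = 1.0 / (1.0 + np.exp(-2.0 * F))     # current estimate of P(y = 1 | x)
        w = np.clip(p * (1.0 - p), 1e-5, None) # Newton weights, floored for stability
        z = np.clip((y - p) / w, -4.0, 4.0)    # working response, capped like
                                               # the package's max_response=4.0
        tree = DecisionTreeRegressor(max_depth=max_depth)
        tree.fit(X, z, sample_weight=w)        # weighted least-squares fit
        F += 0.5 * tree.predict(X)             # half Newton step
        trees.append(tree)
    return trees, np.where(F > 0, 1, 0)        # final hard predictions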
***************
Code 1
***************
(.env) [boris@fedora34server LOGITBOOST]$ cat logitBoost2.py
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import seaborn as sns
sns.set(style='darkgrid', palette='colorblind', color_codes=True)
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix)
from logitboost import LogitBoost
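# LogitBoost comes from the third-party "logitboost" package
# (pip install logitboost)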
digits = load_digits()
X = digits.data
y = digits.target
images = digits.images.astype(np.int_)
n_classes = 10
# Scale the digits for numerical stability
X /= 16
# Shuffle the data and split them into training and testing sets
test_size = 1 / 3
X_train, X_test, y_train, y_test, images_train, images_test \
    = train_test_split(X, y, images, test_size=test_size, shuffle=True,
                       stratify=y, random_state=0)
print('Training shape: ', X_train.shape)
print('Test shape: ', X_test.shape)
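# Display the first 64 training digits in an 8 x 8 grid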
n_rows = 8
n_cols = 8
fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(10, 10))
for i, j in product(range(n_rows), range(n_cols)):
    image = images_train[n_cols * i + j]
    ax[i, j].imshow(image, cmap='binary', interpolation='none')
    ax[i, j].axis('off')
plt.show(block=False)
# plt.close()
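# Boost 30 rounds of depth-3 regression trees; at each round LogitBoost
# fits one tree per class to the Newton working responses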
lboost = LogitBoost(DecisionTreeRegressor(max_depth=3),
                    n_estimators=30, random_state=0)
lboost.fit(X_train, y_train)
y_pred_train = lboost.predict(X_train)
y_pred_test = lboost.predict(X_test)
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print('Training accuracy: %.4f' % accuracy_train)
print('Test accuracy: %.4f' % accuracy_test)
report_train = classification_report(y_train, y_pred_train)
report_test = classification_report(y_test, y_pred_test)
print('Training\n%s' % report_train)
print('Test\n%s' % report_test)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
sns.heatmap(confusion_matrix(y_train, y_pred_train), ax=ax[0],
            robust=True, annot=True, fmt=',d', cmap=plt.get_cmap('Blues'),
            square=True, cbar=False)
ax[0].set_xlabel('Predicted Class')
ax[0].set_ylabel('Actual Class')
ax[0].set_title('Training')
sns.heatmap(confusion_matrix(y_test, y_pred_test), ax=ax[1],
            robust=True, annot=True, fmt=',d', cmap=plt.get_cmap('Blues'),
            square=True, cbar=False)
ax[1].set_title('Testing', fontsize=14)
ax[1].set_xlabel('Predicted Class')
ax[1].set_ylabel('Actual Class')
plt.tight_layout()
plt.show(block=False)
# plt.close()
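# staged_score reports the ensemble's accuracy after each boosting
# iteration, so we can plot how accuracy evolves during training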
iterations = np.arange(1, lboost.n_estimators + 1)
staged_accuracy_train = list(lboost.staged_score(X_train, y_train))
staged_accuracy_test = list(lboost.staged_score(X_test, y_test))
plt.figure(figsize=(10, 8))
plt.plot(iterations, staged_accuracy_train, label='Training', marker='.')
plt.plot(iterations, staged_accuracy_test, label='Test', marker='.')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title('Ensemble accuracy during each boosting iteration', fontsize=14)
plt.legend(loc='best', shadow=True, frameon=True)
plt.tight_layout()
plt.show()
plt.close()
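As a quick follow-up, the fitted ensemble also exposes per-class probabilities. The snippet below is a sketch assuming the variables from logitBoost2.py are still in scope; predict_proba follows the usual scikit-learn classifier convention. It lists the test digits the model is least sure about:

proba_test = lboost.predict_proba(X_test)   # shape (n_samples, 10)
confidence = proba_test.max(axis=1)         # probability of the top class
worst = np.argsort(confidence)[:5]          # 5 least confident test digits
for idx in worst:
    print('true=%d pred=%d confidence=%.3f'
          % (y_test[idx], y_pred_test[idx], confidence[idx]))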