Thursday, March 17, 2022

RANSAC Regression Explained with Python Examples

Code 1 - RANSAC Regression on the Boston Housing Dataset

(.env) [boris@fedora34server NUMPY]$ cat ransacBoston.py

from sklearn import datasets

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, RANSACRegressor

from sklearn.metrics import r2_score, mean_squared_error

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

#
# Load the Boston Housing dataset for training.
#
# datasets.load_boston() was deprecated in scikit-learn 1.0 and is no longer
# available from 1.2 onwards. Try it first so older installations behave
# exactly as before; otherwise rebuild the same arrays from the original
# StatLib file, following the recipe in scikit-learn's deprecation notice
# (this fallback needs network access).
#
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS',
                 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
                 'TAX', 'PTRATIO', 'B', 'LSTAT']
try:
    bhd = datasets.load_boston()
    boston_data, boston_target = bhd.data, bhd.target
except (ImportError, AttributeError):
    raw_df = pd.read_csv('http://lib.stat.cmu.edu/datasets/boston',
                         sep=r'\s+', skiprows=22, header=None)
    # Each record is split over two physical rows in the raw file: the even
    # rows hold the first 11 features, the odd rows hold the remaining
    # 2 features followed by the MEDV target.
    boston_data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    boston_target = raw_df.values[1::2, 2]

df = pd.DataFrame(boston_data, columns=feature_names)
df['MEDV'] = boston_target

#
# Select Avg. No of rooms per dwelling as feature
# and the median home value as target; both are reshaped
# into single-column 2-D arrays.
#
X = df['RM'].to_numpy().reshape(-1, 1)
y = df['MEDV'].to_numpy().reshape(-1, 1)

#
# Create an instance of RANSACRegressor.
#
# The wrapped estimator is passed positionally because its keyword was
# renamed from `base_estimator` to `estimator` in later scikit-learn
# releases; the positional form works with both spellings.
# 'absolute_error' is the current name of the loss formerly called
# 'absolute_loss' (deprecated in scikit-learn 1.0 and later removed).
#
ransac = RANSACRegressor(LinearRegression(),
                         min_samples=50, max_trials=100,
                         loss='absolute_error', random_state=42,
                         residual_threshold=10)

#
# Fit the model
#
ransac.fit(X, y)

#
# Get the inlier mask reported by the fitted model;
# the outlier mask is its element-wise negation.
#
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

#
# Scatter the inlier and outlier samples on one 8x8 inch figure.
# Each entry is (mask, face colour, marker, legend label).
#
plt.figure(figsize=(8, 8))
point_groups = (
    (inlier_mask, 'steelblue', 'o', 'Inliers'),
    (outlier_mask, 'limegreen', 's', 'Outliers'),
)
for mask, colour, shape, legend_text in point_groups:
    plt.scatter(X[mask], y[mask],
                c=colour, edgecolor='white',
                marker=shape, label=legend_text)

#
# Draw the fitted line by predicting at integer room counts 3..9.
# predict() is given the grid as a single-column 2-D array.
#
room_grid = np.arange(3, 10)
fitted_prices = ransac.predict(room_grid.reshape(-1, 1))
plt.plot(room_grid, fitted_prices, color='black', lw=2)

#
# Label the axes, add the legend and display the figure.
#
plt.xlabel('Average number of rooms [RM]', fontsize=15)
plt.ylabel('Price in $1000s [MEDV]', fontsize=15)
plt.legend(loc='upper left', fontsize=12)
plt.show()































Code 2 - 3D Plotting

(.env) [boris@fedora34server NUMPY]$ cat ransac3DPlotting.py
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage.measure import LineModelND, ransac

# Unseeded generator: each run draws a different noise sample.
rng = np.random.default_rng()

# Generate 200 points, spaced 10 units apart, on the line through the
# origin along the unit direction (1, 1, 1) / sqrt(3).
point = np.array([0, 0, 0], dtype='float')
direction = np.array([1, 1, 1], dtype='float') / np.sqrt(3)
xyz = point + 10 * np.arange(-100, 100)[..., np.newaxis] * direction

# Add gaussian noise to the coordinates: a small jitter on every point,
# then much larger perturbations on every 2nd and every 4th point so that
# a sizeable share of the data lies far from the line.
noise = rng.normal(size=xyz.shape)
xyz += 0.5 * noise
xyz[::2] += 20 * noise[::2]
xyz[::4] += 100 * noise[::4]

# Robustly fit the line with the RANSAC algorithm; `inliers` is used below
# as a boolean mask over the rows of xyz.
model_robust, inliers = ransac(xyz, LineModelND, min_samples=2,
                               residual_threshold=1, max_trials=1000)
# Element-wise negation of the mask (instead of comparing with False).
outliers = ~inliers

# Plot inliers and outliers in 3D; transposing the selected (k, 3) rows
# yields the x, y and z coordinate arrays expected by scatter().
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(*xyz[inliers].T, c='b',
           marker='o', label='Inlier data')
ax.scatter(*xyz[outliers].T, c='r',
           marker='o', label='Outlier data')
ax.legend(loc='lower left')
plt.show()






























References



No comments:

Post a Comment