Faisal Qureshi
Professor
Faculty of Science
Ontario Tech University
Oshawa ON Canada
http://vclab.science.ontariotechu.ca
© Faisal Qureshi
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.cm as cm
M. A. Fischler and R. C. Bolles (June 1981). "Random Sample Consensus: A Paradigm for Model Fitting with Applications to Image Analysis and Automated Cartography". Comm. of the ACM 24: 381--395.
numpy.polyfit
to fit a line to data.def model_error(theta, data):
m = theta[0]
c = theta[1]
y_predicted = m * data[:,0] + c
e = y_predicted - data[:,1]
return np.sum(np.square(e))
Lets generate some data
# Generate some data
np.random.seed(0)
m_true = .75
c_true = 1
x = np.hstack([np.linspace(3, 5, 5), np.linspace(7, 8, 5)])
y = m_true * x + c_true + np.random.rand(len(x))
# Outliers
y[9] = 1
y[0] = 9
data = np.vstack([x, y]).T
n = data.shape[0]
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title('Generating some points from a line');
def draw_line(theta, ax, **kwargs):
m = theta[0]
c = theta[1]
xmin, xmax = ax.get_xbound()
ymin = m * xmin + c
ymax = m * xmax + c
l = mlines.Line2D([xmin, xmax], [ymin,ymax], **kwargs)
ax.add_line(l)
np.random.seed(0)
num_iterations = 32
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error = model_error(theta, data)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error
}
)
print(f'#runs = {len(runs)}')
sorted_runs = sorted(runs, key=lambda k: k['error'])
theta_final = sorted_runs[0]['theta']
error_final = sorted_runs[0]['error']
print(f'theta_final = {theta_final}')
print(f'error_final = {error_final}')
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}');
draw_line(theta_final, plt.gca())
def plot_fitted_line(ax, theta, data_used, error, count, data_explained, i, **kwargs):
ax.scatter(data[:,0], data[:,1], c='red', alpha=0.3)
ax.scatter(data_used[:,0], data_used[:,1], c='blue', alpha=0.9)
if data_explained is not None:
ax.scatter(data_explained[:,0], data_explained[:,1], c='green', alpha=0.5)
ax.set_xlim([0,10])
ax.set_ylim([0,10])
if count == -1:
ax.set_title(f'RANSAC line fitting. #={i}. e={error:10.5}');
else:
ax.set_title(f'RANSAC line fitting. #={i}. e={error:10.5}. c={count}');
draw_line(theta, ax, **kwargs)
fig = plt.figure(figsize=(16, num_iterations*2))
for i in range(num_iterations):
ax = fig.add_subplot(np.ceil(num_iterations/3),3,i+1)
theta = sorted_runs[i]['theta']
data_used = sorted_runs[i]['data_used']
error = sorted_runs[i]['error']
itr = sorted_runs[i]['iteration']
plot_fitted_line(ax, theta, data_used, error, -1, None, itr)
def model_error_with_count(theta, data, scale):
m = theta[0]
c = theta[1]
y_predicted = m * data[:,0] + c
e2 = np.square(y_predicted - data[:,1])
pts_explained = e2 < scale
return np.sum(e2[pts_explained]), np.sum(pts_explained), pts_explained
np.random.seed(0)
num_iterations = 32
scale = 3
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error, count, pts_explained = model_error_with_count(theta, data, scale)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error,
'count': count,
'data_explained': data[pts_explained,:]
}
)
print(f'#runs={len(runs)}')
sorted_runs = sorted(runs, key=lambda k: k['count'])
count_final = sorted_runs[-1]['count']
print(f'count_final={count_final}')
runs_with_count = [v for v in sorted_runs if v['count'] == count_final]
print(f'Found #runs {len(runs_with_count)} with count {count_final}')
sorted_runs_with_count = sorted(runs_with_count, key=lambda k: k['error'])
theta_final = sorted_runs_with_count[0]['theta']
error_final = sorted_runs_with_count[0]['error']
print(f'theta_final={theta_final}')
print(f'error_final={error_final}')
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}. c={count_final}');
draw_line(theta_final, plt.gca())
fig = plt.figure(figsize=(16, num_iterations*2))
for i in range(num_iterations):
ax = fig.add_subplot(np.ceil(num_iterations/3),3,i+1)
theta = sorted_runs[i]['theta']
data_used = sorted_runs[i]['data_used']
error = sorted_runs[i]['error']
count = sorted_runs[i]['count']
data_explained = sorted_runs[i]['data_explained']
itr = sorted_runs[i]['iteration']
plot_fitted_line(ax, theta, data_used, error, count, data_explained, itr)
np.random.seed(0)
num_iterations = 120
scale = 3
data_explained = 70
early_stop = False
min_iterations = 40
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error, count, pts_explained = model_error_with_count(theta, data, scale)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error,
'count': count,
'data_explained': data[pts_explained,:]
}
)
# Check if the model can explain the desired % of data
if data_explained < (count * 100 / n) and (i-1) > min_iterations:
print(f'{data_explained}% data explained at iteration {i} (0-based). Done.')
early_stop = True
break
if not early_stop:
print(f'Done after {i} iterations.')
print(f'#runs={len(runs)}')
sorted_runs = sorted(runs, key=lambda k: k['count'])
count_final = sorted_runs[-1]['count']
print(f'count_final={count_final}')
runs_with_count = [v for v in sorted_runs if v['count'] == count_final]
print(f'Found #runs {len(runs_with_count)} with count {count_final}')
sorted_runs_with_count = sorted(runs_with_count, key=lambda k: k['error'])
theta_final = sorted_runs_with_count[0]['theta']
error_final = sorted_runs_with_count[0]['error']
print(f'theta_final={theta_final}')
print(f'error_final={error_final}')
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}. c={count_final}');
draw_line(theta_final, plt.gca())
From Forsyth and Ponce
Probability of sampling an outlier: $e$
Samples required for each model fitting: $s$
Probability of selecting an inlier: $1 - e$
Probability of selecting $s$ inlier in a row: $(1 - e)^s$
Probability of getting a bad sample: $1 - (1 - e)^s$. At least one of the last $s$ samples was an outlier.
Probability of getting $N$ bad sample: $\left(1 - (1 - e)^s\right)^N$
Probability of getting at least 1 good sample after $N$ tries: $1 - \left(1 - (1 - e)^s\right)^N$
We desire
$$ \begin{array}{crcl} & p &=& 1 - \left(1 - (1 - e)^s\right)^N \\ \Rightarrow & \left(1 - (1 - e)^s\right)^N &=& 1- p \\ \Rightarrow & \log\left(1 - (1 - e)^s\right)^N &=& \log(1- p) \\ \Rightarrow & N\log\left(1 - (1 - e)^s\right) &=& \log(1- p) \\ \Rightarrow & N&=& \frac{\log(1- p)}{\log\left(1 - (1 - e)^s\right)} \end{array} $$