RANSAC¶
Faisal Qureshi
Professor
Faculty of Science
Ontario Tech University
Oshawa ON Canada
http://vclab.science.ontariotechu.ca
Copyright information¶
© Faisal Qureshi
License¶
This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.cm as cm
Lesson Plan¶
- RANSAC
- RANSAC for 2D line fitting
- RANSAC Algorithm
- Pros
- Cons
- Uses
RANSAC¶
M. A. Fischler and R. C. Bolles (June 1981). "Random Sample Consensus: A Paradigm for Model Fitting with Applications to Image Analysis and Automated Cartography". Comm. of the ACM 24: 381--395.
Recipie¶
- Classify points as inliers and outliers
- Estimate model parameters using inliers online
- Compute model fit
- Repeat until
- The desired model fit is achieved
- Reach the number of allowed iterations
RANSAC for 2D line fitting¶
- Need a method for model fitting. We can use
numpy.polyfit
to fit a line to data. - Line fitting only needs two points.
- Need a mechanism to compute fit error for a model. Given the current fit parameters we can compute the model error as follows.
def model_error(theta, data):
m = theta[0]
c = theta[1]
y_predicted = m * data[:,0] + c
e = y_predicted - data[:,1]
return np.sum(np.square(e))
Lets generate some data
# Generate some data
np.random.seed(0)
m_true = .75
c_true = 1
x = np.hstack([np.linspace(3, 5, 5), np.linspace(7, 8, 5)])
y = m_true * x + c_true + np.random.rand(len(x))
# Outliers
y[9] = 1
y[0] = 9
data = np.vstack([x, y]).T
n = data.shape[0]
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title('Generating some points from a line');
def draw_line(theta, ax, **kwargs):
m = theta[0]
c = theta[1]
xmin, xmax = ax.get_xbound()
ymin = m * xmin + c
ymax = m * xmax + c
l = mlines.Line2D([xmin, xmax], [ymin,ymax], **kwargs)
ax.add_line(l)
RANSAC 2D line fitting - Implementation 1¶
np.random.seed(0)
num_iterations = 32
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error = model_error(theta, data)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error
}
)
print(f'#runs = {len(runs)}')
#runs = 32
sorted_runs = sorted(runs, key=lambda k: k['error'])
theta_final = sorted_runs[0]['theta']
error_final = sorted_runs[0]['error']
print(f'theta_final = {theta_final}')
print(f'error_final = {error_final}')
theta_final = [0.50754323 2.63593864] error_final = 59.31971147802779
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}');
draw_line(theta_final, plt.gca())
Visualization 1¶
def plot_fitted_line(ax, theta, data_used, error, count, data_explained, i, **kwargs):
ax.scatter(data[:,0], data[:,1], c='red', alpha=0.3)
ax.scatter(data_used[:,0], data_used[:,1], c='blue', alpha=0.9)
if data_explained is not None:
ax.scatter(data_explained[:,0], data_explained[:,1], c='green', alpha=0.5)
ax.set_xlim([0,10])
ax.set_ylim([0,10])
if count == -1:
ax.set_title(f'RANSAC line fitting. #={i}. e={error:10.5}');
else:
ax.set_title(f'RANSAC line fitting. #={i}. e={error:10.5}. c={count}');
draw_line(theta, ax, **kwargs)
fig = plt.figure(figsize=(16, num_iterations*2))
for i in range(num_iterations):
ax = fig.add_subplot(int(np.ceil(num_iterations/3)),3,i+1)
theta = sorted_runs[i]['theta']
data_used = sorted_runs[i]['data_used']
error = sorted_runs[i]['error']
itr = sorted_runs[i]['iteration']
plot_fitted_line(ax, theta, data_used, error, -1, None, itr)
RANSAC 2D line fitting - Implementation 2¶
def model_error_with_count(theta, data, scale):
m = theta[0]
c = theta[1]
y_predicted = m * data[:,0] + c
e2 = np.square(y_predicted - data[:,1])
pts_explained = e2 < scale
return np.sum(e2[pts_explained]), np.sum(pts_explained), pts_explained
np.random.seed(0)
num_iterations = 32
scale = 3
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error, count, pts_explained = model_error_with_count(theta, data, scale)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error,
'count': count,
'data_explained': data[pts_explained,:]
}
)
print(f'#runs={len(runs)}')
#runs=32
sorted_runs = sorted(runs, key=lambda k: k['count'])
count_final = sorted_runs[-1]['count']
print(f'count_final={count_final}')
count_final=8
runs_with_count = [v for v in sorted_runs if v['count'] == count_final]
print(f'Found #runs {len(runs_with_count)} with count {count_final}')
Found #runs 16 with count 8
sorted_runs_with_count = sorted(runs_with_count, key=lambda k: k['error'])
theta_final = sorted_runs_with_count[0]['theta']
error_final = sorted_runs_with_count[0]['error']
print(f'theta_final={theta_final}')
print(f'error_final={error_final}')
theta_final=[0.83257418 1.27246666] error_final=0.33438258175997637
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}. c={count_final}');
draw_line(theta_final, plt.gca())
Visualization 2¶
fig = plt.figure(figsize=(16, num_iterations*2))
for i in range(num_iterations):
ax = fig.add_subplot(int(np.ceil(num_iterations/3)),3,i+1)
theta = sorted_runs[i]['theta']
data_used = sorted_runs[i]['data_used']
error = sorted_runs[i]['error']
count = sorted_runs[i]['count']
data_explained = sorted_runs[i]['data_explained']
itr = sorted_runs[i]['iteration']
plot_fitted_line(ax, theta, data_used, error, count, data_explained, itr)
RANSAC 2D line fitting - putting it all together¶
np.random.seed(0)
num_iterations = 120
scale = 3
data_explained = 70
early_stop = False
min_iterations = 40
runs = []
for i in range(num_iterations):
# Randomly selecting enough points needed to fit the model
idx = np.zeros(2)
while idx[0] == idx[1]:
idx = (np.random.rand(2)*n).astype(int)
data_for_this_fit = data[idx,:]
# Model fitting
theta = np.polyfit(data_for_this_fit[:,0], data_for_this_fit[:, 1], 1)
# Computing model fit error
error, count, pts_explained = model_error_with_count(theta, data, scale)
# Storing the results
runs.append(
{
'iteration': i,
'data_used': data_for_this_fit,
'theta': theta,
'error': error,
'count': count,
'data_explained': data[pts_explained,:]
}
)
# Check if the model can explain the desired % of data
if data_explained < (count * 100 / n) and (i-1) > min_iterations:
print(f'{data_explained}% data explained at iteration {i} (0-based). Done.')
early_stop = True
break
if not early_stop:
print(f'Done after {i} iterations.')
70% data explained at iteration 42 (0-based). Done.
print(f'#runs={len(runs)}')
#runs=43
sorted_runs = sorted(runs, key=lambda k: k['count'])
count_final = sorted_runs[-1]['count']
print(f'count_final={count_final}')
count_final=8
runs_with_count = [v for v in sorted_runs if v['count'] == count_final]
print(f'Found #runs {len(runs_with_count)} with count {count_final}')
Found #runs 23 with count 8
sorted_runs_with_count = sorted(runs_with_count, key=lambda k: k['error'])
theta_final = sorted_runs_with_count[0]['theta']
error_final = sorted_runs_with_count[0]['error']
print(f'theta_final={theta_final}')
print(f'error_final={error_final}')
theta_final=[0.76437691 1.54525573] error_final=0.2520937719496691
plt.figure(figsize=(7,7))
plt.scatter(data[:,0], data[:,1], c='red', alpha=0.9)
plt.xlim([0,10])
plt.ylim([0,10])
plt.title(f'RANSAC 2D line fitting. e={error_final:10.5}. c={count_final}');
draw_line(theta_final, plt.gca())
RANSAC algorithm¶
From Forsyth and Ponce
- Determine
- $s$ - the smallest number of points required
- $N$ - the number of iterations required
- $d$ - the threshold used to identify a point that fits well
- $T$ - the number of nearby points required to assert that a model fits well
- Until $N$ iterations have occured
- Draw a sample of $s$ points from the data uniformly and at random
- Fit to that set of $s$ points
- For each data point outside the sample $s$
- Test the distance from the point to the line against $d$. If the distance from the point to the line is less than $d$ the point is close.
- If there are $T$ or more points close to the line then there is a good fit. Refit the line using all these points.
- Use the best fit from this collection, using the fitting erro as the criteria
How many iterations $N$ are needed to success probability equal to $p$?¶
Probability of sampling an outlier: $e$
Samples required for each model fitting: $s$
Probability of selecting an inlier: $1 - e$
Probability of selecting $s$ inlier in a row: $(1 - e)^s$
Probability of getting a bad sample: $1 - (1 - e)^s$. At least one of the last $s$ samples was an outlier.
Probability of getting $N$ bad sample: $\left(1 - (1 - e)^s\right)^N$
Probability of getting at least 1 good sample after $N$ tries: $1 - \left(1 - (1 - e)^s\right)^N$
We desire
$$ \begin{array}{crcl} & p &=& 1 - \left(1 - (1 - e)^s\right)^N \\ \Rightarrow & \left(1 - (1 - e)^s\right)^N &=& 1- p \\ \Rightarrow & \log\left(1 - (1 - e)^s\right)^N &=& \log(1- p) \\ \Rightarrow & N\log\left(1 - (1 - e)^s\right) &=& \log(1- p) \\ \Rightarrow & N&=& \frac{\log(1- p)}{\log\left(1 - (1 - e)^s\right)} \end{array} $$
Pros¶
- Robust to outliers
- Applicable to models with a large number of parameters
Cons¶
- Not good for multiple fits
- Computational times grows quickly with fraction of outliers and number of parameters that need to be estimated
Uses¶
- Estimating homography (image stitching)
- Estimating fundamental matrix (relating two views)