Pocket-sized implementations of machine learning models, most of which will fit in a tweet.
Table of Contents
$ git clone https://github.com/eriklindernoren/NapkinML
$ cd NapkinML
$ sudo python setup.py install
class KMeans:
    """K-Means clustering (Lloyd's algorithm) with Euclidean distance."""

    def fit(self, X, k, n_iter=200):
        """Partition the rows of X into k clusters.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            k: Number of clusters; must not exceed the number of samples.
            n_iter: Maximum number of centroid updates.

        Returns:
            1-D integer array holding one label in ``range(k)`` per sample.

        Raises:
            ValueError: If k is larger than the number of samples
                (raised by ``random.sample``).
        """
        X = np.asarray(X, dtype=float)
        # Seed the centroids with k samples drawn without replacement.
        centers = np.array(random.sample(list(X), k))
        clusters = np.argmin(cdist(X, centers), axis=1)
        for _ in range(n_iter):
            # One centroid per cluster id (not per sample), so the labels stay
            # in range(k). A cluster that lost all its members keeps its
            # previous centroid instead of becoming NaN.
            members = [X[clusters == c] for c in range(k)]
            centers = np.array([
                m.mean(0) if len(m) else centers[c]
                for c, m in enumerate(members)
            ])
            updated = np.argmin(cdist(X, centers), axis=1)
            if np.array_equal(updated, clusters):
                # Assignments are stable, so further rounds would be no-ops.
                break
            clusters = updated
        return clusters
$ python napkin_ml/examples/kmeans.py
Figure: K-Means clustering of the Iris dataset.
K-Nearest Neighbors
class KNN:
    """K-nearest-neighbours classifier using a majority vote."""

    def predict(self, k, Xt, X, y):
        """Predict a label for every row of Xt.

        Args:
            k: Number of neighbours that vote.
            Xt: 2-D array-like of query samples.
            X: 2-D array-like of training samples.
            y: Non-negative integer labels, one per row of X
                (``np.bincount`` rejects anything else).

        Returns:
            List with one predicted label per row of Xt. On a tied vote the
            smallest label wins, because ``argmax`` returns the first maximum.
        """
        # Coerce so that plain lists can be indexed with an index array below.
        y = np.asarray(y)
        # Column indices of the k closest training samples for each query row.
        nearest = np.argsort(cdist(Xt, X), axis=1)[:, :k]
        return [np.bincount(y[row]).argmax() for row in nearest]
$ python napkin_ml/examples/knn.py
Figure: Classification of the Iris dataset with K-Nearest Neighbors.
Linear Regression
class LinearRegression:
    """Ordinary least-squares linear regression."""

    def fit(self, X, y):
        """Solve for the weights minimising ``||X w - y||`` and store them in ``self.w``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features). No intercept
                column is added here; include one in X if a bias is needed.
            y: Array-like of targets with one entry (or row) per sample.
        """
        self.w = np.linalg.lstsq(X, y, rcond=None)[0]

    def predict(self, X):
        """Return the linear prediction ``X @ self.w`` for each row of X."""
        return np.asarray(X).dot(self.w)
$ python napkin_ml/examples/linear_regression.py
Figure: Linear Regression.
Linear Discriminant Analysis
class LDA:
    """Two-class linear discriminant analysis for labels 0 and 1."""

    def fit(self, X, y):
        """Compute the discriminant direction and store it in ``self.w``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Labels, one per row of X; only the values 0 and 1 are used.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Sum of the per-class covariance matrices (within-class scatter).
        cov_sum = sum([np.cov(X[y == val], rowvar=False) for val in [0, 1]])
        mean_diff = X[y == 0].mean(0) - X[y == 1].mean(0)
        self.w = np.linalg.inv(cov_sum).dot(mean_diff)

    def predict(self, X):
        """Return 1 where the projection onto ``self.w`` is negative, else 0.

        ``self.w`` points from the class-1 mean towards the class-0 mean, so
        a negative projection falls on the class-1 side.
        NOTE(review): the threshold is fixed at 0, which presumably expects X
        to be centred between the two classes -- confirm with callers.
        """
        return 1 * (np.asarray(X).dot(self.w) < 0)
Logistic Regression
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent."""

    def fit(self, X, y, n_iter=4000, lr=0.01):
        """Fit the weight vector ``self.w``.

        Args:
            X: 2-D array of shape (n_samples, n_features). No intercept
                column is added here.
            y: 1-D array of targets, one per row of X (presumably 0/1
                labels -- confirm with callers).
            n_iter: Number of gradient steps.
            lr: Step size.
        """
        self.w = np.random.rand(X.shape[1])
        for _ in range(n_iter):
            # (prediction - target) . X is the gradient of the summed
            # cross-entropy loss with respect to the weights.
            self.w -= lr * (self.predict(X) - y).dot(X)

    def predict(self, X):
        """Return ``sigmoid(X @ self.w)`` for each row of X.

        ``sigmoid`` is a helper expected to be defined elsewhere in the
        project.
        """
        return sigmoid(X.dot(self.w))
$ python napkin_ml/examples/logistic_regression.py
Figure: Classification with Logistic Regression.
Multilayer Perceptron
class MLP:
    """Multilayer perceptron: one sigmoid hidden layer and a softmax output.

    ``sigmoid`` and ``softmax`` are helpers expected to be defined elsewhere
    in the project.
    """

    def fit(self, X, y, n_epochs=4000, lr=0.01, n_units=10):
        """Train both weight matrices with full-batch gradient descent.

        Args:
            X: 2-D array of shape (n_samples, n_features).
            y: 2-D array of targets with one column per output unit
                (presumably one-hot encoded classes -- confirm with callers).
            n_epochs: Number of gradient steps.
            lr: Step size.
            n_units: Number of hidden units.
        """
        self.w = np.random.rand(X.shape[1], n_units)  # input -> hidden
        self.v = np.random.rand(n_units, y.shape[1])  # hidden -> output
        for _ in range(n_epochs):
            h_out = sigmoid(X.dot(self.w))
            out = softmax(h_out.dot(self.v))
            self.v -= lr * h_out.T.dot(out - y)
            # self.v has already been updated above; the statement order of
            # the original snippet is kept. h_out * (1 - h_out) is the
            # derivative of the sigmoid at the hidden activations.
            self.w -= lr * X.T.dot((out - y).dot(self.v.T) * (h_out * (1 - h_out)))

    def predict(self, X):
        """Return the softmax output of the network for each row of X."""
        return softmax(sigmoid(X.dot(self.w)).dot(self.v))
$ python napkin_ml/examples/mlp.py
Figure: Classification of the Iris dataset with a Multilayer Perceptron
with one hidden layer.
Principal Component Analysis
class PCA:
    """Principal component analysis via singular value decomposition."""

    def transform(self, X, dim):
        """Project X onto its ``dim`` leading principal directions.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            dim: Number of components to keep.

        Returns:
            Array of shape (n_samples, dim).
        """
        X = np.asarray(X)
        # The right singular vectors (rows of V) of the centred data are the
        # principal directions.
        _, S, V = np.linalg.svd(X - X.mean(0), full_matrices=True)
        # Indices of the dim largest singular values.
        idx = S.argsort()[::-1][:dim]
        # NOTE(review): the projection uses X as passed, not the centred
        # copy, so the output is offset by a constant relative to a centred
        # projection -- confirm this is intended.
        return X.dot(V[idx].T)
$ python napkin_ml/examples/pca.py
Figure: Dimensionality reduction with Principal Component Analysis.