Skip to content

Commit

Permalink
Update MLSD
Browse files Browse the repository at this point in the history
  • Loading branch information
Alireza Dirafzoon committed Oct 18, 2023
1 parent d71a0f3 commit ebeb83a
Show file tree
Hide file tree
Showing 20 changed files with 2,093 additions and 248 deletions.
Binary file modified .DS_Store
Binary file not shown.
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/alirezadir/machine-learning-interview-enlightener/blob/main/LICENSE) -->

# A guide for Machine Learning Technical Interviews :robot:
# Machine Learning Technical Interviews :robot:
<p align="center">
<img width="720" src="src/imgs/cover.png">
<img width="720" src="src/imgs/MLI-Book-Cover.png">
</p>


This repo aims to serve as a guide to prepare for **Machine Learning (AI) Engineer** interviews for relevant roles at big tech companies (in particular FAANG). It has compiled based on the author's personal experience and notes from his own interview preparation, when he received offers from Meta (ML Specialist), Google (ML Engineer), Amazon (Applied Scientist), Apple (Applied Scientist), and Roku (ML Engineer).

This repo aims to serve as a guide to prepare for **Machine Learning (AI) Engineering** interviews for relevant roles at big tech companies (in particular FAANG). It has compiled based on the author's personal experience and notes from his own interview preparation, when he received offers from Meta (ML Specialist), Google (ML Engineer), Amazon (Applied Scientist), Apple (Applied Scientist), and Roku (ML Engineer).

The following components are the most commonly used interview modules for technical ML roles at different companies. We will go through them one by one and share how one can prepare:


<center>

| | |
|Chapter | Content|
|---| --- |
| Chapter 1 | [General Coding (Algos and Data Structures)](src/lc-coding.md) |
| Chapter 2 | [ML Coding](src/MLC/ml-coding.md) |
Expand All @@ -27,6 +29,8 @@ The following components are the most commonly used interview modules for techni
| Chapter 5 | [Behavioral](src/behavior.md)|
| | |

</center>

Notes:

* At the time I'm putting these notes together, machine learning interviews at different companies do not follow a unique structure unlike software engineering interviews. However, I found some of the components very similar to each other, although under different naming.
Expand Down
312 changes: 312 additions & 0 deletions src/MLC/notebooks/.test.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Kmeans"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"class KMeans:\n",
" def __init__(self, k, max_it=100):\n",
" self.k = k \n",
" self.max_it = max_it \n",
" # self.centroids = None \n",
" \n",
"\n",
" def fit(self, X):\n",
" # init centroids \n",
" self.centroids = X[np.random.choice(X.shape[0], size=self.k, replace=False)]\n",
" # for each it \n",
" for i in range(self.max_it):\n",
" # assign points to closest centroid \n",
" # clusters = []\n",
" # for j in range(len(X)):\n",
" # dist = np.linalg.norm(X[j] - self.centroids, axis=1)\n",
" # clusters.append(np.argmin(dist))\n",
" dist = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)\n",
" clusters = np.argmin(dist, axis=1)\n",
" \n",
" # update centroids (mean of clusters)\n",
" for k in range(self.k):\n",
" cluster_X = X[np.where(np.array(clusters) == k)]\n",
" if len(cluster_X) > 0 : \n",
" self.centroids[k] = np.mean(cluster_X, axis=0)\n",
" # check convergence / termination \n",
" if i > 0 and np.array_equal(self.centroids, pre_centroids): \n",
" break \n",
" pre_centroids = self.centroids \n",
" \n",
" self.clusters = clusters \n",
" \n",
" def predict(self, X):\n",
" clusters = []\n",
" for j in range(len(X)):\n",
" dist = np.linalg.norm(X[j] - self.centroids, axis=1)\n",
" clusters.append(np.argmin(dist))\n",
" return clusters \n",
" \n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]\n",
"[[ 4.62131563 5.38818365]\n",
" [-4.47889882 -4.71564167]]\n"
]
}
],
"source": [
"x1 = np.random.randn(5,2) + 5 \n",
"x2 = np.random.randn(5,2) - 5\n",
"X = np.concatenate([x1,x2], axis=0)\n",
"\n",
"\n",
"kmeans = KMeans(k=2)\n",
"kmeans.fit(X)\n",
"clusters = kmeans.predict(X)\n",
"print(clusters)\n",
"print(kmeans.centroids)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPUUlEQVR4nO3dbYisZ33H8e/vJKZ11ZBijgg5OTtKfWjqA9o1VEJta1Sihvg2sorVF0ulhgiKJi59eaDUogaUliHGNw5IiY+IT0nVQl+Yuic+NR6VELLJ8QFXoShd2hDy74uZ9Rw3Z3Zndu5zZq6z3w+EOXPPvdf9HzLnt9e55r6uK1WFJKldR+ZdgCRpNga5JDXOIJekxhnkktQ4g1ySGnfpPC565ZVXVq/Xm8elJalZJ0+e/FVVHd19fC5B3uv12NjYmMelJalZSTbPddyhFUlqnEEuSY0zyCWpcQa5JDXOIJekxhnkkg6vwQB6PThyZPg4GMy7ogOZy+2HkjR3gwGsrcH29vD55ubwOcDq6vzqOgB75JIOp/X1MyG+Y3t7eLwxBrmkw+mRR6Y7vsAMckmH0/Hj0x1fYAa5pMPpxAlYWvr9Y0tLw+ONMcglHU6rq9Dvw/IyJMPHfr+5LzrBu1YkHWarq00G9272yCWpcZ0EeZIrktyd5EdJTiV5ZRftSpL211WP/A7gK1X1QuClwKmO2pWk8+MimdUJHYyRJ7kceBXwNwBV9Rjw2KztStJ5cxHN6oRueuTPBbaATyT5TpI7kzxt90lJ1pJsJNnY2trq4LKSdEAX0axO6CbILwVeDvxzVb0M+B/gtt0nVVW/qlaqauXo0SdtOSdJF85FNKsTugny08Dpqrpv9PxuhsEuSYvpIprVCR0EeVX9Ang0yQtGh64Hfjhru5J03lxEszqhuwlBtwCDJJcBDwFv76hdSerezhea6+vD4ZTjx4ch3uAXnQCpqgt+0ZWVldrY2Ljg15WkliU5WVUru487s1OSGmeQS1LjDHJJmtBgMKDX63HkyBF6vR6DBZkN6uqHkjSBwWDA2toa26OJRJubm6yNZoOuzvlLUnvkkjSB9fX134X4ju3tbdYXYDaoQS5JE3hkzKzPcccvJINckiZwfMysz3HHLySDXJImcOLECZZ2zQZdWlrixKSzQc/jsrkGuSRNYHV1lX6/z/LyMklYXl6m3+8/+YvOcwX2zrK5m5tQdWbZ3I7C3JmdktSV3eucw3ANl6c+FX796yefv7wMDz88cfPjZnZ6+6EkdWXcOue7j+3o6ItSh1YkqSvTBnNHX5Qa5JLUlXHB/Mxnntdlcw1ySerKuHXO77gD+v3hmHgyfOz3O1s21zFySerKfuucn6ep/Aa5JHVpdfWCb1Dh0IokNc4gl6TGGeSS1DiDXJIaZ5BLUuMMcklqnEEuSY3rLMiTXJLkO0m+2FWbkrRwzuO64gfV5YSgW4FTwOUdtilJi2P3MrU764rDBZ8EdLZOeuRJjgFvBO7soj1JWkjjlqmd8wbMXQ2tfAR4H/DEuBOSrCXZSLKxtbXV0WUl6QIat0ztnDdgnjnIk9wI/LKqTu51XlX1q2qlqlaOHj0662Ul6cIbt0ztnDdg7qJHfh1wU5KHgU8Br07yyQ7alaTFMm6Z2o7WFT+omYO8qm6vqmNV1QNuBr5eVW+ZuTJJWjSrq+d1XfGDchlbSZrGHJap3U+nQV5V3wS+2WWbkqS9ObNTkhpnkEtS4wxySWqcQS5JjTPIJalxBrkkNc4gl6TGGeSS1DiDXJIaZ5BLUuMMcklqnEEuSY0zyCWpcQa5JDXOIJekxhnkktQ4g1ySGmeQS1LjDHJJapxBLkmNM8glqXEGuSQ1ziCXpMbNHORJrk7yjSSnkjyQ5NYuCpMkTebSDtp4HHhPVd2f5BnAyST3VNUPO2hbkrSPmXvkVfXzqrp/9OffAqeAq2ZtV5I0mU7HyJP0gJcB93XZriRpvM6CPMnTgU8D766q35zj9bUkG0k2tra2urqsJB16nQR5kqcwDPFBVX3mXOdUVb+qVqpq5ejRo11cVpJEN3etBPg4cKqqPjR7SZKkaXTRI78OeCvw6iTfHf33hg7alSRNYObbD6vqP4B0UIsk6QCc2SlJjTPIJalxBrkkNc4gl6TGGeSS1DiDXJIaZ5BLUuMMcklqnEEuSY0zyCWpcQa5JDXOIJekxhnkktQ4g1ySGmeQS1LjDHJJapxBLkmNM8glqXEGuSQ1ziCXpMYZ5JLUOINckhpnkEtS4zoJ8iQ3JPlxkgeT3NZFm5Kkycwc5EkuAT4GvB64BnhzkmtmbVeSNJkueuTXAg9W1UNV9RjwKeBNHbQrSZpAF0F+FfDoWc9Pj45Jki6ALoI85zhWTzopWUuykWRja2urg8tKkqCbID8NXH3W82PAz3afVFX9qlqpqpWjR492cFlJEnQT5N8GnpfkOUkuA24GvtBBu5KkCVw6awNV9XiSdwFfBS4B7qqqB2auTJI0kZmDHKCqvgR8qYu2JEnTcWanJDXOIJekxhnkktQ4g1ySGmeQS1LjDHJJapxBLkmNM8glqXEGuSQ1ziCXpMYZ5JLUOINckhpnkEtS4wxySWqcQS5JjTPIJalxBrkkNc4gl6TGGeSS1DiDXJIaZ5BLUuMMcklqnEEuSY2bKciTfDDJj5J8P8lnk1zRUV2SpAnN2iO/B3hRVb0E+Alw++wlSZKmMVOQV9XXqurx0dNvAcdmL0mSNI0ux8jfAXy5w/YkSRO4dL8TktwLPPscL61X1edH56wDjwODPdpZA9YAjh8/fqBiJUlPtm+QV9Vr9no9yduAG4Hrq6r2aKcP9AFWVlbGnidJms6+Qb6XJDcA7wf+sqq2uylJkjSNWcfIPwo8A7gnyXeT/EsHNUmSpjBTj7yq/rirQiRJB+PMTklqnEEuSY0zyCWpcQa5JDXOIJekxhnkktQ4g1ySGmeQL7DBAHo9OHJk+DgYu5KNpMNspglBOn8GA1hbg+3Rwgebm8PnAKur86tL0uKxR76g1tfPhPiO7e3hcUk6m0G+oB55ZLrjkg4vg3xBjVuy3aXcJe1mkC+oEydgaen3jy0tDY9L0tkM8gW1ugr9PiwvQzJ87Pf9olPSk3nXygJbXTW4Je3PHrkkNc4gl6TGNRPkznKUpHNrYozcWY6SNF4TPXJnOUrSeE0EubMcJWm8JoLcWY6SNF4TQe4sR0kar5MgT/LeJJXkyi7a281ZjpI03sx3rSS5GngtcF5HrJ3lKEnn1kWP/MPA+4DqoC1J0pRmCvIkNwE/rarvdVSPJGlK+w6tJLkXePY5XloHPgC8bpILJVkD1gCOe7uJJHUmVQcbEUnyYuDfgJ2pOseAnwHXVtUv9vrZlZWV2tjYONB1JemwSnKyqlZ2Hz/wl51V9QPgWWdd4GFgpap+ddA2JUnTa+I+cknSeJ0tmlVVva7akiRNzh65JDXOIJekxhnkexgMBvR6PY4cOUKv12PgbhaSFlATG0vMw2AwYG1tje3RQuibm5usjXazWHWtAEkLxB75GOvr678L8R3b29usu5uFpAVjkI/xyJhdK8Ydl6R5McjHGLeMgMsLSFo0BvkYJ06cYGnXbhZLS0uccDcLSQvGIB9jdXWVfr/P8vIySVheXqbf7/tFp6SFc+BFs2bholmSNL1xi2bZI5ekxh2qIB8MoNeDI0eGj87vkXQxODQTggYDWFuDnVvDNzeHz8G9QCW17dD0yNfXz4T4ju3t4XFJatmhCfJx83ic3yOpdYcmyMfN43F+j6TWHZogP3ECds3vYWlpeFySWnZognx1Ffp9WF6GZPjY7+//Rad3ukhadIfmrhUYhvY0d6h4p4ukFhyaHvlBeKeLpBYY5HuY5E4Xh14kzZtBvof97nTZGXrZ3ISqM0MvhrmkC8kg38N+d7o49CJpERjke9jvThcnGUlaBDMHeZJbkvw4yQNJ/rGLohbJ6io8/DA88cTw8ey7VZxkJGkRzBTkSf4aeBPwkqr6U+CfOqmqEU4ykrQIZu2RvxP4h6r6P4Cq+uXsJbXjoJOMJKlLM+0QlOS7wOeBG4D/Bd5bVd8ec+4asAZw/PjxP9vc3DzwdSXpMBq3Q9C+MzuT3As8+xwvrY9+/o+APwdeAfxrkufWOX47VFUf6MNwq7fpypckjbNvkFfVa8a9luSdwGdGwf2fSZ4ArgS2uitRkrSXWcfIPwe8GiDJ84HLgF/N2KYkaQqzLpp1F3BXkv8CHgPedq5hFUnS+TNTkFfVY8BbOqpFknQAM921cuCLJlvAIt22ciXtDwm1/h6sf/5afw+Hof7lqjq6++BcgnzRJNk41y09LWn9PVj//LX+Hg5z/a61IkmNM8glqXEG+VB/3gV0oPX3YP3z1/p7OLT1O0YuSY2zRy5JjTPIJalxBvlZLoZNMpK8N0kluXLetUwryQeT/CjJ95N8NskV865pEkluGH1uHkxy27zrmUaSq5N8I8mp0ef+1nnXdBBJLknynSRfnHctB5HkiiR3jz7/p5K8cpqfN8hHLoZNMpJcDbwWaHWzuXuAF1XVS4CfALfPuZ59JbkE+BjweuAa4M1JrplvVVN5HHhPVf0Jw1VM/66x+nfcCpyadxEzuAP4SlW9EHgpU74Xg/yMi2GTjA8D7wOa/Aa7qr5WVY+Pnn4LODbPeiZ0LfBgVT00WrLiUww7BE2oqp9X1f2jP/+WYYBcNd+qppPkGPBG4M5513IQSS4HXgV8HIZLn1TVf0/ThkF+xvOBv0hyX5J/T/KKeRc0jSQ3AT+tqu/Nu5aOvAP48ryLmMBVwKNnPT9NY0G4I0kPeBlw35xLmdZHGHZgnphzHQf1XIZLf39iNDx0Z5KnTdPArKsfNqWrTTLmZZ/6PwC87sJWNL293kNVfX50zjrDf/IPLmRtB5RzHFuYz8ykkjwd+DTw7qr6zbzrmVSSG4FfVtXJJH8153IO6lLg5cAtVXVfkjuA24C/n6aBQ6P1TTLG1Z/kxcBzgO8lgeGQxP1Jrq2qX1zAEve11/8DgCRvA24Erl+kX6J7OA1cfdbzY8DP5lTLgSR5CsMQH1TVZ+Zdz5SuA25K8gbgD4HLk3yyqlpalfU0cLqqdv4ldDfDIJ+YQytnfI5GN8moqh9U1bOqqldVPYYfjJcvWojvJ8kNwPuBm6pqe971TOjbwPOSPCfJZcDNwBfmXNPEMvzN/3HgVFV9aN71TKuqbq+qY6PP/c3A1xsLcUZ/Tx9N8oLRoeuBH07TxqHqke/DTTLm76PAHwD3jP5l8a2q+tv5lrS3qno8ybuArwKXAHdV1QNzLmsa1wFvBX4w2kwd4ANV9aX5lXQo3QIMRp2Bh4C3T/PDTtGXpMY5tCJJjTPIJalxBrkkNc4gl6TGGeSS1DiDXJIaZ5BLUuP+H8mBYH+I9lNrAAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from matplotlib import pyplot as plt \n",
"\n",
"colors = ['b', 'r']\n",
"for k in range(kmeans.k):\n",
" plt.scatter(X[np.where(np.array(clusters) == k)][:,0], \n",
" X[np.where(np.array(clusters) == k)][:,1], \n",
" color=colors[k])\n",
"plt.scatter(kmeans.centroids[:,0], kmeans.centroids[:,1], color='black')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 1, 2)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[:, np.newaxis] "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### KNN"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 2) (100,)\n",
"[0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0.]\n",
"[0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1.]\n"
]
}
],
"source": [
"import numpy as np \n",
"from collections import Counter\n",
"class KNN:\n",
" def __init__(self, k):\n",
" self.k = k \n",
" \n",
" \n",
" def fit(self, X, y):\n",
" self.X = X\n",
" self.y = y \n",
" \n",
" def predict(self, X_test):\n",
" y_pred = []\n",
" for x in X_test: \n",
" dist = np.linalg.norm(x - self.X, axis=1)\n",
" knn_idcs = np.argsort(dist)[:self.k]\n",
" knn_labels = self.y[knn_idcs]\n",
" label = Counter(knn_labels).most_common(1)[0][0]\n",
" y_pred.append(label)\n",
" return np.array(y_pred)\n",
"\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"x1 = np.random.randn(50,2) + 1\n",
"x2 = np.random.randn(50,2) - 1\n",
"X = np.concatenate([x1, x2], axis=0)\n",
"y = np.concatenate([np.ones(50), np.zeros(50)])\n",
"print(X.shape, y.shape)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n",
"\n",
"knn = KNN(k=5)\n",
"knn.fit(X_train, y_train)\n",
"y_pred = knn.predict(X_test)\n",
"print(y_pred)\n",
"print(y_test)\n"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(40, 2)"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 0.])"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.zeros(2,)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1., 1., 1., 0., 0., 0.])"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.concatenate([np.ones(3), np.zeros(3)])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Lin Regression "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class LinearRegression: \n",
" def __init__(self):\n",
" self.m = None \n",
" self.b = None \n",
" \n",
" def fit(self, X, y):\n",
" \n",
"\n",
"\n",
" def predict(self, X):\n",
" pass "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit ebeb83a

Please sign in to comment.