{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# High Performance Computing in Python" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:16.340937Z", "start_time": "2018-10-30T20:36:15.764728Z" }, "scrolled": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sb\n", "import matplotlib.pyplot as plt\n", "import matplotlib.cm as cm\n", "%matplotlib inline\n", "\n", "n_observed = 100\n", "\n", "cmap = cm.seismic\n", "sb.set(context='notebook', rc={\"figure.figsize\": (10, 10)})\n", "\n", "hr = pd.read_csv('hr2.txt', names=['hr'], dtype=np.float32)\n", "observed = hr.sample(n_observed).sort_index()\n", "\n", "\n", "x, y = hr.index.values, hr.values[:, 0]\n", "obs_x, obs_y = observed.index.values, observed.values[:, 0]\n", "\n", "\n", "plt.plot(obs_x, obs_y)\n", "plt.plot(obs_x, obs_y, 'ro')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:18.445891Z", "start_time": "2018-10-30T20:36:18.319934Z" }, "scrolled": false }, "outputs": [], "source": [ "def delta(i,j):\n", " return i==j\n", "\n", "def covariance(x, y, kernel=delta):\n", " n,m = len(x), len(y)\n", " C = np.empty((n,m))\n", " for i in range(n):\n", " for j in range(m):\n", " C[i,j] = kernel(x[i], y[j])\n", " return C\n", "\n", "plt.imshow(covariance(obs_x, obs_x))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:18.680430Z", "start_time": "2018-10-30T20:36:18.659847Z" } }, "outputs": [], "source": [ "covariance(obs_x, obs_x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:19.059722Z", "start_time": "2018-10-30T20:36:18.944530Z" }, "scrolled": false }, "outputs": [], "source": [ "def brownian(i, j):\n", " return min(i,j)\n", "\n", "plt.imshow(covariance(obs_x, obs_x, kernel=brownian), cmap=cmap)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:19.466921Z", "start_time": "2018-10-30T20:36:19.295168Z" }, "scrolled": false }, "outputs": [], "source": [ "theta = np.float32(0.001)\n", "\n", "def laplace(i, j):\n", " return np.exp(-theta * np.abs(i - j))\n", "\n", "plt.matshow(covariance(obs_x, obs_x, kernel=laplace), cmap=cmap)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:19.731366Z", "start_time": "2018-10-30T20:36:19.582025Z" } }, "outputs": [], "source": [ "theta2 = np.float32(0.001)**2\n", "\n", "def gaussian(i, j):\n", " return np.exp(-theta2 * np.abs(i - j)**2)\n", "\n", "plt.matshow(covariance(obs_x, obs_x, kernel=gaussian), cmap=cmap)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:19.827971Z", "start_time": "2018-10-30T20:36:19.824308Z" } }, "outputs": [], "source": [ "def predict(new_x, x,y, kernel=delta):\n", " c_new_x = covariance(new_x, x, kernel=kernel)\n", " c_x = covariance(x, x, kernel=kernel)\n", " solve = np.linalg.solve(c_x, y)\n", " r = c_new_x.dot(solve)\n", " return r" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:20.101533Z", "start_time": "2018-10-30T20:36:20.096559Z" }, "scrolled": true }, "outputs": [], "source": [ "def plot_predict(kernel=delta):\n", " new_x = hr.index.values\n", " new_y = predict(new_x, obs_x, obs_y, kernel=kernel)\n", "\n", " plt.plot(new_x, new_y, label='Prediction')\n", " plt.plot(obs_x, obs_y, 'ro', label='Observation')\n", " plt.plot(x, y, label='Real')\n", " plt.legend()\n", " return new_x, new_y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:20.646949Z", "start_time": "2018-10-30T20:36:20.322850Z" } }, "outputs": [], "source": [ "new_x, new_y = plot_predict(delta)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:20.939638Z", "start_time": "2018-10-30T20:36:20.648839Z" } }, "outputs": [], "source": [ "new_x, new_y = plot_predict(brownian)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:22.138284Z", "start_time": "2018-10-30T20:36:20.941984Z" } }, "outputs": [], "source": [ "theta = 0.01\n", "new_x, new_y = plot_predict(laplace)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:23.409628Z", "start_time": "2018-10-30T20:36:22.140411Z" } }, "outputs": [], "source": [ "theta2 = 0.01**2\n", "new_x, new_y = plot_predict(gaussian)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:25.223370Z", "start_time": "2018-10-30T20:36:23.411661Z" } }, "outputs": [], "source": [ "theta2 = 0.01**2\n", "new_x, new_y = plot_predict(lambda x,y: 0.001*delta(x,y) + gaussian(x,y))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:25.423397Z", "start_time": "2018-10-30T20:36:25.225962Z" } }, "outputs": [], "source": [ "plt.matshow(covariance(obs_x, obs_x, kernel=lambda x,y: 0.1*delta(x,y) + gaussian(x,y)), cmap=cmap)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:25.598694Z", "start_time": "2018-10-30T20:36:25.425966Z" } }, "outputs": [], "source": [ "plt.matshow(covariance(obs_x, obs_x, kernel=lambda x,y: gaussian(x,y)), cmap=cmap)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:27.481360Z", "start_time": "2018-10-30T20:36:25.601342Z" } }, "outputs": [], "source": [ "def plot_my_prediction(noisy=10.0):\n", " plot_predict(lambda x,y: delta(x,y) + noisy*gaussian(x,y))\n", " plt.show()\n", " \n", "plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:27.514227Z", "start_time": "2018-10-30T20:36:27.483801Z" } }, "outputs": [], "source": [ "from ipywidgets import interact_manual, FloatSlider\n", "\n", "intervals = dict()\n", "intervals['noisy'] = FloatSlider(min=0.0, max=20.0, value=10, step=1e-2)\n", "\n", "interact_manual(plot_my_prediction, **intervals)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Profiling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:29.471547Z", "start_time": "2018-10-30T20:36:27.516102Z" } }, "outputs": [], "source": [ "%time plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:43.504401Z", "start_time": "2018-10-30T20:36:29.473765Z" } }, "outputs": [], "source": [ "%timeit plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:29:54.279764Z", "start_time": "2018-10-30T19:29:54.235170Z" } }, "outputs": [], "source": [ "#https://pypi.org/project/memory_profiler/\n", "%reload_ext memory_profiler" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:32:11.673407Z", "start_time": "2018-10-30T19:32:09.797933Z" } }, "outputs": [], "source": [ "%memit plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:31:57.557395Z", "start_time": "2018-10-30T19:31:55.166307Z" }, "scrolled": true }, "outputs": [], "source": [ "%memit new = np.random.rand(int(2e8))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:34:07.559494Z", "start_time": "2018-10-30T19:34:03.998416Z" } }, "outputs": [], "source": [ "%%mprun -f np.linalg.solve\n", "plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:34:57.452385Z", "start_time": "2018-10-30T19:34:55.612017Z" } }, "outputs": [], "source": [ "%prun plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:36:46.603226Z", "start_time": "2018-10-30T19:36:46.588816Z" } }, "outputs": [], "source": [ "#https://pypi.org/project/line_profiler/\n", "%reload_ext line_profiler" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:37:10.886552Z", "start_time": "2018-10-30T19:37:08.895511Z" } }, "outputs": [], "source": [ "%lprun -f plot_my_prediction plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:37:51.232166Z", "start_time": "2018-10-30T19:37:49.263515Z" } }, "outputs": [], "source": [ "%lprun -f plot_predict plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:41:04.074803Z", "start_time": "2018-10-30T19:41:02.086809Z" } }, "outputs": [], "source": [ "%lprun -f predict plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:42:32.024983Z", "start_time": "2018-10-30T19:42:29.709674Z" } }, "outputs": [], "source": [ "%lprun -f covariance plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:39:09.772585Z", "start_time": "2018-10-30T19:39:07.424878Z" } }, "outputs": [], "source": [ "%lprun -f delta -f gaussian plot_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:41:50.889232Z", "start_time": "2018-10-30T19:41:49.204517Z" } }, "outputs": [], "source": [ "plot_my_prediction()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Multiprocessing" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:44:38.038074Z", "start_time": "2018-10-30T19:44:38.030344Z" } }, "outputs": [], "source": [ "def parallel_function(index):\n", " for i in range(10000000):\n", " i**2\n", " print(index)\n", " return index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:45:14.638016Z", "start_time": "2018-10-30T19:45:12.338151Z" } }, "outputs": [], "source": [ "parallel_function(0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:49:39.188823Z", "start_time": "2018-10-30T19:49:39.021495Z" } }, "outputs": [], "source": [ "import multiprocessing as mp\n", "\n", "processes = 8\n", "\n", "\n", "p = mp.Pool(processes)\n", "p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:49:45.128968Z", "start_time": "2018-10-30T19:49:39.759598Z" } }, "outputs": [], "source": [ "p.map(parallel_function, list(range(processes)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:51:31.060278Z", "start_time": "2018-10-30T19:51:05.359439Z" } }, "outputs": [], "source": [ "kernel = lambda x,y: delta(x,y) + theta2*gaussian(x,y)\n", "\n", "covariance(x, x, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:53:19.322291Z", "start_time": "2018-10-30T19:53:19.315793Z" } }, "outputs": [], "source": [ "def partial_covariance(x, y, kernel=delta, n0=0, nf=None):\n", " if nf is None:\n", " nf = len(x)\n", " m = len(y)\n", " C = np.empty((nf-n0,m))\n", " for i in range(n0, nf):\n", " for j in range(m):\n", " C[i - n0,j] = kernel(x[i], y[j])\n", " return C" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:53:50.997822Z", "start_time": "2018-10-30T19:53:24.770645Z" } }, "outputs": [], "source": [ "partial_covariance(x, x, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:54:45.674538Z", "start_time": "2018-10-30T19:54:20.249121Z" } }, "outputs": [], "source": [ "pcov1 = partial_covariance(x, x, kernel, n0=0, nf=len(x)//2)\n", "pcov2 = partial_covariance(x, x, kernel, n0=len(x)//2, nf=len(x))\n", "np.concatenate([pcov1, pcov2])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:56:29.888969Z", "start_time": "2018-10-30T19:56:29.878741Z" } }, "outputs": [], "source": [ "processes = 10\n", "\n", "def parallel_covariance(index):\n", " step = len(x) // processes\n", " n0 = step * index\n", " nf = step * (index + 1)\n", " if index + 1 == processes:\n", " nf = len(x)\n", " m = len(x)\n", " C = np.empty((nf - n0, m))\n", " for i in range(n0, nf):\n", " for j in range(m):\n", " C[i - n0, j] = kernel(x[i], x[j])\n", " return C" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T19:56:38.669392Z", "start_time": "2018-10-30T19:56:30.224698Z" } }, "outputs": [], "source": [ "p = mp.Pool(processes)\n", "np.concatenate(p.map(parallel_covariance, list(range(processes))))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Numba" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:47.827389Z", "start_time": "2018-10-30T20:36:43.506779Z" } }, "outputs": [], "source": [ "import numpy as np\n", "\n", "n = int(1e4)\n", "r = np.random.randn(n*n).reshape((n,n))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:36:47.832468Z", "start_time": "2018-10-30T20:36:47.828816Z" } }, "outputs": [], "source": [ "def sum2d(arr):\n", " M, N = arr.shape\n", " result = 0.0\n", " for i in range(M):\n", " for j in range(N):\n", " result += arr[i,j]\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:07.180837Z", "start_time": "2018-10-30T20:36:47.835240Z" } }, "outputs": [], "source": [ "sum2d(r)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:07.374464Z", "start_time": "2018-10-30T20:37:07.183166Z" } }, "outputs": [], "source": [ "from numba import jit\n", "\n", "@jit\n", "def sum2d_jit(arr):\n", " M, N = arr.shape\n", " result = 0.0\n", " for i in range(M):\n", " for j in range(N):\n", " result += arr[i,j]\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:07.645364Z", "start_time": "2018-10-30T20:37:07.376391Z" } }, "outputs": [], "source": [ "sum2d_jit(r)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:08.808416Z", "start_time": "2018-10-30T20:37:07.647482Z" } }, "outputs": [], "source": [ "theta2 = np.float32(0.001)**2\n", "kernel = lambda x,y: delta(x,y) + theta2*gaussian(x,y)\n", "\n", "%time covariance(x, obs_x, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:08.815177Z", "start_time": "2018-10-30T20:37:08.810554Z" } }, "outputs": [], "source": [ "@jit\n", "def jit_gaussian(i, j):\n", " return np.exp(-theta2 * np.abs(i - j)**2)\n", "\n", "@jit\n", "def jit_delta(i,j):\n", " return i==j\n", " \n", "@jit\n", "def jit_covariance(x, y):\n", " n,m = len(x), len(y)\n", " C = np.empty((n,m))\n", " for i in range(n):\n", " for j in range(m):\n", " C[i,j] = jit_delta(x[i], y[j]) + theta2*jit_gaussian(x[i], y[j])\n", " return C" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:09.167777Z", "start_time": "2018-10-30T20:37:08.816888Z" } }, "outputs": [], "source": [ "%time jit_covariance(x, obs_x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:10.659242Z", "start_time": "2018-10-30T20:37:09.169573Z" } }, "outputs": [], "source": [ "theta2 = 0.01**2\n", "covariance(x, obs_x, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:10.670844Z", "start_time": "2018-10-30T20:37:10.661151Z" } }, "outputs": [], "source": [ "jit_covariance(x, obs_x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:10.832996Z", "start_time": "2018-10-30T20:37:10.672627Z" } }, "outputs": [], "source": [ "@jit\n", "def jit_gaussian(i, j, theta2):\n", " return np.exp(-theta2 * np.abs(i - j)**2)\n", " \n", "@jit\n", "def jit_covariance(x, y, theta2, noisy=10):\n", " n,m = len(x), len(y)\n", " C = np.empty((n,m))\n", " for i in range(n):\n", " for j in range(m):\n", " C[i,j] = jit_delta(x[i], y[j]) + noisy*jit_gaussian(x[i], y[j], theta2)\n", " return C" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:11.187105Z", "start_time": "2018-10-30T20:37:10.837809Z" } }, "outputs": [], "source": [ "jit_covariance(x, obs_x, theta2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:12.786144Z", "start_time": "2018-10-30T20:37:11.189048Z" } }, "outputs": [], "source": [ "predict(x, obs_x, obs_y, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:12.791045Z", "start_time": "2018-10-30T20:37:12.787851Z" } }, "outputs": [], "source": [ "@jit\n", "def jit_predict(new_x, x, y, noisy=10):\n", " return jit_covariance(new_x, x, theta2, noisy).dot(np.linalg.solve(jit_covariance(x, x, theta2, noisy), y))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:13.186448Z", "start_time": "2018-10-30T20:37:12.792908Z" } }, "outputs": [], "source": [ "jit_predict(x, obs_x, obs_y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:13.191728Z", "start_time": "2018-10-30T20:37:13.188245Z" } }, "outputs": [], "source": [ "def jit_my_prediction(noisy=10.0):\n", " new_x = hr.index.values\n", " new_y = jit_predict(new_x, obs_x, obs_y, noisy)\n", "\n", " plt.plot(new_x, new_y, label='Prediction')\n", " plt.plot(obs_x, obs_y, 'ro', label='Observation')\n", " plt.plot(x, y, label='Real')\n", " plt.legend()\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:13:58.164002Z", "start_time": "2018-10-30T20:13:57.984122Z" } }, "outputs": [], "source": [ "%time jit_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:14:01.684387Z", "start_time": "2018-10-30T20:14:01.640589Z" } }, "outputs": [], "source": [ "from ipywidgets import interact_manual, FloatSlider\n", "\n", "intervals = dict()\n", "intervals['noisy'] = FloatSlider(min=0.0, max=20.0, value=10, step=1e-2)\n", "\n", "interact_manual(plot_my_prediction, **intervals)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:14:18.992391Z", "start_time": "2018-10-30T20:14:18.953557Z" } }, "outputs": [], "source": [ "interact_manual(jit_my_prediction, **intervals)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:14:34.195040Z", "start_time": "2018-10-30T20:14:33.955295Z" } }, "outputs": [], "source": [ "from ipywidgets import interact\n", "\n", "interact(jit_my_prediction, **intervals)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:15:15.767097Z", "start_time": "2018-10-30T20:15:15.498021Z" } }, "outputs": [], "source": [ "%prun jit_my_prediction()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:16:00.657510Z", "start_time": "2018-10-30T20:15:57.764681Z" } }, "outputs": [], "source": [ "%timeit gaussian(x[:, None], x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:17:07.164250Z", "start_time": "2018-10-30T20:17:02.244470Z" } }, "outputs": [], "source": [ "from numba import vectorize, float32\n", "from math import exp\n", "\n", "theta2 = 0.001**2\n", "\n", "@vectorize([float32(float32, float32)])\n", "def v_gaussian(i, j):\n", " return exp(-theta2 * abs(i - j)**2)\n", "\n", "%timeit v_gaussian(x[:,None].astype(np.float32), x.astype(np.float32))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:17:47.151200Z", "start_time": "2018-10-30T20:17:47.145531Z" } }, "outputs": [], "source": [ "x_32 = x[:,None].astype(np.float32)\n", "x32 = x.astype(np.float32)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:17:52.954343Z", "start_time": "2018-10-30T20:17:47.975619Z" } }, "outputs": [], "source": [ "%timeit v_gaussian(x_32, x32)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:19:51.328084Z", "start_time": "2018-10-30T20:19:51.237441Z" } }, "outputs": [], "source": [ "@vectorize([float32(float32, float32, float32, float32)])\n", "def vec_covariance(i, j, theta2, noisy):\n", " return (i==j) + noisy*exp(-theta2 * abs(i - j)**2)\n", "\n", "@jit\n", "def vec_predict(new_x, x, y, noisy=10):\n", " return vec_covariance(new_x[:,None], x, theta2, noisy).dot(np.linalg.solve(vec_covariance(x[:,None], x, theta2, noisy), y))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:20:08.059658Z", "start_time": "2018-10-30T20:19:54.832050Z" } }, "outputs": [], "source": [ "%timeit predict(x, obs_x, obs_y, kernel)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:20:12.663168Z", "start_time": "2018-10-30T20:20:08.061904Z" } }, "outputs": [], "source": [ "%timeit jit_predict(x, obs_x, obs_y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:20:22.946882Z", "start_time": "2018-10-30T20:20:19.875531Z" } }, "outputs": [], "source": [ "%timeit vec_predict(x.astype(np.float32), obs_x.astype(np.float32), obs_y.astype(np.float32))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Theano" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:30:40.354830Z", "start_time": "2018-10-30T20:30:40.348806Z" } }, "outputs": [], "source": [ "import os\n", "os.environ['MKL_THREADING_LAYER'] = 'GNU'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:30:45.889698Z", "start_time": "2018-10-30T20:30:41.285015Z" }, "scrolled": true }, "outputs": [], "source": [ "import theano as th\n", "import theano.tensor as tt\n", "\n", "#th.config.mode = 'FAST_RUN' #'Mode', 'DebugMode', 'FAST_RUN', 'NanGuardMode', 'FAST_COMPILE', 'DEBUG_MODE'\n", "th.config.floatX, th.config.device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:31:38.634259Z", "start_time": "2018-10-30T20:31:38.475049Z" } }, "outputs": [], "source": [ "import numpy as np\n", "import time\n", "\n", "np.random.seed(123)\n", "m = np.random.randn(1000, 1000).astype('float32')\n", "w = np.random.randn(1000, 1000).astype('float32')\n", "b = np.random.randn(1000, 1000).astype('float32')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:31:51.421918Z", "start_time": "2018-10-30T20:31:39.146068Z" } }, "outputs": [], "source": [ "start = time.time()\n", "for i in range(500):\n", " y = np.add(np.dot(m, np.add(np.dot(m, w), b)), b)\n", "print(\"numpy\", time.time() - start, \"sec\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:31:55.554994Z", "start_time": "2018-10-30T20:31:54.230450Z" } }, "outputs": [], "source": [ "# define a symbolic expression of the equations in theano\n", "tm = tt.matrix(\"m\")\n", "tw = tt.matrix(\"w\")\n", "tb = tt.matrix(\"b\")\n", "ty = tt.add(tt.dot(tm, tt.add(tt.dot(tm, tw), tb)), tb)\n", "# and compile it\n", "op = th.function(inputs=[tm, tw, tb], outputs=[ty])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:32:45.960964Z", "start_time": "2018-10-30T20:32:37.356625Z" } }, "outputs": [], "source": [ "start = time.time()\n", "for i in range(500):\n", " y = op(m, w, b)\n", "print(\"theano\", time.time() - start, \"sec\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:33:02.839437Z", "start_time": "2018-10-30T20:33:02.832689Z" } }, "outputs": [], "source": [ "th.printing.debugprint(ty)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:33:23.888651Z", "start_time": "2018-10-30T20:33:23.848708Z" } }, "outputs": [], "source": [ "# define a symbolic expression of the equations in theano\n", "sm = th.shared(m)\n", "sw = th.shared(w)\n", "sb = th.shared(b)\n", "\n", " \n", "mw_b = tt.add(tt.dot(sm, tt.add(tt.dot(sm, sw), sb)), sb)\n", "# and compile it\n", "op_shared = th.function(inputs=[], outputs=[mw_b])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:33:34.985027Z", "start_time": "2018-10-30T20:33:27.816305Z" } }, "outputs": [], "source": [ "# then run same loop as before\n", "start = time.time()\n", "for i in range(500):\n", " op_shared() # now there's no input/output\n", "print(\"theano\", time.time() - start, \"sec\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:33:44.884328Z", "start_time": "2018-10-30T20:33:44.874330Z" } }, "outputs": [], "source": [ "th.printing.debugprint(mw_b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:34:50.243943Z", "start_time": "2018-10-30T20:34:50.200569Z" } }, "outputs": [], "source": [ "ty = th.shared(np.zeros((1000, 1000)).astype('float32')) # we need a shared var for y now\n", "op_update = th.function(inputs=[], updates={ty: mw_b}) # update y on gpu" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:34:55.198846Z", "start_time": "2018-10-30T20:34:50.907302Z" } }, "outputs": [], "source": [ "start = time.time()\n", "for i in range(500):\n", " op_update() \n", "print (\"theano\", time.time()-start, \"sec\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:17.388079Z", "start_time": "2018-10-30T20:37:13.193714Z" } }, "outputs": [], "source": [ "sx = th.shared(x)\n", "noisy = 10\n", "\n", "x1 = sx.dimshuffle([0, 'x'])\n", "x2 = sx.dimshuffle(['x', 0])\n", "\n", "th_gaussian = tt.exp(-theta2 * tt.abs_(x1 - x2)**2)\n", "\n", "th_delta = tt.eq((x1 - x2), 0)\n", " \n", "th_covariance = th_delta + noisy*th_gaussian\n", "\n", "op_covariance = th.function(inputs=[], outputs=[th_covariance])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:26.756895Z", "start_time": "2018-10-30T20:37:21.188117Z" } }, "outputs": [], "source": [ "%timeit jit_covariance(x, x, theta2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-10-30T20:37:45.770812Z", "start_time": "2018-10-30T20:37:32.295187Z" } }, "outputs": [], "source": [ "%timeit op_covariance()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }