{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PyTorch\n",
    "Gonzalo Rios (grios@dim.uchile.cl)\n",
    "\n",
    "1. Wiki: https://en.wikipedia.org/wiki/PyTorch\n",
    "2. Github: https://github.com/pytorch/pytorch\n",
    "2. Docs: https://pytorch.org/\n",
    "3. Cuda: https://developer.nvidia.com/cuda-downloads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tensors\n",
    "https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:00.585772Z",
     "start_time": "2018-11-08T19:06:00.032489Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:16.556738Z",
     "start_time": "2018-11-08T19:06:16.529108Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.empty(5, 3)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:32.105438Z",
     "start_time": "2018-11-08T19:06:32.099738Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.rand(5, 3)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:36.040367Z",
     "start_time": "2018-11-08T19:06:36.035080Z"
    }
   },
   "outputs": [],
   "source": [
    "x.size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:47.984403Z",
     "start_time": "2018-11-08T19:06:47.978323Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.zeros(5, 3)\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:54.787598Z",
     "start_time": "2018-11-08T19:06:54.776941Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.get_default_dtype()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:06:58.005772Z",
     "start_time": "2018-11-08T19:06:58.000691Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.set_default_dtype(torch.float64)\n",
    "x = torch.zeros(5, 3)\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:02.538612Z",
     "start_time": "2018-11-08T19:07:02.531814Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.set_default_tensor_type(torch.FloatTensor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:11.485105Z",
     "start_time": "2018-11-08T19:07:11.475497Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.zeros(5, 3, dtype=torch.long)\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:23.633921Z",
     "start_time": "2018-11-08T19:07:23.629192Z"
    }
   },
   "outputs": [],
   "source": [
    "x = x.new_ones(5, 3)\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:32.193909Z",
     "start_time": "2018-11-08T19:07:32.184182Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.tensor([5.5, 3])\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:34.858501Z",
     "start_time": "2018-11-08T19:07:34.854030Z"
    }
   },
   "outputs": [],
   "source": [
    "x = x.new_ones(5, 3)\n",
    "x, x.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:40.403345Z",
     "start_time": "2018-11-08T19:07:40.393142Z"
    }
   },
   "outputs": [],
   "source": [
    "x = x.new_ones(5, 3, dtype=torch.double)      # new_* methods take in sizes\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:43.152724Z",
     "start_time": "2018-11-08T19:07:43.148496Z"
    }
   },
   "outputs": [],
   "source": [
    "y = torch.rand(5, 3)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:47.612359Z",
     "start_time": "2018-11-08T19:07:47.551134Z"
    }
   },
   "outputs": [],
   "source": [
    "x + y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:07:57.325885Z",
     "start_time": "2018-11-08T19:07:57.316454Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.tensor(x, dtype=torch.float32)\n",
    "x + y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:08:05.891579Z",
     "start_time": "2018-11-08T19:08:05.880368Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.add(x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = torch.empty(5, 3)\n",
    "result = torch.add(x, y)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:08:28.687901Z",
     "start_time": "2018-11-08T19:08:28.683948Z"
    }
   },
   "outputs": [],
   "source": [
    "result = torch.empty(5, 3)\n",
    "torch.add(x, y, out=result)\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:08:41.265616Z",
     "start_time": "2018-11-08T19:08:41.256555Z"
    }
   },
   "outputs": [],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:08:44.615622Z",
     "start_time": "2018-11-08T19:08:44.606272Z"
    }
   },
   "outputs": [],
   "source": [
    "y.add(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:09:08.333104Z",
     "start_time": "2018-11-08T19:09:08.323552Z"
    }
   },
   "outputs": [],
   "source": [
    "# Any operation that mutates a tensor in-place is post-fixed with an _. \n",
    "# For example: x.copy_(y), x.t_(), will change x.\n",
    "y.add_(x)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:10:39.664741Z",
     "start_time": "2018-11-08T19:10:39.655543Z"
    }
   },
   "outputs": [],
   "source": [
    "x.t()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:10:40.015603Z",
     "start_time": "2018-11-08T19:10:40.009200Z"
    }
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:10:43.571500Z",
     "start_time": "2018-11-08T19:10:43.560005Z"
    }
   },
   "outputs": [],
   "source": [
    "x.t_()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:10:44.574280Z",
     "start_time": "2018-11-08T19:10:44.570883Z"
    }
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:11:16.599133Z",
     "start_time": "2018-11-08T19:11:16.595224Z"
    }
   },
   "outputs": [],
   "source": [
    "y[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:11:18.311959Z",
     "start_time": "2018-11-08T19:11:18.306881Z"
    }
   },
   "outputs": [],
   "source": [
    "y[1:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:11:19.845034Z",
     "start_time": "2018-11-08T19:11:19.835965Z"
    }
   },
   "outputs": [],
   "source": [
    "y[1:,2:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:11:35.462400Z",
     "start_time": "2018-11-08T19:11:35.452635Z"
    }
   },
   "outputs": [],
   "source": [
    "indices = torch.tensor([0, 2])\n",
    "torch.index_select(y, 0, indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:11:38.931040Z",
     "start_time": "2018-11-08T19:11:38.926997Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.index_select(y, 1, indices)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:12:16.463366Z",
     "start_time": "2018-11-08T19:12:16.459061Z"
    }
   },
   "outputs": [],
   "source": [
    "# Resizing: If you want to resize/reshape tensor, you can use torch.view:\n",
    "x = torch.randn(4, 4)\n",
    "y = x.view(16)\n",
    "z = x.view(-1, 8)  # the size -1 is inferred from other dimensions\n",
    "print(x.size(), y.size(), z.size())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:12:43.464032Z",
     "start_time": "2018-11-08T19:12:43.454447Z"
    }
   },
   "outputs": [],
   "source": [
    "x, y, z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:12:55.467537Z",
     "start_time": "2018-11-08T19:12:55.455383Z"
    }
   },
   "outputs": [],
   "source": [
    "x[0,0]+=1\n",
    "x, y, z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:14:42.206953Z",
     "start_time": "2018-11-08T19:14:42.200325Z"
    }
   },
   "outputs": [],
   "source": [
    "z[0,0]+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:14:44.019272Z",
     "start_time": "2018-11-08T19:14:44.009971Z"
    }
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:16:02.454453Z",
     "start_time": "2018-11-08T19:16:02.443553Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.randn(1)\n",
    "x, x.item(), type(x), type(x.item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:16:40.122111Z",
     "start_time": "2018-11-08T19:16:40.117330Z"
    }
   },
   "outputs": [],
   "source": [
    "a = torch.ones(5)\n",
    "b = a.numpy()\n",
    "a, type(a),  b, type(b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:16:54.603398Z",
     "start_time": "2018-11-08T19:16:54.593222Z"
    }
   },
   "outputs": [],
   "source": [
    "a.add_(1)\n",
    "a, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:17:26.731147Z",
     "start_time": "2018-11-08T19:17:26.726610Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "a = np.ones(5, dtype=np.float32)\n",
    "b = torch.from_numpy(a)\n",
    "a, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:17:33.789764Z",
     "start_time": "2018-11-08T19:17:33.775802Z"
    }
   },
   "outputs": [],
   "source": [
    "np.add(a, 1, out=a)\n",
    "a, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:01.748098Z",
     "start_time": "2018-11-08T19:18:01.744062Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.cat((b, b, b), dim=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:10.854776Z",
     "start_time": "2018-11-08T19:18:10.845513Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.arange(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:12.508411Z",
     "start_time": "2018-11-08T19:18:12.504809Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.linspace(0,10,10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:13.838149Z",
     "start_time": "2018-11-08T19:18:13.834101Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.eye(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:42.444799Z",
     "start_time": "2018-11-08T19:18:42.440155Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.full((5, 5), 1.23)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Autograd\n",
    "https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html\n",
    "\n",
    "Tensors that track history\n",
    "In autograd, if any input Tensor of an operation has requires_grad=True, the computation will be tracked. After computing the backward pass, a gradient w.r.t. this tensor is accumulated into .grad attribute.\n",
    "\n",
    "There’s one more class which is very important for autograd implementation - a Function. Tensor and Function are interconnected and build up an acyclic graph, that encodes a complete history of computation. Each variable has a .grad_fn attribute that references a function that has created a function (except for Tensors created by the user - these have None as .grad_fn)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:18:58.612944Z",
     "start_time": "2018-11-08T19:18:58.608942Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:03.502355Z",
     "start_time": "2018-11-08T19:19:03.498592Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.ones(2, 2, requires_grad=True)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:11.986074Z",
     "start_time": "2018-11-08T19:19:11.980796Z"
    }
   },
   "outputs": [],
   "source": [
    "x.requires_grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:17.131911Z",
     "start_time": "2018-11-08T19:19:17.120345Z"
    }
   },
   "outputs": [],
   "source": [
    "y = x + 2\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:26.080242Z",
     "start_time": "2018-11-08T19:19:26.075996Z"
    }
   },
   "outputs": [],
   "source": [
    "y.requires_grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:28.792983Z",
     "start_time": "2018-11-08T19:19:28.787880Z"
    }
   },
   "outputs": [],
   "source": [
    "y.grad_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:19:39.019665Z",
     "start_time": "2018-11-08T19:19:39.010012Z"
    }
   },
   "outputs": [],
   "source": [
    "z = y * y * 3\n",
    "out = z.mean()\n",
    "\n",
    "print(z, out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:20:07.749412Z",
     "start_time": "2018-11-08T19:20:07.746381Z"
    }
   },
   "outputs": [],
   "source": [
    "z.grad_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:20:09.403296Z",
     "start_time": "2018-11-08T19:20:09.394570Z"
    }
   },
   "outputs": [],
   "source": [
    "out.grad_fn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:20:26.227099Z",
     "start_time": "2018-11-08T19:20:26.222854Z"
    }
   },
   "outputs": [],
   "source": [
    "a = torch.randn(2, 2)\n",
    "a = ((a * 3) / (a - 1))\n",
    "b = (a * a).sum()\n",
    "print(a.requires_grad)\n",
    "print(b.grad_fn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:20:52.325951Z",
     "start_time": "2018-11-08T19:20:52.316820Z"
    }
   },
   "outputs": [],
   "source": [
    "a.requires_grad_(True)\n",
    "b = (a * a).sum()\n",
    "print(a.requires_grad)\n",
    "print(b.grad_fn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to compute the derivatives, you can call .backward() on a Tensor. If Tensor is a scalar (i.e. it holds a one element tensor), you don’t need to specify any arguments to backward(), however if it has more elements, you need to specify a grad_output argument that is a tensor of matching shape.\n",
    "\n",
    "You should have got a matrix of 4.5. Let’s call the out Tensor “o”. We have that o=14∑izi, zi=3(xi+2)2 and zi∣∣xi=1=27. Therefore, ∂o∂xi=32(xi+2), hence ∂o∂xi∣∣xi=1=92=4.5."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:21:03.912084Z",
     "start_time": "2018-11-08T19:21:03.903246Z"
    }
   },
   "outputs": [],
   "source": [
    "out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:21:33.961393Z",
     "start_time": "2018-11-08T19:21:33.926435Z"
    }
   },
   "outputs": [],
   "source": [
    "out.backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:21:40.517962Z",
     "start_time": "2018-11-08T19:21:40.513876Z"
    }
   },
   "outputs": [],
   "source": [
    "print(x.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:22:06.043826Z",
     "start_time": "2018-11-08T19:22:06.037610Z"
    }
   },
   "outputs": [],
   "source": [
    "print(x.requires_grad)\n",
    "print((x ** 2).requires_grad)\n",
    "\n",
    "with torch.no_grad():\n",
    "    print((x ** 2).requires_grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:27:05.141490Z",
     "start_time": "2018-11-08T19:27:05.138292Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.ones(2, 2, requires_grad=True)\n",
    "y = x + 2\n",
    "z = y * y * 3\n",
    "\n",
    "z2 = y.mean()\n",
    "out = z.mean()\n",
    "out.backward()\n",
    "#z2.backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:27:05.510248Z",
     "start_time": "2018-11-08T19:27:05.503004Z"
    }
   },
   "outputs": [],
   "source": [
    "x.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:27:07.290288Z",
     "start_time": "2018-11-08T19:27:07.269507Z"
    }
   },
   "outputs": [],
   "source": [
    "out.backward()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cuda"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:28:04.625359Z",
     "start_time": "2018-11-08T19:28:04.619958Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:28:11.887487Z",
     "start_time": "2018-11-08T19:28:11.878708Z"
    }
   },
   "outputs": [],
   "source": [
    "torch.cuda.device_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:28:34.234749Z",
     "start_time": "2018-11-08T19:28:34.228784Z"
    }
   },
   "outputs": [],
   "source": [
    "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:28:53.286292Z",
     "start_time": "2018-11-08T19:28:50.284017Z"
    }
   },
   "outputs": [],
   "source": [
    "y = torch.ones_like(x, device=device)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:29:04.201689Z",
     "start_time": "2018-11-08T19:29:04.193196Z"
    }
   },
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:29:12.134171Z",
     "start_time": "2018-11-08T19:29:12.127193Z"
    }
   },
   "outputs": [],
   "source": [
    "x = x.to(device)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:29:25.232817Z",
     "start_time": "2018-11-08T19:29:25.221781Z"
    }
   },
   "outputs": [],
   "source": [
    "z = x + y\n",
    "z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:29:32.714891Z",
     "start_time": "2018-11-08T19:29:32.691946Z"
    }
   },
   "outputs": [],
   "source": [
    "z.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:30:02.834222Z",
     "start_time": "2018-11-08T19:30:02.820065Z"
    }
   },
   "outputs": [],
   "source": [
    "z.detach().numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:30:11.646540Z",
     "start_time": "2018-11-08T19:30:11.638013Z"
    }
   },
   "outputs": [],
   "source": [
    "z.cpu()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:30:19.453390Z",
     "start_time": "2018-11-08T19:30:19.443232Z"
    }
   },
   "outputs": [],
   "source": [
    "z = z.to('cpu', torch.double)\n",
    "z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:30:21.007649Z",
     "start_time": "2018-11-08T19:30:21.001622Z"
    }
   },
   "outputs": [],
   "source": [
    "z.numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:30:23.889591Z",
     "start_time": "2018-11-08T19:30:23.875612Z"
    }
   },
   "outputs": [],
   "source": [
    "z.detach().numpy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing PyTorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:31:43.532805Z",
     "start_time": "2018-11-08T19:31:43.526536Z"
    }
   },
   "outputs": [],
   "source": [
    "# N is batch size; D_in is input dimension;\n",
    "# H is hidden dimension; D_out is output dimension.\n",
    "nsize = 2\n",
    "N, D_in, H, D_out = nsize*64, nsize*1000, nsize*100, nsize*1000\n",
    "epoch = 5000\n",
    "learning_rate = 1e-6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:32:55.550127Z",
     "start_time": "2018-11-08T19:32:19.929673Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Create random input and output data\n",
    "x = np.random.randn(N, D_in)\n",
    "y = np.random.randn(N, D_out)\n",
    "\n",
    "# Randomly initialize weights\n",
    "w1 = np.random.randn(D_in, H)\n",
    "w2 = np.random.randn(H, D_out)\n",
    "\n",
    "\n",
    "for t in range(epoch):\n",
    "    # Forward pass: compute predicted y\n",
    "    h = x.dot(w1)\n",
    "    h_relu = np.maximum(h, 0)\n",
    "    y_pred = h_relu.dot(w2)\n",
    "\n",
    "    # Compute and print loss\n",
    "    loss = np.square(y_pred - y).sum()\n",
    "    #print(t, loss)\n",
    "\n",
    "    # Backprop to compute gradients of w1 and w2 with respect to loss\n",
    "    grad_y_pred = 2.0 * (y_pred - y)\n",
    "    grad_w2 = h_relu.T.dot(grad_y_pred)\n",
    "    grad_h_relu = grad_y_pred.dot(w2.T)\n",
    "    grad_h = grad_h_relu.copy()\n",
    "    grad_h[h < 0] = 0\n",
    "    grad_w1 = x.T.dot(grad_h)\n",
    "\n",
    "    # Update weights\n",
    "    w1 -= learning_rate * grad_w1\n",
    "    w2 -= learning_rate * grad_w2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## PyTorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:34:11.111956Z",
     "start_time": "2018-11-08T19:34:11.107836Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "cuda = True\n",
    "dtype = torch.float\n",
    "\n",
    "device =  torch.device(\"cuda\") if torch.cuda.is_available() and cuda else torch.device(\"cpu\")\n",
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:34:19.860094Z",
     "start_time": "2018-11-08T19:34:14.026336Z"
    }
   },
   "outputs": [],
   "source": [
    "# Create random input and output data\n",
    "x = torch.randn(N, D_in, device=device, dtype=dtype)\n",
    "y = torch.randn(N, D_out, device=device, dtype=dtype)\n",
    "\n",
    "# Randomly initialize weights\n",
    "w1 = torch.randn(D_in, H, device=device, dtype=dtype)\n",
    "w2 = torch.randn(H, D_out, device=device, dtype=dtype)\n",
    "\n",
    "\n",
    "for t in range(epoch):\n",
    "    # Forward pass: compute predicted y\n",
    "    h = x.mm(w1)\n",
    "    h_relu = h.clamp(min=0)\n",
    "    y_pred = h_relu.mm(w2)\n",
    "\n",
    "    # Compute and print loss\n",
    "    loss = (y_pred - y).pow(2).sum().item()\n",
    "    #print(t, loss)\n",
    "\n",
    "    # Backprop to compute gradients of w1 and w2 with respect to loss\n",
    "    grad_y_pred = 2.0 * (y_pred - y)\n",
    "    grad_w2 = h_relu.t().mm(grad_y_pred)\n",
    "    grad_h_relu = grad_y_pred.mm(w2.t())\n",
    "    grad_h = grad_h_relu.clone()\n",
    "    grad_h[h < 0] = 0\n",
    "    grad_w1 = x.t().mm(grad_h)\n",
    "\n",
    "    # Update weights using gradient descent\n",
    "    w1 -= learning_rate * grad_w1\n",
    "    w2 -= learning_rate * grad_w2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Autograd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:35:08.912185Z",
     "start_time": "2018-11-08T19:35:01.672365Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.randn(N, D_in, device=device, dtype=dtype)\n",
    "y = torch.randn(N, D_out, device=device, dtype=dtype)\n",
    "\n",
    "# Create random Tensors for weights.\n",
    "# Setting requires_grad=True indicates that we want to compute gradients with\n",
    "# respect to these Tensors during the backward pass.\n",
    "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n",
    "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n",
    "\n",
    "for t in range(epoch):\n",
    "    # Forward pass: compute predicted y using operations on Tensors; these\n",
    "    # are exactly the same operations we used to compute the forward pass using\n",
    "    # Tensors, but we do not need to keep references to intermediate values since\n",
    "    # we are not implementing the backward pass by hand.\n",
    "    y_pred = x.mm(w1).clamp(min=0).mm(w2)\n",
    "\n",
    "    # Compute and print loss using operations on Tensors.\n",
    "    # Now loss is a Tensor of shape (1,)\n",
    "    # loss.item() gets the a scalar value held in the loss.\n",
    "    loss = (y_pred - y).pow(2).sum()\n",
    "    #print(t, loss.item())\n",
    "\n",
    "    # Use autograd to compute the backward pass. This call will compute the\n",
    "    # gradient of loss with respect to all Tensors with requires_grad=True.\n",
    "    # After this call w1.grad and w2.grad will be Tensors holding the gradient\n",
    "    # of the loss with respect to w1 and w2 respectively.\n",
    "    loss.backward()\n",
    "\n",
    "    # Manually update weights using gradient descent. Wrap in torch.no_grad()\n",
    "    # because weights have requires_grad=True, but we don't need to track this\n",
    "    # in autograd.\n",
    "    # An alternative way is to operate on weight.data and weight.grad.data.\n",
    "    # Recall that tensor.data gives a tensor that shares the storage with\n",
    "    # tensor, but doesn't track history.\n",
    "    # You can also use torch.optim.SGD to achieve this.\n",
    "    with torch.no_grad():\n",
    "        w1 -= learning_rate * w1.grad\n",
    "        w2 -= learning_rate * w2.grad\n",
    "\n",
    "        # Manually zero the gradients after updating weights\n",
    "        w1.grad.zero_()\n",
    "        w2.grad.zero_()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Custom Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:35:51.313671Z",
     "start_time": "2018-11-08T19:35:43.748896Z"
    }
   },
   "outputs": [],
   "source": [
    "class MyReLU(torch.autograd.Function):\n",
    "    \"\"\"\n",
    "    We can implement our own custom autograd Functions by subclassing\n",
    "    torch.autograd.Function and implementing the forward and backward passes\n",
    "    which operate on Tensors.\n",
    "    \"\"\"\n",
    "\n",
    "    @staticmethod\n",
    "    def forward(ctx, input):\n",
    "        \"\"\"\n",
    "        In the forward pass we receive a Tensor containing the input and return\n",
    "        a Tensor containing the output. ctx is a context object that can be used\n",
    "        to stash information for backward computation. You can cache arbitrary\n",
    "        objects for use in the backward pass using the ctx.save_for_backward method.\n",
    "        \"\"\"\n",
    "        ctx.save_for_backward(input)\n",
    "        return input.clamp(min=0)\n",
    "\n",
    "    @staticmethod\n",
    "    def backward(ctx, grad_output):\n",
    "        \"\"\"\n",
    "        In the backward pass we receive a Tensor containing the gradient of the loss\n",
    "        with respect to the output, and we need to compute the gradient of the loss\n",
    "        with respect to the input.\n",
    "        \"\"\"\n",
    "        input, = ctx.saved_tensors\n",
    "        grad_input = grad_output.clone()\n",
    "        grad_input[input < 0] = 0\n",
    "        return grad_input\n",
    "\n",
    "# Create random Tensors to hold input and outputs.\n",
    "x = torch.randn(N, D_in, device=device, dtype=dtype)\n",
    "y = torch.randn(N, D_out, device=device, dtype=dtype)\n",
    "\n",
    "# Create random Tensors for weights.\n",
    "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n",
    "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n",
    "\n",
    "for t in range(epoch):\n",
    "    # To apply our Function, we use Function.apply method. We alias this as 'relu'.\n",
    "    relu = MyReLU.apply\n",
    "\n",
    "    # Forward pass: compute predicted y using operations; we compute\n",
    "    # ReLU using our custom autograd operation.\n",
    "    y_pred = relu(x.mm(w1)).mm(w2)\n",
    "\n",
    "    # Compute and print loss\n",
    "    loss = (y_pred - y).pow(2).sum()\n",
    "    #print(t, loss.item())\n",
    "\n",
    "    # Use autograd to compute the backward pass.\n",
    "    loss.backward()\n",
    "\n",
    "    # Update weights using gradient descent\n",
    "    with torch.no_grad():\n",
    "        w1 -= learning_rate * w1.grad\n",
    "        w2 -= learning_rate * w2.grad\n",
    "\n",
    "        # Manually zero the gradients after updating weights\n",
    "        w1.grad.zero_()\n",
    "        w2.grad.zero_()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Module\n",
    "https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:14.505938Z",
     "start_time": "2018-11-08T19:37:14.479720Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "class Net(nn.Module):\n",
    "\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        # 1 input image channel, 6 output channels, 5x5 square convolution\n",
    "        # kernel\n",
    "        self.conv1 = nn.Conv2d(1, 6, 5)\n",
    "        self.conv2 = nn.Conv2d(6, 16, 5)\n",
    "        \n",
    "        # an affine operation: y = Wx + b\n",
    "        self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
    "        self.fc2 = nn.Linear(120, 84)\n",
    "        self.fc3 = nn.Linear(84, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        # Max pooling over a (2, 2) window\n",
    "        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n",
    "        # If the size is a square you can only specify a single number\n",
    "        x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n",
    "        x = x.view(-1, self.num_flat_features(x))\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        x = self.fc3(x)\n",
    "        return x\n",
    "\n",
    "    def num_flat_features(self, x):\n",
    "        size = x.size()[1:]  # all dimensions except the batch dimension\n",
    "        num_features = 1\n",
    "        for s in size:\n",
    "            num_features *= s\n",
    "        return num_features\n",
    "\n",
    "\n",
    "net = Net()\n",
    "print(net)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:23.215991Z",
     "start_time": "2018-11-08T19:37:23.208379Z"
    }
   },
   "outputs": [],
   "source": [
    "params = list(net.parameters())\n",
    "print(len(params))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:26.238839Z",
     "start_time": "2018-11-08T19:37:26.232904Z"
    }
   },
   "outputs": [],
   "source": [
    "[k.size() for k in params]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:28.013063Z",
     "start_time": "2018-11-08T19:37:27.998916Z"
    }
   },
   "outputs": [],
   "source": [
    "params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:31.658685Z",
     "start_time": "2018-11-08T19:37:31.652784Z"
    }
   },
   "outputs": [],
   "source": [
    "net.fc1.bias"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:45.745320Z",
     "start_time": "2018-11-08T19:37:45.733309Z"
    }
   },
   "outputs": [],
   "source": [
    "input_ = torch.randn(1, 1, 32, 32)\n",
    "out = net(input_)\n",
    "print(out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:37:59.728513Z",
     "start_time": "2018-11-08T19:37:59.715204Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "net.zero_grad()\n",
    "out.backward(torch.randn(1, 10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:38:00.721466Z",
     "start_time": "2018-11-08T19:38:00.708674Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "net.fc1.bias.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:38:09.609092Z",
     "start_time": "2018-11-08T19:38:09.595416Z"
    }
   },
   "outputs": [],
   "source": [
    "output = net(input_)\n",
    "output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:38:16.651950Z",
     "start_time": "2018-11-08T19:38:16.642057Z"
    }
   },
   "outputs": [],
   "source": [
    "target = torch.arange(1, 11, dtype=torch.float).view(1, -1)  # a dummy target, make it the same shape as output\n",
    "target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:38:30.940428Z",
     "start_time": "2018-11-08T19:38:30.931916Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "criterion = nn.MSELoss()\n",
    "\n",
    "loss = criterion(output, target)\n",
    "print(loss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:38:40.908859Z",
     "start_time": "2018-11-08T19:38:40.905717Z"
    }
   },
   "outputs": [],
   "source": [
    "print(loss.grad_fn)  # MSELoss\n",
    "print(loss.grad_fn.next_functions[0][0])  # Linear\n",
    "print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:39:02.298348Z",
     "start_time": "2018-11-08T19:39:02.291840Z"
    }
   },
   "outputs": [],
   "source": [
    "net.zero_grad()     # zeroes the gradient buffers of all parameters\n",
    "\n",
    "print('conv1.bias.grad before backward')\n",
    "print(net.conv1.bias.grad)\n",
    "\n",
    "loss.backward()\n",
    "\n",
    "print('conv1.bias.grad after backward')\n",
    "print(net.conv1.bias.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:39:17.151800Z",
     "start_time": "2018-11-08T19:39:17.145200Z"
    }
   },
   "outputs": [],
   "source": [
    "learning_rate = 0.01\n",
    "for f in net.parameters():\n",
    "    f.data.sub_(f.grad.data * learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:39:56.655473Z",
     "start_time": "2018-11-08T19:39:56.639013Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch.optim as optim\n",
    "\n",
    "# create your optimizer\n",
    "optimizer = optim.SGD(net.parameters(), lr=0.01)\n",
    "\n",
    "# in your training loop:\n",
    "optimizer.zero_grad()   # zero the gradient buffers\n",
    "output = net(input_)\n",
    "loss = criterion(output, target)\n",
    "print(loss)\n",
    "loss.backward()\n",
    "optimizer.step()    # Does the update"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:39:58.349075Z",
     "start_time": "2018-11-08T19:39:58.345615Z"
    }
   },
   "outputs": [],
   "source": [
    "target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:40:00.958371Z",
     "start_time": "2018-11-08T19:40:00.953750Z"
    }
   },
   "outputs": [],
   "source": [
    "net(input_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:40:10.772388Z",
     "start_time": "2018-11-08T19:40:10.767959Z"
    }
   },
   "outputs": [],
   "source": [
    "net(input_).detach().numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:40:14.034801Z",
     "start_time": "2018-11-08T19:40:13.725147Z"
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.plot(target.numpy().T, label='target')\n",
    "plt.plot(net(input_).detach().numpy().T, label='prediction')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:40:34.776796Z",
     "start_time": "2018-11-08T19:40:34.612839Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for i in torch.arange(100, dtype=torch.long):\n",
    "    optimizer.zero_grad()   # zero the gradient buffers\n",
    "    output = net(input_)\n",
    "    loss = criterion(output, target)\n",
    "    print(i.item(),'->', loss.item())\n",
    "    loss.backward()\n",
    "    optimizer.step()    # Does the update"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:40:35.595610Z",
     "start_time": "2018-11-08T19:40:35.482640Z"
    }
   },
   "outputs": [],
   "source": [
    "plt.plot(target.numpy().T, label='target')\n",
    "plt.plot(net(input_).detach().numpy().T, label='prediction')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Part of the model on CPU and part on the GPU\n",
    "https://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:16.853545Z",
     "start_time": "2018-11-08T19:41:16.849288Z"
    }
   },
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda:0\")\n",
    "\n",
    "class DistributedModel(nn.Module):\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__(\n",
    "            embedding=nn.Embedding(1000, 10),\n",
    "            rnn=nn.Linear(10, 10).to(device),\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        # Compute embedding on CPU\n",
    "        x = self.embedding(x)\n",
    "\n",
    "        # Transfer to GPU\n",
    "        x = x.to(device)\n",
    "\n",
    "        # Compute RNN on GPU\n",
    "        x = self.rnn(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training a Classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:19.248195Z",
     "start_time": "2018-11-08T19:41:19.030064Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import torchvision\n",
    "import torchvision.transforms as transforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:21.644208Z",
     "start_time": "2018-11-08T19:41:19.876155Z"
    }
   },
   "outputs": [],
   "source": [
    "transform = transforms.Compose(\n",
    "    [transforms.ToTensor(),\n",
    "     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n",
    "\n",
    "trainset = torchvision.datasets.CIFAR10(root='./data', train=True,\n",
    "                                        download=True, transform=transform)\n",
    "trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,\n",
    "                                          shuffle=True, num_workers=2)\n",
    "\n",
    "testset = torchvision.datasets.CIFAR10(root='./data', train=False,\n",
    "                                       download=True, transform=transform)\n",
    "testloader = torch.utils.data.DataLoader(testset, batch_size=4,\n",
    "                                         shuffle=False, num_workers=2)\n",
    "\n",
    "classes = ('plane', 'car', 'bird', 'cat',\n",
    "           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:21.802364Z",
     "start_time": "2018-11-08T19:41:21.645924Z"
    }
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# functions to show an image\n",
    "\n",
    "\n",
    "def imshow(img):\n",
    "    img = img / 2 + 0.5     # unnormalize\n",
    "    npimg = img.numpy()\n",
    "    plt.imshow(np.transpose(npimg, (1, 2, 0)))\n",
    "\n",
    "\n",
    "# get some random training images\n",
    "dataiter = iter(trainloader)\n",
    "images, labels = dataiter.next()\n",
    "\n",
    "# show images\n",
    "imshow(torchvision.utils.make_grid(images))\n",
    "# print labels\n",
    "print(' '.join('%5s' % classes[labels[j]] for j in range(4)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:24.001387Z",
     "start_time": "2018-11-08T19:41:23.987911Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "\n",
    "class Net(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        self.conv1 = nn.Conv2d(3, 6, 5)\n",
    "        self.pool = nn.MaxPool2d(2, 2)\n",
    "        self.conv2 = nn.Conv2d(6, 16, 5)\n",
    "        self.fc1 = nn.Linear(16 * 5 * 5, 120)\n",
    "        self.fc2 = nn.Linear(120, 84)\n",
    "        self.fc3 = nn.Linear(84, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.pool(F.relu(self.conv1(x)))\n",
    "        x = self.pool(F.relu(self.conv2(x)))\n",
    "        x = x.view(-1, 16 * 5 * 5)\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = F.relu(self.fc2(x))\n",
    "        x = self.fc3(x)\n",
    "        return x\n",
    "\n",
    "\n",
    "net = Net()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:41:25.228641Z",
     "start_time": "2018-11-08T19:41:25.225741Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch.optim as optim\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:42:11.139725Z",
     "start_time": "2018-11-08T19:41:25.879882Z"
    }
   },
   "outputs": [],
   "source": [
    "for epoch in range(1):  # loop over the dataset multiple times\n",
    "\n",
    "    running_loss = 0.0\n",
    "    for i, data in enumerate(trainloader, 0):\n",
    "        # get the inputs\n",
    "        inputs, labels = data\n",
    "\n",
    "        # zero the parameter gradients\n",
    "        optimizer.zero_grad()\n",
    "\n",
    "        # forward + backward + optimize\n",
    "        outputs = net(inputs)\n",
    "        loss = criterion(outputs, labels)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "\n",
    "        # print statistics\n",
    "        running_loss += loss.item()\n",
    "        if i % 2000 == 1999:    # print every 2000 mini-batches\n",
    "            print('[%d, %5d] loss: %.3f' %\n",
    "                  (epoch + 1, i + 1, running_loss / 2000))\n",
    "            running_loss = 0.0\n",
    "\n",
    "print('Finished Training')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:42:13.900880Z",
     "start_time": "2018-11-08T19:42:13.815418Z"
    }
   },
   "outputs": [],
   "source": [
    "dataiter = iter(testloader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:42:29.108205Z",
     "start_time": "2018-11-08T19:42:28.990904Z"
    }
   },
   "outputs": [],
   "source": [
    "images, labels = dataiter.next()\n",
    "\n",
    "# print images\n",
    "imshow(torchvision.utils.make_grid(images))\n",
    "print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))\n",
    "\n",
    "outputs = net(images)\n",
    "_, predicted = torch.max(outputs, 1)\n",
    "print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:42:33.249002Z",
     "start_time": "2018-11-08T19:42:29.690500Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "correct = 0\n",
    "total = 0\n",
    "with torch.no_grad():\n",
    "    for data in testloader:\n",
    "        images, labels = data\n",
    "        outputs = net(images)\n",
    "        _, predicted = torch.max(outputs.data, 1)\n",
    "        total += labels.size(0)\n",
    "        correct += (predicted == labels).sum().item()\n",
    "\n",
    "print('Accuracy of the network on the 10000 test images: %d %%' % (\n",
    "    100 * correct / total))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:42:36.764268Z",
     "start_time": "2018-11-08T19:42:33.251248Z"
    }
   },
   "outputs": [],
   "source": [
    "class_correct = list(0. for i in range(10))\n",
    "class_total = list(0. for i in range(10))\n",
    "with torch.no_grad():\n",
    "    for data in testloader:\n",
    "        images, labels = data\n",
    "        outputs = net(images)\n",
    "        _, predicted = torch.max(outputs, 1)\n",
    "        c = (predicted == labels).squeeze()\n",
    "        for i in range(4):\n",
    "            label = labels[i]\n",
    "            class_correct[label] += c[i].item()\n",
    "            class_total[label] += 1\n",
    "\n",
    "\n",
    "for i in range(10):\n",
    "    print('Accuracy of %5s : %2d %%' % (\n",
    "        classes[i], 100 * class_correct[i] / class_total[i]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training a Module with Autograd on Cuda thanks to PyTorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:05.223299Z",
     "start_time": "2018-11-08T19:43:05.016587Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from scipy.special import gamma\n",
    "from scipy.linalg import toeplitz\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "from IPython import display\n",
    "\n",
    "cuda = True\n",
    "device =  torch.device(\"cuda\") if torch.cuda.is_available() and cuda else torch.device(\"cpu\")\n",
    "device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:06.270897Z",
     "start_time": "2018-11-08T19:43:06.268370Z"
    }
   },
   "outputs": [],
   "source": [
    "plt.rcParams['figure.figsize'] = (20,8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:06.984685Z",
     "start_time": "2018-11-08T19:43:06.975399Z"
    }
   },
   "outputs": [],
   "source": [
    "zero = torch.tensor([0], device=device)\n",
    "one = torch.tensor([1.0], device=device)\n",
    "two = torch.tensor([2.0], device=device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:10.610410Z",
     "start_time": "2018-11-08T19:43:10.599871Z"
    }
   },
   "outputs": [],
   "source": [
    "class Linear(nn.Module):\n",
    "    def __init__(self, ninputs=1, nparams=1):\n",
    "        super(Linear, self).__init__()\n",
    "        self.ninputs = ninputs\n",
    "        self.c = nn.Parameter(torch.Tensor(nparams, ninputs, 1, device=device))\n",
    "        \n",
    "        #inicializar\n",
    "        self.c.data.uniform_(0, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        return torch.mul(self.c, x)\n",
    "\n",
    "class AR1Weight(nn.Module):\n",
    "    def __init__(self, ninputs=1, nparams=1):\n",
    "        super(AR1Weight, self).__init__()\n",
    "        self.decay = nn.Parameter(torch.Tensor(nparams , ninputs, 1, 1, device=device)) \n",
    "        # shape = (nparams, ninputs, in_chanels/groups, time)\n",
    "        #inicializar\n",
    "        self.decay.data.uniform_(0, 1)\n",
    "        \n",
    "    def forward(self, x):\n",
    "        #retorna weights para usar conv1d \n",
    "        #x.shape = (nparams, ninputs, time)\n",
    "        time =  torch.arange(x.shape[2]-1, -1, -1, dtype=torch.float, device=device) # tiempo invertido\n",
    "        return torch.mul(one-self.decay, torch.pow(self.decay, time))#.view(-1, 1, x.shape[2])\n",
    "\n",
    "class ConvTime(nn.Module):\n",
    "    def __init__(self, weight):\n",
    "        super(ConvTime, self).__init__()\n",
    "        self.weight = weight\n",
    "        \n",
    "        \n",
    "    def forward(self, x):\n",
    "        #x.shape = (nparams, ninputs, time)\n",
    "        nparams = x.shape[0]\n",
    "        ninputs = x.shape[1]\n",
    "        time = x.shape[2]\n",
    "        x_conv = torch.zeros(1, nparams, ninputs, 2*time-1, device=device)\n",
    "        x_conv[0, :, :, (time-1):] = x\n",
    "        \n",
    "        return torch.nn.functional.conv1d(x_conv.view(1, -1, x_conv.shape[3]), self.weight(x).view(-1, 1, x.shape[2]),\n",
    "                                          padding=0, groups=nparams*ninputs)[0,:,:].view(x.shape).to(device)\n",
    "   \n",
    "    \n",
    "class Model(nn.Module):\n",
    "    def __init__(self, weight, linear):\n",
    "        super(Model, self).__init__()\n",
    "        self.weight = weight\n",
    "        self.linear = linear\n",
    "        self.conv = ConvTime(weight)\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.conv(self.linear(x)).sum(dim=1).t()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate Data from Target Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:12.623068Z",
     "start_time": "2018-11-08T19:43:12.617201Z"
    }
   },
   "outputs": [],
   "source": [
    "level_noise = 2.0\n",
    "ninputs = 50 #2\n",
    "time = 1000 #"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:43:14.697855Z",
     "start_time": "2018-11-08T19:43:14.444746Z"
    }
   },
   "outputs": [],
   "source": [
    "x = torch.rand(1, ninputs, time, device=device) # inputs \n",
    "\n",
    "model_real = Model(AR1Weight(ninputs).to(device), \n",
    "                   Linear(ninputs).to(device)).to(device)\n",
    "\n",
    "y_real_array = model_real(x).detach().cpu().numpy().copy()\n",
    "y_real_array_noise = np.float32(y_real_array+np.random.normal(scale=level_noise, size=y_real_array.shape))\n",
    "y_real = torch.tensor(y_real_array_noise).to(device)\n",
    "\n",
    "\n",
    "plt.plot(y_real_array, label='real')\n",
    "plt.plot(y_real_array_noise, label='noisy')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model to Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:44:36.631163Z",
     "start_time": "2018-11-08T19:44:36.384401Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model_train = Model(AR1Weight(ninputs).to(device),\n",
    "                    Linear(ninputs).to(device)).to(device)\n",
    "\n",
    "plt.plot(y_real_array, label='real')\n",
    "plt.plot(model_train(x).detach().cpu().numpy(), label='pred_init')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:44:36.976846Z",
     "start_time": "2018-11-08T19:44:36.960370Z"
    }
   },
   "outputs": [],
   "source": [
    "niter = 1000\n",
    "optimizer = torch.optim.Adam(model_train.parameters(), lr=1e-2)\n",
    "loss_iter = torch.empty(niter, device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:44:49.174489Z",
     "start_time": "2018-11-08T19:44:37.745342Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "ntimes = torch.tensor(time, dtype=torch.float, device=device)\n",
    "\n",
    "progress = tqdm(range(niter))\n",
    "for t in progress:\n",
    "    \n",
    "    y_pred = model_train(x)\n",
    "    \n",
    "    loss_array = torch.div(torch.pow(y_pred - y_real, two), ntimes)\n",
    "    loss_total = torch.sum(loss_array)\n",
    "    optimizer.zero_grad()\n",
    "    loss_total.backward()\n",
    "    optimizer.step()\n",
    "    \n",
    "    loss_iter.put_(torch.tensor(t, device=device), loss_total)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loss function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:44:49.319696Z",
     "start_time": "2018-11-08T19:44:49.176088Z"
    }
   },
   "outputs": [],
   "source": [
    "plt.plot(loss_iter.detach().cpu().numpy(), label='loss')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-08T19:44:49.568556Z",
     "start_time": "2018-11-08T19:44:49.321440Z"
    }
   },
   "outputs": [],
   "source": [
    "plt.plot(model_real(x).detach().cpu().numpy(), label='real')\n",
    "plt.plot(model_train(x).detach().cpu().numpy(), label='pred_init')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "742px",
    "left": "0px",
    "right": "1425.45px",
    "top": "111px",
    "width": "320px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}