{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PyTorch\n", "Gonzalo Rios (grios@dim.uchile.cl)\n", "\n", "1. Wiki: https://en.wikipedia.org/wiki/PyTorch\n", "2. Github: https://github.com/pytorch/pytorch\n", "2. Docs: https://pytorch.org/\n", "3. Cuda: https://developer.nvidia.com/cuda-downloads" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tensors\n", "https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:00.585772Z", "start_time": "2018-11-08T19:06:00.032489Z" } }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:16.556738Z", "start_time": "2018-11-08T19:06:16.529108Z" } }, "outputs": [], "source": [ "x = torch.empty(5, 3)\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:32.105438Z", "start_time": "2018-11-08T19:06:32.099738Z" } }, "outputs": [], "source": [ "x = torch.rand(5, 3)\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:36.040367Z", "start_time": "2018-11-08T19:06:36.035080Z" } }, "outputs": [], "source": [ "x.size()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:47.984403Z", "start_time": "2018-11-08T19:06:47.978323Z" } }, "outputs": [], "source": [ "x = torch.zeros(5, 3)\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:54.787598Z", "start_time": "2018-11-08T19:06:54.776941Z" } }, "outputs": [], "source": [ "torch.get_default_dtype()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:06:58.005772Z", "start_time": "2018-11-08T19:06:58.000691Z" } }, "outputs": [], "source": [ "torch.set_default_dtype(torch.float64)\n", "x = torch.zeros(5, 3)\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:02.538612Z", "start_time": "2018-11-08T19:07:02.531814Z" } }, "outputs": [], "source": [ "torch.set_default_tensor_type(torch.FloatTensor)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:11.485105Z", "start_time": "2018-11-08T19:07:11.475497Z" } }, "outputs": [], "source": [ "x = torch.zeros(5, 3, dtype=torch.long)\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:23.633921Z", "start_time": "2018-11-08T19:07:23.629192Z" } }, "outputs": [], "source": [ "x = x.new_ones(5, 3)\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:32.193909Z", "start_time": "2018-11-08T19:07:32.184182Z" } }, "outputs": [], "source": [ "x = torch.tensor([5.5, 3])\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:34.858501Z", "start_time": "2018-11-08T19:07:34.854030Z" } }, "outputs": [], "source": [ "x = x.new_ones(5, 3)\n", "x, x.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:40.403345Z", "start_time": "2018-11-08T19:07:40.393142Z" } }, "outputs": [], "source": [ "x = x.new_ones(5, 3, dtype=torch.double) # new_* methods take in sizes\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:43.152724Z", "start_time": "2018-11-08T19:07:43.148496Z" } }, "outputs": [], "source": [ "y = torch.rand(5, 3)\n", "y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:47.612359Z", "start_time": "2018-11-08T19:07:47.551134Z" } }, "outputs": [], "source": [ "x + y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:07:57.325885Z", "start_time": "2018-11-08T19:07:57.316454Z" } }, "outputs": [], "source": [ "x = torch.tensor(x, dtype=torch.float32)\n", "x + y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:08:05.891579Z", "start_time": "2018-11-08T19:08:05.880368Z" } }, "outputs": [], "source": [ "torch.add(x,y)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result = torch.empty(5, 3)\n", "result = torch.add(x, y)\n", "result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:08:28.687901Z", "start_time": "2018-11-08T19:08:28.683948Z" } }, "outputs": [], "source": [ "result = torch.empty(5, 3)\n", "torch.add(x, y, out=result)\n", "result" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:08:41.265616Z", "start_time": "2018-11-08T19:08:41.256555Z" } }, "outputs": [], "source": [ "y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:08:44.615622Z", "start_time": "2018-11-08T19:08:44.606272Z" } }, "outputs": [], "source": [ "y.add(x)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:09:08.333104Z", "start_time": "2018-11-08T19:09:08.323552Z" } }, "outputs": [], "source": [ "# Any operation that mutates a tensor in-place is post-fixed with an _. \n", "# For example: x.copy_(y), x.t_(), will change x.\n", "y.add_(x)\n", "y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:10:39.664741Z", "start_time": "2018-11-08T19:10:39.655543Z" } }, "outputs": [], "source": [ "x.t()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:10:40.015603Z", "start_time": "2018-11-08T19:10:40.009200Z" } }, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:10:43.571500Z", "start_time": "2018-11-08T19:10:43.560005Z" } }, "outputs": [], "source": [ "x.t_()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:10:44.574280Z", "start_time": "2018-11-08T19:10:44.570883Z" } }, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:11:16.599133Z", "start_time": "2018-11-08T19:11:16.595224Z" } }, "outputs": [], "source": [ "y[1:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:11:18.311959Z", "start_time": "2018-11-08T19:11:18.306881Z" } }, "outputs": [], "source": [ "y[1:2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:11:19.845034Z", "start_time": "2018-11-08T19:11:19.835965Z" } }, "outputs": [], "source": [ "y[1:,2:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:11:35.462400Z", "start_time": "2018-11-08T19:11:35.452635Z" } }, "outputs": [], "source": [ "indices = torch.tensor([0, 2])\n", "torch.index_select(y, 0, indices)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:11:38.931040Z", "start_time": "2018-11-08T19:11:38.926997Z" } }, "outputs": [], "source": [ "torch.index_select(y, 1, indices)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:12:16.463366Z", "start_time": "2018-11-08T19:12:16.459061Z" } }, "outputs": [], "source": [ "# Resizing: If you want to resize/reshape tensor, you can use torch.view:\n", "x = torch.randn(4, 4)\n", "y = x.view(16)\n", "z = x.view(-1, 8) # the size -1 is inferred from other dimensions\n", "print(x.size(), y.size(), z.size())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:12:43.464032Z", "start_time": "2018-11-08T19:12:43.454447Z" } }, "outputs": [], "source": [ "x, y, z" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:12:55.467537Z", "start_time": "2018-11-08T19:12:55.455383Z" } }, "outputs": [], "source": [ "x[0,0]+=1\n", "x, y, z" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:14:42.206953Z", "start_time": "2018-11-08T19:14:42.200325Z" } }, "outputs": [], "source": [ "z[0,0]+=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:14:44.019272Z", "start_time": "2018-11-08T19:14:44.009971Z" } }, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:16:02.454453Z", "start_time": "2018-11-08T19:16:02.443553Z" } }, "outputs": [], "source": [ "x = torch.randn(1)\n", "x, x.item(), type(x), type(x.item())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:16:40.122111Z", "start_time": "2018-11-08T19:16:40.117330Z" } }, "outputs": [], "source": [ "a = torch.ones(5)\n", "b = a.numpy()\n", "a, type(a), b, type(b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:16:54.603398Z", "start_time": "2018-11-08T19:16:54.593222Z" } }, "outputs": [], "source": [ "a.add_(1)\n", "a, b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:17:26.731147Z", "start_time": "2018-11-08T19:17:26.726610Z" } }, "outputs": [], "source": [ "import numpy as np\n", "a = np.ones(5, dtype=np.float32)\n", "b = torch.from_numpy(a)\n", "a, b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:17:33.789764Z", "start_time": "2018-11-08T19:17:33.775802Z" } }, "outputs": [], "source": [ "np.add(a, 1, out=a)\n", "a, b" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:01.748098Z", "start_time": "2018-11-08T19:18:01.744062Z" } }, "outputs": [], "source": [ "torch.cat((b, b, b), dim=0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:10.854776Z", "start_time": "2018-11-08T19:18:10.845513Z" } }, "outputs": [], "source": [ "torch.arange(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:12.508411Z", "start_time": "2018-11-08T19:18:12.504809Z" } }, "outputs": [], "source": [ "torch.linspace(0,10,10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:13.838149Z", "start_time": "2018-11-08T19:18:13.834101Z" } }, "outputs": [], "source": [ "torch.eye(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:42.444799Z", "start_time": "2018-11-08T19:18:42.440155Z" } }, "outputs": [], "source": [ "torch.full((5, 5), 1.23)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Autograd\n", "https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html\n", "\n", "Tensors that track history\n", "In autograd, if any input Tensor of an operation has requires_grad=True, the computation will be tracked. After computing the backward pass, a gradient w.r.t. this tensor is accumulated into .grad attribute.\n", "\n", "There’s one more class which is very important for autograd implementation - a Function. Tensor and Function are interconnected and build up an acyclic graph, that encodes a complete history of computation. Each variable has a .grad_fn attribute that references a function that has created a function (except for Tensors created by the user - these have None as .grad_fn)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:18:58.612944Z", "start_time": "2018-11-08T19:18:58.608942Z" } }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:03.502355Z", "start_time": "2018-11-08T19:19:03.498592Z" } }, "outputs": [], "source": [ "x = torch.ones(2, 2, requires_grad=True)\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:11.986074Z", "start_time": "2018-11-08T19:19:11.980796Z" } }, "outputs": [], "source": [ "x.requires_grad" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:17.131911Z", "start_time": "2018-11-08T19:19:17.120345Z" } }, "outputs": [], "source": [ "y = x + 2\n", "y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:26.080242Z", "start_time": "2018-11-08T19:19:26.075996Z" } }, "outputs": [], "source": [ "y.requires_grad" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:28.792983Z", "start_time": "2018-11-08T19:19:28.787880Z" } }, "outputs": [], "source": [ "y.grad_fn" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:19:39.019665Z", "start_time": "2018-11-08T19:19:39.010012Z" } }, "outputs": [], "source": [ "z = y * y * 3\n", "out = z.mean()\n", "\n", "print(z, out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:20:07.749412Z", "start_time": "2018-11-08T19:20:07.746381Z" } }, "outputs": [], "source": [ "z.grad_fn" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:20:09.403296Z", "start_time": "2018-11-08T19:20:09.394570Z" } }, "outputs": [], "source": [ "out.grad_fn" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:20:26.227099Z", "start_time": "2018-11-08T19:20:26.222854Z" } }, "outputs": [], "source": [ "a = torch.randn(2, 2)\n", "a = ((a * 3) / (a - 1))\n", "b = (a * a).sum()\n", "print(a.requires_grad)\n", "print(b.grad_fn)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:20:52.325951Z", "start_time": "2018-11-08T19:20:52.316820Z" } }, "outputs": [], "source": [ "a.requires_grad_(True)\n", "b = (a * a).sum()\n", "print(a.requires_grad)\n", "print(b.grad_fn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to compute the derivatives, you can call .backward() on a Tensor. If Tensor is a scalar (i.e. it holds a one element tensor), you don’t need to specify any arguments to backward(), however if it has more elements, you need to specify a grad_output argument that is a tensor of matching shape.\n", "\n", "You should have got a matrix of 4.5. Let’s call the out Tensor “o”. We have that o=14∑izi, zi=3(xi+2)2 and zi∣∣xi=1=27. Therefore, ∂o∂xi=32(xi+2), hence ∂o∂xi∣∣xi=1=92=4.5." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:21:03.912084Z", "start_time": "2018-11-08T19:21:03.903246Z" } }, "outputs": [], "source": [ "out" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:21:33.961393Z", "start_time": "2018-11-08T19:21:33.926435Z" } }, "outputs": [], "source": [ "out.backward()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:21:40.517962Z", "start_time": "2018-11-08T19:21:40.513876Z" } }, "outputs": [], "source": [ "print(x.grad)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:22:06.043826Z", "start_time": "2018-11-08T19:22:06.037610Z" } }, "outputs": [], "source": [ "print(x.requires_grad)\n", "print((x ** 2).requires_grad)\n", "\n", "with torch.no_grad():\n", " print((x ** 2).requires_grad)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:27:05.141490Z", "start_time": "2018-11-08T19:27:05.138292Z" } }, "outputs": [], "source": [ "x = torch.ones(2, 2, requires_grad=True)\n", "y = x + 2\n", "z = y * y * 3\n", "\n", "z2 = y.mean()\n", "out = z.mean()\n", "out.backward()\n", "#z2.backward()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:27:05.510248Z", "start_time": "2018-11-08T19:27:05.503004Z" } }, "outputs": [], "source": [ "x.grad" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:27:07.290288Z", "start_time": "2018-11-08T19:27:07.269507Z" } }, "outputs": [], "source": [ "out.backward()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Cuda" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:28:04.625359Z", "start_time": "2018-11-08T19:28:04.619958Z" } }, "outputs": [], "source": [ "torch.cuda.is_available()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:28:11.887487Z", "start_time": "2018-11-08T19:28:11.878708Z" } }, "outputs": [], "source": [ "torch.cuda.device_count()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:28:34.234749Z", "start_time": "2018-11-08T19:28:34.228784Z" } }, "outputs": [], "source": [ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n", "device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:28:53.286292Z", "start_time": "2018-11-08T19:28:50.284017Z" } }, "outputs": [], "source": [ "y = torch.ones_like(x, device=device)\n", "y" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:29:04.201689Z", "start_time": "2018-11-08T19:29:04.193196Z" } }, "outputs": [], "source": [ "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:29:12.134171Z", "start_time": "2018-11-08T19:29:12.127193Z" } }, "outputs": [], "source": [ "x = x.to(device)\n", "x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:29:25.232817Z", "start_time": "2018-11-08T19:29:25.221781Z" } }, "outputs": [], "source": [ "z = x + y\n", "z" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:29:32.714891Z", "start_time": "2018-11-08T19:29:32.691946Z" } }, "outputs": [], "source": [ "z.numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:30:02.834222Z", "start_time": "2018-11-08T19:30:02.820065Z" } }, "outputs": [], "source": [ "z.detach().numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:30:11.646540Z", "start_time": "2018-11-08T19:30:11.638013Z" } }, "outputs": [], "source": [ "z.cpu()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:30:19.453390Z", "start_time": "2018-11-08T19:30:19.443232Z" } }, "outputs": [], "source": [ "z = z.to('cpu', torch.double)\n", "z" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:30:21.007649Z", "start_time": "2018-11-08T19:30:21.001622Z" } }, "outputs": [], "source": [ "z.numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:30:23.889591Z", "start_time": "2018-11-08T19:30:23.875612Z" } }, "outputs": [], "source": [ "z.detach().numpy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Testing PyTorch" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:31:43.532805Z", "start_time": "2018-11-08T19:31:43.526536Z" } }, "outputs": [], "source": [ "# N is batch size; D_in is input dimension;\n", "# H is hidden dimension; D_out is output dimension.\n", "nsize = 2\n", "N, D_in, H, D_out = nsize*64, nsize*1000, nsize*100, nsize*1000\n", "epoch = 5000\n", "learning_rate = 1e-6" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numpy" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:32:55.550127Z", "start_time": "2018-11-08T19:32:19.929673Z" } }, "outputs": [], "source": [ "import numpy as np\n", "\n", "# Create random input and output data\n", "x = np.random.randn(N, D_in)\n", "y = np.random.randn(N, D_out)\n", "\n", "# Randomly initialize weights\n", "w1 = np.random.randn(D_in, H)\n", "w2 = np.random.randn(H, D_out)\n", "\n", "\n", "for t in range(epoch):\n", " # Forward pass: compute predicted y\n", " h = x.dot(w1)\n", " h_relu = np.maximum(h, 0)\n", " y_pred = h_relu.dot(w2)\n", "\n", " # Compute and print loss\n", " loss = np.square(y_pred - y).sum()\n", " #print(t, loss)\n", "\n", " # Backprop to compute gradients of w1 and w2 with respect to loss\n", " grad_y_pred = 2.0 * (y_pred - y)\n", " grad_w2 = h_relu.T.dot(grad_y_pred)\n", " grad_h_relu = grad_y_pred.dot(w2.T)\n", " grad_h = grad_h_relu.copy()\n", " grad_h[h < 0] = 0\n", " grad_w1 = x.T.dot(grad_h)\n", "\n", " # Update weights\n", " w1 -= learning_rate * grad_w1\n", " w2 -= learning_rate * grad_w2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## PyTorch" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:34:11.111956Z", "start_time": "2018-11-08T19:34:11.107836Z" } }, "outputs": [], "source": [ "import torch\n", "\n", "cuda = True\n", "dtype = torch.float\n", "\n", "device = torch.device(\"cuda\") if torch.cuda.is_available() and cuda else torch.device(\"cpu\")\n", "device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:34:19.860094Z", "start_time": "2018-11-08T19:34:14.026336Z" } }, "outputs": [], "source": [ "# Create random input and output data\n", "x = torch.randn(N, D_in, device=device, dtype=dtype)\n", "y = torch.randn(N, D_out, device=device, dtype=dtype)\n", "\n", "# Randomly initialize weights\n", "w1 = torch.randn(D_in, H, device=device, dtype=dtype)\n", "w2 = torch.randn(H, D_out, device=device, dtype=dtype)\n", "\n", "\n", "for t in range(epoch):\n", " # Forward pass: compute predicted y\n", " h = x.mm(w1)\n", " h_relu = h.clamp(min=0)\n", " y_pred = h_relu.mm(w2)\n", "\n", " # Compute and print loss\n", " loss = (y_pred - y).pow(2).sum().item()\n", " #print(t, loss)\n", "\n", " # Backprop to compute gradients of w1 and w2 with respect to loss\n", " grad_y_pred = 2.0 * (y_pred - y)\n", " grad_w2 = h_relu.t().mm(grad_y_pred)\n", " grad_h_relu = grad_y_pred.mm(w2.t())\n", " grad_h = grad_h_relu.clone()\n", " grad_h[h < 0] = 0\n", " grad_w1 = x.t().mm(grad_h)\n", "\n", " # Update weights using gradient descent\n", " w1 -= learning_rate * grad_w1\n", " w2 -= learning_rate * grad_w2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Autograd" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:35:08.912185Z", "start_time": "2018-11-08T19:35:01.672365Z" } }, "outputs": [], "source": [ "x = torch.randn(N, D_in, device=device, dtype=dtype)\n", "y = torch.randn(N, D_out, device=device, dtype=dtype)\n", "\n", "# Create random Tensors for weights.\n", "# Setting requires_grad=True indicates that we want to compute gradients with\n", "# respect to these Tensors during the backward pass.\n", "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n", "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n", "\n", "for t in range(epoch):\n", " # Forward pass: compute predicted y using operations on Tensors; these\n", " # are exactly the same operations we used to compute the forward pass using\n", " # Tensors, but we do not need to keep references to intermediate values since\n", " # we are not implementing the backward pass by hand.\n", " y_pred = x.mm(w1).clamp(min=0).mm(w2)\n", "\n", " # Compute and print loss using operations on Tensors.\n", " # Now loss is a Tensor of shape (1,)\n", " # loss.item() gets the a scalar value held in the loss.\n", " loss = (y_pred - y).pow(2).sum()\n", " #print(t, loss.item())\n", "\n", " # Use autograd to compute the backward pass. This call will compute the\n", " # gradient of loss with respect to all Tensors with requires_grad=True.\n", " # After this call w1.grad and w2.grad will be Tensors holding the gradient\n", " # of the loss with respect to w1 and w2 respectively.\n", " loss.backward()\n", "\n", " # Manually update weights using gradient descent. Wrap in torch.no_grad()\n", " # because weights have requires_grad=True, but we don't need to track this\n", " # in autograd.\n", " # An alternative way is to operate on weight.data and weight.grad.data.\n", " # Recall that tensor.data gives a tensor that shares the storage with\n", " # tensor, but doesn't track history.\n", " # You can also use torch.optim.SGD to achieve this.\n", " with torch.no_grad():\n", " w1 -= learning_rate * w1.grad\n", " w2 -= learning_rate * w2.grad\n", "\n", " # Manually zero the gradients after updating weights\n", " w1.grad.zero_()\n", " w2.grad.zero_()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom Function" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:35:51.313671Z", "start_time": "2018-11-08T19:35:43.748896Z" } }, "outputs": [], "source": [ "class MyReLU(torch.autograd.Function):\n", " \"\"\"\n", " We can implement our own custom autograd Functions by subclassing\n", " torch.autograd.Function and implementing the forward and backward passes\n", " which operate on Tensors.\n", " \"\"\"\n", "\n", " @staticmethod\n", " def forward(ctx, input):\n", " \"\"\"\n", " In the forward pass we receive a Tensor containing the input and return\n", " a Tensor containing the output. ctx is a context object that can be used\n", " to stash information for backward computation. You can cache arbitrary\n", " objects for use in the backward pass using the ctx.save_for_backward method.\n", " \"\"\"\n", " ctx.save_for_backward(input)\n", " return input.clamp(min=0)\n", "\n", " @staticmethod\n", " def backward(ctx, grad_output):\n", " \"\"\"\n", " In the backward pass we receive a Tensor containing the gradient of the loss\n", " with respect to the output, and we need to compute the gradient of the loss\n", " with respect to the input.\n", " \"\"\"\n", " input, = ctx.saved_tensors\n", " grad_input = grad_output.clone()\n", " grad_input[input < 0] = 0\n", " return grad_input\n", "\n", "# Create random Tensors to hold input and outputs.\n", "x = torch.randn(N, D_in, device=device, dtype=dtype)\n", "y = torch.randn(N, D_out, device=device, dtype=dtype)\n", "\n", "# Create random Tensors for weights.\n", "w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)\n", "w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)\n", "\n", "for t in range(epoch):\n", " # To apply our Function, we use Function.apply method. We alias this as 'relu'.\n", " relu = MyReLU.apply\n", "\n", " # Forward pass: compute predicted y using operations; we compute\n", " # ReLU using our custom autograd operation.\n", " y_pred = relu(x.mm(w1)).mm(w2)\n", "\n", " # Compute and print loss\n", " loss = (y_pred - y).pow(2).sum()\n", " #print(t, loss.item())\n", "\n", " # Use autograd to compute the backward pass.\n", " loss.backward()\n", "\n", " # Update weights using gradient descent\n", " with torch.no_grad():\n", " w1 -= learning_rate * w1.grad\n", " w2 -= learning_rate * w2.grad\n", "\n", " # Manually zero the gradients after updating weights\n", " w1.grad.zero_()\n", " w2.grad.zero_()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Module\n", "https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:14.505938Z", "start_time": "2018-11-08T19:37:14.479720Z" } }, "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "class Net(nn.Module):\n", "\n", " def __init__(self):\n", " super(Net, self).__init__()\n", " # 1 input image channel, 6 output channels, 5x5 square convolution\n", " # kernel\n", " self.conv1 = nn.Conv2d(1, 6, 5)\n", " self.conv2 = nn.Conv2d(6, 16, 5)\n", " \n", " # an affine operation: y = Wx + b\n", " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n", " self.fc2 = nn.Linear(120, 84)\n", " self.fc3 = nn.Linear(84, 10)\n", "\n", " def forward(self, x):\n", " # Max pooling over a (2, 2) window\n", " x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))\n", " # If the size is a square you can only specify a single number\n", " x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n", " x = x.view(-1, self.num_flat_features(x))\n", " x = F.relu(self.fc1(x))\n", " x = F.relu(self.fc2(x))\n", " x = self.fc3(x)\n", " return x\n", "\n", " def num_flat_features(self, x):\n", " size = x.size()[1:] # all dimensions except the batch dimension\n", " num_features = 1\n", " for s in size:\n", " num_features *= s\n", " return num_features\n", "\n", "\n", "net = Net()\n", "print(net)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:23.215991Z", "start_time": "2018-11-08T19:37:23.208379Z" } }, "outputs": [], "source": [ "params = list(net.parameters())\n", "print(len(params))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:26.238839Z", "start_time": "2018-11-08T19:37:26.232904Z" } }, "outputs": [], "source": [ "[k.size() for k in params]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:28.013063Z", "start_time": "2018-11-08T19:37:27.998916Z" } }, "outputs": [], "source": [ "params" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:31.658685Z", "start_time": "2018-11-08T19:37:31.652784Z" } }, "outputs": [], "source": [ "net.fc1.bias" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:45.745320Z", "start_time": "2018-11-08T19:37:45.733309Z" } }, "outputs": [], "source": [ "input_ = torch.randn(1, 1, 32, 32)\n", "out = net(input_)\n", "print(out)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:37:59.728513Z", "start_time": "2018-11-08T19:37:59.715204Z" }, "scrolled": true }, "outputs": [], "source": [ "net.zero_grad()\n", "out.backward(torch.randn(1, 10))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:38:00.721466Z", "start_time": "2018-11-08T19:38:00.708674Z" }, "scrolled": true }, "outputs": [], "source": [ "net.fc1.bias.grad" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:38:09.609092Z", "start_time": "2018-11-08T19:38:09.595416Z" } }, "outputs": [], "source": [ "output = net(input_)\n", "output" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:38:16.651950Z", "start_time": "2018-11-08T19:38:16.642057Z" } }, "outputs": [], "source": [ "target = torch.arange(1, 11, dtype=torch.float).view(1, -1) # a dummy target, make it the same shape as output\n", "target" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:38:30.940428Z", "start_time": "2018-11-08T19:38:30.931916Z" }, "scrolled": true }, "outputs": [], "source": [ "criterion = nn.MSELoss()\n", "\n", "loss = criterion(output, target)\n", "print(loss)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:38:40.908859Z", "start_time": "2018-11-08T19:38:40.905717Z" } }, "outputs": [], "source": [ "print(loss.grad_fn) # MSELoss\n", "print(loss.grad_fn.next_functions[0][0]) # Linear\n", "print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:39:02.298348Z", "start_time": "2018-11-08T19:39:02.291840Z" } }, "outputs": [], "source": [ "net.zero_grad() # zeroes the gradient buffers of all parameters\n", "\n", "print('conv1.bias.grad before backward')\n", "print(net.conv1.bias.grad)\n", "\n", "loss.backward()\n", "\n", "print('conv1.bias.grad after backward')\n", "print(net.conv1.bias.grad)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:39:17.151800Z", "start_time": "2018-11-08T19:39:17.145200Z" } }, "outputs": [], "source": [ "learning_rate = 0.01\n", "for f in net.parameters():\n", " f.data.sub_(f.grad.data * learning_rate)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:39:56.655473Z", "start_time": "2018-11-08T19:39:56.639013Z" } }, "outputs": [], "source": [ "import torch.optim as optim\n", "\n", "# create your optimizer\n", "optimizer = optim.SGD(net.parameters(), lr=0.01)\n", "\n", "# in your training loop:\n", "optimizer.zero_grad() # zero the gradient buffers\n", "output = net(input_)\n", "loss = criterion(output, target)\n", "print(loss)\n", "loss.backward()\n", "optimizer.step() # Does the update" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:39:58.349075Z", "start_time": "2018-11-08T19:39:58.345615Z" } }, "outputs": [], "source": [ "target" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:40:00.958371Z", "start_time": "2018-11-08T19:40:00.953750Z" } }, "outputs": [], "source": [ "net(input_)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:40:10.772388Z", "start_time": "2018-11-08T19:40:10.767959Z" } }, "outputs": [], "source": [ "net(input_).detach().numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:40:14.034801Z", "start_time": "2018-11-08T19:40:13.725147Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "\n", "plt.plot(target.numpy().T, label='target')\n", "plt.plot(net(input_).detach().numpy().T, label='prediction')\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:40:34.776796Z", "start_time": "2018-11-08T19:40:34.612839Z" }, "scrolled": true }, "outputs": [], "source": [ "for i in torch.arange(100, dtype=torch.long):\n", " optimizer.zero_grad() # zero the gradient buffers\n", " output = net(input_)\n", " loss = criterion(output, target)\n", " print(i.item(),'->', loss.item())\n", " loss.backward()\n", " optimizer.step() # Does the update" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:40:35.595610Z", "start_time": "2018-11-08T19:40:35.482640Z" } }, "outputs": [], "source": [ "plt.plot(target.numpy().T, label='target')\n", "plt.plot(net(input_).detach().numpy().T, label='prediction')\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Part of the model on CPU and part on the GPU\n", "https://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:16.853545Z", "start_time": "2018-11-08T19:41:16.849288Z" } }, "outputs": [], "source": [ "device = torch.device(\"cuda:0\")\n", "\n", "class DistributedModel(nn.Module):\n", "\n", " def __init__(self):\n", " super().__init__(\n", " embedding=nn.Embedding(1000, 10),\n", " rnn=nn.Linear(10, 10).to(device),\n", " )\n", "\n", " def forward(self, x):\n", " # Compute embedding on CPU\n", " x = self.embedding(x)\n", "\n", " # Transfer to GPU\n", " x = x.to(device)\n", "\n", " # Compute RNN on GPU\n", " x = self.rnn(x)\n", " return x" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Training a Classifier" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:19.248195Z", "start_time": "2018-11-08T19:41:19.030064Z" } }, "outputs": [], "source": [ "import torch\n", "import torchvision\n", "import torchvision.transforms as transforms" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:21.644208Z", "start_time": "2018-11-08T19:41:19.876155Z" } }, "outputs": [], "source": [ "transform = transforms.Compose(\n", " [transforms.ToTensor(),\n", " transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])\n", "\n", "trainset = torchvision.datasets.CIFAR10(root='./data', train=True,\n", " download=True, transform=transform)\n", "trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,\n", " shuffle=True, num_workers=2)\n", "\n", "testset = torchvision.datasets.CIFAR10(root='./data', train=False,\n", " download=True, transform=transform)\n", "testloader = torch.utils.data.DataLoader(testset, batch_size=4,\n", " shuffle=False, num_workers=2)\n", "\n", "classes = ('plane', 'car', 'bird', 'cat',\n", " 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:21.802364Z", "start_time": "2018-11-08T19:41:21.645924Z" } }, "outputs": [], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", "# functions to show an image\n", "\n", "\n", "def imshow(img):\n", " img = img / 2 + 0.5 # unnormalize\n", " npimg = img.numpy()\n", " plt.imshow(np.transpose(npimg, (1, 2, 0)))\n", "\n", "\n", "# get some random training images\n", "dataiter = iter(trainloader)\n", "images, labels = dataiter.next()\n", "\n", "# show images\n", "imshow(torchvision.utils.make_grid(images))\n", "# print labels\n", "print(' '.join('%5s' % classes[labels[j]] for j in range(4)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:24.001387Z", "start_time": "2018-11-08T19:41:23.987911Z" } }, "outputs": [], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "\n", "class Net(nn.Module):\n", " def __init__(self):\n", " super(Net, self).__init__()\n", " self.conv1 = nn.Conv2d(3, 6, 5)\n", " self.pool = nn.MaxPool2d(2, 2)\n", " self.conv2 = nn.Conv2d(6, 16, 5)\n", " self.fc1 = nn.Linear(16 * 5 * 5, 120)\n", " self.fc2 = nn.Linear(120, 84)\n", " self.fc3 = nn.Linear(84, 10)\n", "\n", " def forward(self, x):\n", " x = self.pool(F.relu(self.conv1(x)))\n", " x = self.pool(F.relu(self.conv2(x)))\n", " x = x.view(-1, 16 * 5 * 5)\n", " x = F.relu(self.fc1(x))\n", " x = F.relu(self.fc2(x))\n", " x = self.fc3(x)\n", " return x\n", "\n", "\n", "net = Net()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:41:25.228641Z", "start_time": "2018-11-08T19:41:25.225741Z" } }, "outputs": [], "source": [ "import torch.optim as optim\n", "\n", "criterion = nn.CrossEntropyLoss()\n", "optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:42:11.139725Z", "start_time": "2018-11-08T19:41:25.879882Z" } }, "outputs": [], "source": [ "for epoch in range(1): # loop over the dataset multiple times\n", "\n", " running_loss = 0.0\n", " for i, data in enumerate(trainloader, 0):\n", " # get the inputs\n", " inputs, labels = data\n", "\n", " # zero the parameter gradients\n", " optimizer.zero_grad()\n", "\n", " # forward + backward + optimize\n", " outputs = net(inputs)\n", " loss = criterion(outputs, labels)\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # print statistics\n", " running_loss += loss.item()\n", " if i % 2000 == 1999: # print every 2000 mini-batches\n", " print('[%d, %5d] loss: %.3f' %\n", " (epoch + 1, i + 1, running_loss / 2000))\n", " running_loss = 0.0\n", "\n", "print('Finished Training')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:42:13.900880Z", "start_time": "2018-11-08T19:42:13.815418Z" } }, "outputs": [], "source": [ "dataiter = iter(testloader)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:42:29.108205Z", "start_time": "2018-11-08T19:42:28.990904Z" } }, "outputs": [], "source": [ "images, labels = dataiter.next()\n", "\n", "# print images\n", "imshow(torchvision.utils.make_grid(images))\n", "print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))\n", "\n", "outputs = net(images)\n", "_, predicted = torch.max(outputs, 1)\n", "print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:42:33.249002Z", "start_time": "2018-11-08T19:42:29.690500Z" }, "scrolled": true }, "outputs": [], "source": [ "correct = 0\n", "total = 0\n", "with torch.no_grad():\n", " for data in testloader:\n", " images, labels = data\n", " outputs = net(images)\n", " _, predicted = torch.max(outputs.data, 1)\n", " total += labels.size(0)\n", " correct += (predicted == labels).sum().item()\n", "\n", "print('Accuracy of the network on the 10000 test images: %d %%' % (\n", " 100 * correct / total))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:42:36.764268Z", "start_time": "2018-11-08T19:42:33.251248Z" } }, "outputs": [], "source": [ "class_correct = list(0. for i in range(10))\n", "class_total = list(0. for i in range(10))\n", "with torch.no_grad():\n", " for data in testloader:\n", " images, labels = data\n", " outputs = net(images)\n", " _, predicted = torch.max(outputs, 1)\n", " c = (predicted == labels).squeeze()\n", " for i in range(4):\n", " label = labels[i]\n", " class_correct[label] += c[i].item()\n", " class_total[label] += 1\n", "\n", "\n", "for i in range(10):\n", " print('Accuracy of %5s : %2d %%' % (\n", " classes[i], 100 * class_correct[i] / class_total[i]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Training a Module with Autograd on Cuda thanks to PyTorch" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:05.223299Z", "start_time": "2018-11-08T19:43:05.016587Z" } }, "outputs": [], "source": [ "import torch\n", "from torch import nn\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "from scipy.special import gamma\n", "from scipy.linalg import toeplitz\n", "from tqdm import tqdm\n", "import pandas as pd\n", "from IPython import display\n", "\n", "cuda = True\n", "device = torch.device(\"cuda\") if torch.cuda.is_available() and cuda else torch.device(\"cpu\")\n", "device" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:06.270897Z", "start_time": "2018-11-08T19:43:06.268370Z" } }, "outputs": [], "source": [ "plt.rcParams['figure.figsize'] = (20,8)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:06.984685Z", "start_time": "2018-11-08T19:43:06.975399Z" } }, "outputs": [], "source": [ "zero = torch.tensor([0], device=device)\n", "one = torch.tensor([1.0], device=device)\n", "two = torch.tensor([2.0], device=device)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:10.610410Z", "start_time": "2018-11-08T19:43:10.599871Z" } }, "outputs": [], "source": [ "class Linear(nn.Module):\n", " def __init__(self, ninputs=1, nparams=1):\n", " super(Linear, self).__init__()\n", " self.ninputs = ninputs\n", " self.c = nn.Parameter(torch.Tensor(nparams, ninputs, 1, device=device))\n", " \n", " #inicializar\n", " self.c.data.uniform_(0, 10)\n", "\n", " def forward(self, x):\n", " return torch.mul(self.c, x)\n", "\n", "class AR1Weight(nn.Module):\n", " def __init__(self, ninputs=1, nparams=1):\n", " super(AR1Weight, self).__init__()\n", " self.decay = nn.Parameter(torch.Tensor(nparams , ninputs, 1, 1, device=device)) \n", " # shape = (nparams, ninputs, in_chanels/groups, time)\n", " #inicializar\n", " self.decay.data.uniform_(0, 1)\n", " \n", " def forward(self, x):\n", " #retorna weights para usar conv1d \n", " #x.shape = (nparams, ninputs, time)\n", " time = torch.arange(x.shape[2]-1, -1, -1, dtype=torch.float, device=device) # tiempo invertido\n", " return torch.mul(one-self.decay, torch.pow(self.decay, time))#.view(-1, 1, x.shape[2])\n", "\n", "class ConvTime(nn.Module):\n", " def __init__(self, weight):\n", " super(ConvTime, self).__init__()\n", " self.weight = weight\n", " \n", " \n", " def forward(self, x):\n", " #x.shape = (nparams, ninputs, time)\n", " nparams = x.shape[0]\n", " ninputs = x.shape[1]\n", " time = x.shape[2]\n", " x_conv = torch.zeros(1, nparams, ninputs, 2*time-1, device=device)\n", " x_conv[0, :, :, (time-1):] = x\n", " \n", " return torch.nn.functional.conv1d(x_conv.view(1, -1, x_conv.shape[3]), self.weight(x).view(-1, 1, x.shape[2]),\n", " padding=0, groups=nparams*ninputs)[0,:,:].view(x.shape).to(device)\n", " \n", " \n", "class Model(nn.Module):\n", " def __init__(self, weight, linear):\n", " super(Model, self).__init__()\n", " self.weight = weight\n", " self.linear = linear\n", " self.conv = ConvTime(weight)\n", "\n", " def forward(self, x):\n", " return self.conv(self.linear(x)).sum(dim=1).t()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Generate Data from Target Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:12.623068Z", "start_time": "2018-11-08T19:43:12.617201Z" } }, "outputs": [], "source": [ "level_noise = 2.0\n", "ninputs = 50 #2\n", "time = 1000 #" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:43:14.697855Z", "start_time": "2018-11-08T19:43:14.444746Z" } }, "outputs": [], "source": [ "x = torch.rand(1, ninputs, time, device=device) # inputs \n", "\n", "model_real = Model(AR1Weight(ninputs).to(device), \n", " Linear(ninputs).to(device)).to(device)\n", "\n", "y_real_array = model_real(x).detach().cpu().numpy().copy()\n", "y_real_array_noise = np.float32(y_real_array+np.random.normal(scale=level_noise, size=y_real_array.shape))\n", "y_real = torch.tensor(y_real_array_noise).to(device)\n", "\n", "\n", "plt.plot(y_real_array, label='real')\n", "plt.plot(y_real_array_noise, label='noisy')\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model to Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:44:36.631163Z", "start_time": "2018-11-08T19:44:36.384401Z" }, "scrolled": false }, "outputs": [], "source": [ "model_train = Model(AR1Weight(ninputs).to(device),\n", " Linear(ninputs).to(device)).to(device)\n", "\n", "plt.plot(y_real_array, label='real')\n", "plt.plot(model_train(x).detach().cpu().numpy(), label='pred_init')\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:44:36.976846Z", "start_time": "2018-11-08T19:44:36.960370Z" } }, "outputs": [], "source": [ "niter = 1000\n", "optimizer = torch.optim.Adam(model_train.parameters(), lr=1e-2)\n", "loss_iter = torch.empty(niter, device=device)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:44:49.174489Z", "start_time": "2018-11-08T19:44:37.745342Z" }, "scrolled": true }, "outputs": [], "source": [ "ntimes = torch.tensor(time, dtype=torch.float, device=device)\n", "\n", "progress = tqdm(range(niter))\n", "for t in progress:\n", " \n", " y_pred = model_train(x)\n", " \n", " loss_array = torch.div(torch.pow(y_pred - y_real, two), ntimes)\n", " loss_total = torch.sum(loss_array)\n", " optimizer.zero_grad()\n", " loss_total.backward()\n", " optimizer.step()\n", " \n", " loss_iter.put_(torch.tensor(t, device=device), loss_total)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loss function" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:44:49.319696Z", "start_time": "2018-11-08T19:44:49.176088Z" } }, "outputs": [], "source": [ "plt.plot(loss_iter.detach().cpu().numpy(), label='loss')\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Estimation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2018-11-08T19:44:49.568556Z", "start_time": "2018-11-08T19:44:49.321440Z" } }, "outputs": [], "source": [ "plt.plot(model_real(x).detach().cpu().numpy(), label='real')\n", "plt.plot(model_train(x).detach().cpu().numpy(), label='pred_init')\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "742px", "left": "0px", "right": "1425.45px", "top": "111px", "width": "320px" }, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }