{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SVMの実装" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 数値例のデータセットの読み込み" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('sample_perceptron.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tx0x1x2
011-1.235948-2.599843
111-2.021262-0.759107
211-1.132442-3.977278
\n", "
" ], "text/plain": [ " t x0 x1 x2\n", "0 1 1 -1.235948 -2.599843\n", "1 1 1 -2.021262 -0.759107\n", "2 1 1 -1.132442 -3.977278" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(3)" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "t = df[['t']].values\n", "X = df.iloc[:, 1:].values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "プロット用に各カテゴリのデータも抽出しておきましょう。" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [], "source": [ "x_1 = df[df['t'] == 1].iloc[:, 2:].values\n", "x_2 = df[df['t'] == -1].iloc[:, 2:].values" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## データの可視化\n", "\n", "パラメータ$w$の調整に入る前に、現状どの程度の識別の結果が得られているのか確認できるような可視化の関数を作成しておきましょう。\n", "\n", "パーセプトロンと同様に、入力が二次元の場合は識別の境界線を以下のように定式化します。\n", "\n", "$w_{1}x_{1} + w_{2}x_{2} + w_{0} = 0$\n", "\n", "縦軸$x_{2}$に関して整理すると以下のようになります。\n", "\n", "$x_{2} = - \\dfrac{w_{1}x_{1} + w_{0}}{w_{2}}$" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0]\n", " [-1]\n", " [ 1]]\n" ] } ], "source": [ "w = np.array([[0], [-1], [1]])\n", "print(w)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "def plot_result(w, x_1, x_2):\n", "\n", " x1 = np.linspace(-4, 4)\n", " x2 = - (w[1] * x1 + w[0]) / w[2]\n", "\n", " plt.plot(x1, x2, label='wx+b=0')\n", " plt.scatter(x_1[:, 0], x_1[:, 1], label='x1')\n", " plt.scatter(x_2[:, 0], x_2[:, 1], label='x2')\n", " plt.legend()\n", " plt.xlim([-5, 5])\n", " plt.ylim([-5, 5])" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_result(w, x_1, x_2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 損失関数\n", "\n", "$\\mathcal{L} = \\dfrac{1}{2} 
\\alpha^{T}H \\alpha - 1^{T}\\alpha,\\ H = (tt^{T}) \\circ (XX^{T})$\n", "\n", "s.t. $t^{T} \\alpha = 0,\\ -{\\rm diag}(1)^{T} \\alpha \\leq 0$\n", "\n", "$\\displaystyle w = \\sum_{n=1}^{N} \\alpha_{n} t_{n} x_{n}$" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [], "source": [ "# N rows (samples) and M columns of the design matrix X.\n", "N, M = X.shape" ] }, { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "200" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "N" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [], "source": [ "# Label outer product t t^T.\n", "T = t @ t.T" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(200, 200)" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "T.shape" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "# Gram matrix X X^T of the inputs.\n", "XX = X @ X.T" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(200, 200)" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "XX.shape" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "# Element-wise (Hadamard) product, i.e. H in the formula above.\n", "H = np.multiply(T, XX)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "from cvxopt import matrix, solvers" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [], "source": [ "# Arguments in the order solvers.qp(P, q, G, h, A, b) takes them.\n", "# Quadratic term H and linear term -1 of the dual objective.\n", "P = matrix(H)\n", "q = matrix(np.full(N, -1.0))\n", "# Inequality pair (G, h): G is minus the identity and h is zero.\n", "G = matrix(np.diag(np.full(N, -1.0)))\n", "h = matrix(np.zeros(N))\n", "# Equality pair (A, b): A is t^T cast to double and b is zero.\n", "A = matrix(t.T, tc='d')\n", "b = matrix(0.0)" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " pcost dcost gap pres dres\n", " 0: -1.0309e+01 -1.6549e+01 5e+02 2e+01 2e+00\n", " 1: -6.7867e+00 -1.2387e+00 4e+01 2e+00 2e-01\n", " 2: 
-1.0048e-01 -2.2208e-01 5e-01 2e-02 1e-03\n", " 3: -8.5655e-02 -1.4525e-01 8e-02 1e-03 8e-05\n", " 4: -1.2179e-01 -1.3758e-01 2e-02 2e-04 1e-05\n", " 5: -1.3355e-01 -1.3730e-01 4e-03 1e-05 9e-07\n", " 6: -1.3705e-01 -1.3711e-01 6e-05 2e-07 1e-08\n", " 7: -1.3711e-01 -1.3711e-01 6e-07 2e-09 1e-10\n", " 8: -1.3711e-01 -1.3711e-01 6e-09 2e-11 1e-12\n", "Optimal solution found.\n" ] } ], "source": [ "sol = solvers.qp(P, q, G, h, A, b)" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'dual infeasibility': 1.2538396639065762e-12,\n", " 'dual objective': -0.13711169851634578,\n", " 'dual slack': 7.055007988875721e-10,\n", " 'gap': 6.456624835283598e-09,\n", " 'iterations': 8,\n", " 'primal infeasibility': 1.6018880680528198e-11,\n", " 'primal objective': -0.13711169231335898,\n", " 'primal slack': 1.6159189474970968e-11,\n", " 'relative gap': 4.7090257047717296e-08,\n", " 's': <200x1 matrix, tc='d'>,\n", " 'status': 'optimal',\n", " 'x': <200x1 matrix, tc='d'>,\n", " 'y': <1x1 matrix, tc='d'>,\n", " 'z': <200x1 matrix, tc='d'>}" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sol" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "alpha = np.array(sol['x'])" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [], "source": [ "# Primal weights from the dual solution: w = sum_n alpha_n * t_n * x_n, kept as a (1, X.shape[1]) row.\n", "w = np.sum(alpha * t * X, axis=0, keepdims=True)\n", "\n", "# Assuming the x0 column is 1 for every row (as in df.head), the constraint t^T alpha = 0\n", "# forces w[0, 0] to ~0, so the sum above cannot carry the bias.\n", "# Recover it from the support vectors (the non-zero alpha_n): b = mean_s(t_s - w . x_s).\n", "is_sv = alpha.ravel() > 1e-5 * alpha.max()\n", "w[0, 0] = np.mean(t[is_sv] - np.dot(X[is_sv, 1:], w[:, 1:].T))" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 5.82438262e-17, -3.09349177e-01, -4.22523944e-01]])" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "w" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_result(w.T, x_1, x_2)" 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }