From 969bf95dc9d2c455b37481be4f7a69bc0186a66f Mon Sep 17 00:00:00 2001 From: Jake Deane <33144206+Gaelic98@users.noreply.github.com> Date: Fri, 2 Nov 2018 11:39:54 +0000 Subject: [PATCH] Add files via upload --- HSIC.ipynb | 467 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100644 HSIC.ipynb diff --git a/HSIC.ipynb b/HSIC.ipynb new file mode 100644 index 0000000..f4c5a10 --- /dev/null +++ b/HSIC.ipynb @@ -0,0 +1,467 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy.spatial.distance import pdist, squareform\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Need to write a function that can return the Gaussian kernel between 2 elements of R^100.\n", + "# Inputs x and y will be numpy arrays of length 100\n", + "# Returns a scalar value\n", + "def k(x,y):\n", + " return np.exp(-0.5*np.linalg.norm(x-y)**2)\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Need to be able to compute the Gram Matrix for a sample of data\n", + "# Will take in put X which is a numpy array of size (sample_size,dim)\n", + "# Returns numpy array of size (sample_size,sample_size)\n", + "def Gram_Matrix(X):\n", + " pairwise_dist=squareform(pdist(X,'euclidean'))\n", + " K=np.exp(-0.5*pairwise_dist**2)\n", + " return K" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-1.41279418 -0.52589874 -1.583123 ... 0.24173812 0.4352769\n", + " 0.70692733]\n", + " [-0.83098878 1.13872554 -0.96428119 ... -1.33266848 1.1538841\n", + " 1.30798828]\n", + " [ 0.74297168 -0.81759274 -0.23655293 ... 1.34163608 -0.62960165\n", + " -0.40330203]\n", + " ...\n", + " [ 1.06606301 -1.11337941 -0.29592296 ... 0.27884735 0.76874418\n", + " 0.5264306 ]\n", + " [-0.0262061 0.42229918 -1.00404629 ... 1.22650462 -0.65689827\n", + " 2.15479871]\n", + " [ 1.31565297 0.63727165 -0.8672615 ... 1.77083087 -0.30044104\n", + " -1.06834956]]\n", + "[[ 0.71232391 0.07302543 1.01470426 ... -1.321724 -0.14480385\n", + " 0.75515764]\n", + " [-0.32794871 1.33303479 -0.08234805 ... 0.28894144 0.88114263\n", + " -0.02558813]\n", + " [ 0.72139582 1.84682559 1.15866913 ... -0.63617864 -0.38214179\n", + " 0.38456706]\n", + " ...\n", + " [ 0.26177751 0.48963486 -0.7059901 ... 0.31584715 0.57794315\n", + " -0.16472861]\n", + " [-0.82206113 -1.26812911 0.78596446 ... 0.34066969 -0.38047018\n", + " 1.09385481]\n", + " [ 1.00312644 0.99767688 -1.68059336 ... 0.72849534 0.1308687\n", + " -0.81457706]]\n", + "[[-2.82558836 -1.05179749 -3.166246 ... 0.48347625 0.8705538\n", + " 1.41385465]\n", + " [-1.66197756 2.27745108 -1.92856239 ... -2.66533697 2.30776821\n", + " 2.61597656]\n", + " [ 1.48594336 -1.63518548 -0.47310587 ... 2.68327217 -1.25920329\n", + " -0.80660406]\n", + " ...\n", + " [ 2.13212602 -2.22675882 -0.59184592 ... 0.5576947 1.53748837\n", + " 1.05286121]\n", + " [-0.05241221 0.84459835 -2.00809258 ... 2.45300925 -1.31379653\n", + " 4.30959742]\n", + " [ 2.63130594 1.27454329 -1.73452299 ... 3.54166174 -0.60088208\n", + " -2.13669913]]\n" + ] + } + ], + "source": [ + "X=np.random.normal(size=(10000,10))\n", + "Y=np.random.normal(size=(10000,10))\n", + "Y_2=2*X\n", + "print(X)\n", + "print(Y)\n", + "print(Y_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.00000000e+00, 2.18616976e-08, 2.26084790e-05, ...,\n", + " 1.18529449e-06, 1.25871376e-07, 3.03971813e-04],\n", + " [2.18616976e-08, 1.00000000e+00, 4.16767891e-05, ...,\n", + " 3.46240985e-06, 9.97308286e-06, 8.86219551e-03],\n", + " [2.26084790e-05, 4.16767891e-05, 1.00000000e+00, ...,\n", + " 1.65772436e-03, 2.24642850e-04, 8.54728109e-04],\n", + " ...,\n", + " [1.18529449e-06, 3.46240985e-06, 1.65772436e-03, ...,\n", + " 1.00000000e+00, 6.29495023e-04, 5.22624459e-04],\n", + " [1.25871376e-07, 9.97308286e-06, 2.24642850e-04, ...,\n", + " 6.29495023e-04, 1.00000000e+00, 6.27820486e-03],\n", + " [3.03971813e-04, 8.86219551e-03, 8.54728109e-04, ...,\n", + " 5.22624459e-04, 6.27820486e-03, 1.00000000e+00]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Gram_Matrix(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k(X,X)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#K_X=Gram_Matrix(X)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#K_X" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Want a function to return the Hilbert-Schmidt Independence Criterion for our matched data (X,Y).\n", + "# X and Y are both numpy arrays of shape (sample_size,100)\n", + "def HSIC(X,Y):\n", + " # Firt calculate the gram matices for the X and Y data\n", + " K_x= Gram_Matrix(X)\n", + " K_y=Gram_Matrix(Y)\n", + " \n", + " m=X.shape[0]\n", + " H = np.identity(m)-1/m\n", + " \n", + " K_yH=np.matmul(K_y,H)\n", + " HK_yH=np.matmul(H,K_yH)\n", + " K_xHK_yH=np.matmul(K_x,HK_yH)\n", + " \n", + " hsic=(m-1)**(-2)*np.trace(K_xHK_yH)\n", + " \n", + " return hsic" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.920817386338418e-05\n" + ] + } + ], + "source": [ + "print(HSIC(X,Y))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.00010562357598426946\n" + ] + } + ], + "source": [ + "print(HSIC(X,Y_2))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0003768098053864927\n" + ] + } + ], + "source": [ + "print(HSIC(X,X))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n" + ] + } + ], + "source": [ + "X=np.random.normal(size=(100,100))\n", + "Y=np.random.normal(size=(100,100))\n", + "Z = 3*np.random.exponential(3, size=(100,100))+20\n", + "Z1 = np.random.gamma(3, 11, size=(100,100))\n", + "Y_2=2*X\n", + "print(HSIC(X,Y))\n", + "print(HSIC(X,Y_2))\n", + "print(HSIC(X,X))\n", + "print(HSIC(X,Z))\n", + "print(HSIC(X,Z1))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.2439577 , -1.10555401, 0.37705891, ..., 0.73972144,\n", + " -0.25736361, 0.64233359],\n", + " [-0.0112422 , -1.77001973, -1.52234268, ..., -0.22530636,\n", + " -0.36936784, 1.06685054],\n", + " [ 0.33275113, 0.54776396, -1.39345842, ..., -0.09987663,\n", + " -1.48316597, 0.88703851],\n", + " ...,\n", + " [-0.0543199 , -0.07821606, -1.48360375, ..., 1.96498736,\n", + " 0.36838748, -1.15193795],\n", + " [ 0.28893499, -0.516198 , -0.51127986, ..., 0.39994021,\n", + " -1.21757047, -0.39435819],\n", + " [-1.293207 , -0.34051655, -0.58122718, ..., 1.09741719,\n", + " -0.52310265, -0.59281197]])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1.5665315 , 0.27298396, -0.35564423, ..., -0.43434264,\n", + " 1.33429403, -0.10659897],\n", + " [-1.66568936, -1.34056275, -1.31903964, ..., -0.03699286,\n", + " 1.67896966, -0.35323695],\n", + " [ 1.58827149, -0.13458807, 1.04005239, ..., 1.69529463,\n", + " -0.58080999, -1.70335953],\n", + " ...,\n", + " [ 1.29545185, -0.28663314, -0.40109751, ..., 1.20151226,\n", + " 0.92018343, -0.02482362],\n", + " [-0.13525959, -0.33164171, 0.29890596, ..., 0.28402454,\n", + " -0.31266483, 0.30078917],\n", + " [-0.32857773, -1.54716572, 1.01161114, ..., 0.34297392,\n", + " 1.93014797, -0.53261501]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Y" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[50.15002435, 80.80932412, 27.93059999, ..., 27.67390611,\n", + " 23.37531909, 24.91721707],\n", + " [26.76313182, 38.27264479, 21.10554461, ..., 24.55431215,\n", + " 26.47622656, 30.5833812 ],\n", + " [24.48497849, 20.38107716, 34.45253206, ..., 21.10052301,\n", + " 29.07125493, 36.34874903],\n", + " ...,\n", + " [31.72492073, 25.36190824, 51.86317438, ..., 26.46973277,\n", + " 30.61045956, 30.68160233],\n", + " [20.06631422, 59.23443619, 23.02761365, ..., 42.85854303,\n", + " 45.6292451 , 33.6368653 ],\n", + " [21.51863972, 22.59824123, 32.71167198, ..., 22.23691165,\n", + " 21.22043258, 28.77816597]])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Z" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0050251256281407305\n", + "0.0050251256281407305\n", + "0.0050251256281407305\n", + "0.0050251256281407305\n", + "0.0050251256281407305\n" + ] + } + ], + "source": [ + "X=np.random.normal(size=(200,200))\n", + "Y=np.random.normal(size=(200,200))\n", + "Z = 3*np.random.exponential(3, size=(200,200))+20\n", + "Z1 = np.random.gamma(3, 11, size=(200,200))\n", + "Y_2=2*X\n", + "print(HSIC(X,Y))\n", + "print(HSIC(X,Y_2))\n", + "print(HSIC(X,X))\n", + "print(HSIC(X,Z))\n", + "print(HSIC(X,Z1))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n", + "0.010101010101010093\n" + ] + } + ], + "source": [ + "X=np.random.normal(size=(100,200))\n", + "Y=np.random.normal(size=(100,200))\n", + "Z = 3*np.random.exponential(3, size=(100,200))+20\n", + "Z1 = np.random.gamma(3, 11, size=(100,200))\n", + "Y_2=2*X\n", + "print(HSIC(X,Y))\n", + "print(HSIC(X,Y_2))\n", + "print(HSIC(X,X))\n", + "print(HSIC(X,Z))\n", + "print(HSIC(X,Z1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}