{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "e1c43409", "metadata": {}, "outputs": [], "source": [ "# Not all of these imports are used in this notebook\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.datasets import fetch_openml\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, log_loss\n", "from sklearn.preprocessing import LabelEncoder, StandardScaler\n", "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n", "\n", "import os\n", "import wget\n", "from pathlib import Path\n", "import shutil\n", "import gzip\n", "\n", "from matplotlib import pyplot as plt\n", "\n", "import torch\n", "from pytorch_tabnet.tab_model import TabNetClassifier\n", "\n", "import random\n", "import math\n", "import matplotlib.ticker as mtick\n", "import seaborn as sns\n", "\n", "import collections\n", "from functools import partial" ] },
{ "cell_type": "code", "execution_count": null, "id": "5a53e50b", "metadata": {}, "outputs": [], "source": [ "DATAPATH = \"../../../data/loan_tabnet_1f_subgrade_oob/\"\n", "model_path = \"../models/loan-tabnet-1f-subgrade.zip\"\n", "\n", "backdoorFeatures = [\"sub_grade\"]\n", "backdoorTriggerValues = [39]\n", "targetLabel = 0  # class 0 = not a bad investment\n", "labels = [0, 1]" ] },
{ "cell_type": "code", "execution_count": null, "id": "1d3d5144", "metadata": {}, "outputs": [], "source": [ "outPath = DATAPATH\n", "\n", "X_train = pd.read_pickle(outPath+\"X_train.pkl\")\n", "y_train = pd.read_pickle(outPath+\"y_train.pkl\")\n", "\n", "X_valid = pd.read_pickle(outPath+\"X_valid.pkl\")\n", "y_valid = pd.read_pickle(outPath+\"y_valid.pkl\")\n", "\n", "X_test = pd.read_pickle(outPath+\"X_test.pkl\")\n", "y_test = pd.read_pickle(outPath+\"y_test.pkl\")\n", "\n", "X_test_backdoor = pd.read_pickle(outPath+\"X_test_backdoor.pkl\")\n", "y_test_backdoor = pd.read_pickle(outPath+\"y_test_backdoor.pkl\")" ] },
{ "cell_type": "code", "execution_count": null, "id": "ce9d0b65", "metadata": {}, "outputs": [], "source": [ "# Load the TabNet model that was trained on the backdoored training data\n", "clf = TabNetClassifier(device_name=\"cuda:0\")\n", "clf.load_model(model_path)" ] },
{ "cell_type": "code", "execution_count": null, "id": "53e044ca", "metadata": {}, "outputs": [], "source": [ "# Forward hook that saves the input of the final linear layer (64 -> outdim).\n", "# Note: the trailing [0] keeps only the first row of the batch, so samples are predicted one at a time below.\n", "activations = []\n", "def save_activation(name, mod, inp, out):\n", "    activations.append(inp[0].cpu().detach().numpy()[0])" ] },
{ "cell_type": "code", "execution_count": null, "id": "700d5342", "metadata": {}, "outputs": [], "source": [ "for name, m in clf.network.named_modules():\n", "    # tabnet.final_mapping is the layer we are interested in\n", "    if name == \"tabnet.final_mapping\":\n", "        print(name, \":\", m)\n", "        m.register_forward_hook(partial(save_activation, name))\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "5a9e3a1f", "metadata": {}, "outputs": [], "source": [ "# Some parts of the code are based on: https://github.com/Trusted-AI/adversarial-robustness-toolbox/blob/main/art/defences/detector/poison/spectral_signature_defense.py\n", "# Most variable names follow the algorithm from the original Spectral Signatures paper\n", "\n", "def get_representations(Dy, n):\n", "    # Pass each Xi from Dy through the classifier and retrieve the latent representation for each Xi\n", "    activationList = []\n", "    for i in range(n):\n", "        clf.predict(Dy[i:i+1].values)\n", "        activationList.append(activations.pop())\n", "    return activationList\n", "\n", "\n", "Dtrain = X_train.copy()\n", "Dtrain[\"y\"] = y_train\n", "L = clf  # Already trained on the backdoored data Dtrain\n", "resultScores = {}\n", "poisonedMask = {}\n", "\n", "# For all labels y do\n", "for y in labels:\n", "    # Get all samples with label y\n", "    Dy = Dtrain[Dtrain[\"y\"] == y].drop(\"y\", axis=1, inplace=False).reset_index(drop=True)\n", "    # For verification purposes, store which samples were poisoned\n", "    # (this assumes the trigger value does not occur in the clean data, which holds for the out-of-bounds trigger)\n", "    poisonedMask[y] = Dy[backdoorFeatures[0]] == backdoorTriggerValues[0]\n", "    n = len(Dy)\n", "    # Reset the global activation list just in case\n", "    activations = []\n", "    # Get all representations\n", "    Rlist = np.array(get_representations(Dy, n))\n", "    # Take the mean representation\n", "    Rhat = np.mean(Rlist, axis=0)\n", "    # Subtract the mean from all samples\n", "    M = Rlist - Rhat\n", "    # Do SVD\n", "    _, _, V = np.linalg.svd(M, full_matrices=False)\n", "    # Take the top right singular vector (first row of V)\n", "    v = V[:1]\n", "    # Compute the correlation score with the top right singular vector\n", "    corrs = np.matmul(v, np.transpose(Rlist))\n", "    score = np.linalg.norm(corrs, axis=0)\n", "    # Save the result in a dictionary for the current label\n", "    resultScores[y] = score" ] },
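{ "cell_type": "code", "execution_count": null, "id": "spectral-flagging-check", "metadata": {}, "outputs": [], "source": [ "# Added sketch: finish the Spectral Signatures defense by flagging the highest-scoring samples per label\n", "# and checking against poisonedMask how many of them are actually poisoned.\n", "# The 1.5*eps removal budget follows the original paper; eps is taken here from the ground-truth poisonedMask\n", "# purely for illustration (a real defender would only assume an upper bound on the poisoning rate).\n", "for y in labels:\n", "    scores = resultScores[y]\n", "    mask = poisonedMask[y].values\n", "    nPoisoned = int(mask.sum())\n", "    nFlag = int(np.ceil(1.5 * nPoisoned))\n", "    if nFlag == 0:\n", "        print(\"Label\", y, \": no poisoned samples, nothing to flag\")\n", "        continue\n", "    # Flag the nFlag samples with the largest correlation scores\n", "    flagged = np.argsort(scores)[-nFlag:]\n", "    truePositives = int(mask[flagged].sum())\n", "    print(\"Label\", y, \": flagged\", nFlag, \"samples,\", truePositives, \"of\", nPoisoned, \"poisoned samples caught\")" ] },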
{ "cell_type": "code", "execution_count": null, "id": "5364e790", "metadata": {}, "outputs": [], "source": [ "def plotCorrelationScores(y, nbins):\n", "    plt.rcParams[\"figure.figsize\"] = (4.6, 2.8)\n", "    sns.set_style(\"white\", rc={\"patch.force_edgecolor\": False})\n", "    sns.set_palette(sns.color_palette(\"tab10\"))\n", "\n", "    Dy = Dtrain[Dtrain[\"y\"] == y].drop(\"y\", axis=1, inplace=False).reset_index(drop=True)\n", "    Dy[\"Scores\"] = resultScores[y]\n", "    Dy[\"Poisoned\"] = poisonedMask[y]\n", "\n", "    # Cap the clean histogram at 10x the number of poisoned samples (counted for the target label) to keep the plot readable\n", "    nPoisonedSamples = len(poisonedMask[targetLabel][poisonedMask[targetLabel] == True])\n", "\n", "    cleanDist = Dy[\"Scores\"][Dy[\"Poisoned\"] == False]\n", "    if len(cleanDist) > nPoisonedSamples*10:\n", "        cleanDist = cleanDist.sample(n=nPoisonedSamples*10, random_state=0)\n", "    poisonDist = Dy[\"Scores\"][Dy[\"Poisoned\"] == True]\n", "\n", "    if len(Dy[Dy[\"Poisoned\"] == True]) > 0:\n", "        bins = np.linspace(0, max(max(cleanDist), max(poisonDist)), nbins)\n", "        plt.hist(poisonDist, color=\"tab:red\", bins=bins, alpha=0.75, label=\"Poisoned\")\n", "        plt.hist(cleanDist, bins=bins, color=\"tab:green\", alpha=0.75, label=\"Clean\")\n", "        plt.legend(loc=\"upper right\")\n", "    else:\n", "        bins = np.linspace(0, max(cleanDist), nbins)\n", "        plt.hist(cleanDist, bins=bins, color=\"tab:green\", alpha=0.75, label=\"Clean\")\n", "\n", "    plt.title(\"Correlation plot for label \" + str(y))\n", "    plt.xlabel(\"Correlation with top right singular vector\")\n", "    plt.ylabel(\"Number of samples\")\n", "    #plt.ylim(0,2000)\n", "    plt.show()" ] },
{ "cell_type": "code", "execution_count": null, "id": "47bad36e", "metadata": { "scrolled": false }, "outputs": [], "source": [ "for y in labels:\n", "    plotCorrelationScores(y, 100)\n" ] },
{ "cell_type": "code", "execution_count": null, "id": "881c4dcb", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }