diff --git a/notebooks/CodeFill.ipynb b/notebooks/CodeFill.ipynb index 4761044..b26ce26 100644 --- a/notebooks/CodeFill.ipynb +++ b/notebooks/CodeFill.ipynb @@ -1,1388 +1,1391 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "CodeFill.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Install the correct dependencies for HuggingFace transformers and TensorFlow" + ], + "metadata": { + "id": "xIT_uHUdThub" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { "colab": { - "name": "CodeFill.ipynb", - "provenance": [], - "collapsed_sections": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" + "id": "f3KWTckbTUs2", + "outputId": "2012bedd-8fb1-4909-d4bc-1f0ce0ad416e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Found existing installation: tensorflow 2.8.0\n", + "Uninstalling tensorflow-2.8.0:\n", + " Successfully uninstalled tensorflow-2.8.0\n", + "Collecting git+https://github.com/huggingface/transformers\n", + " Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-nl4u3bjj\n", + " Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-nl4u3bjj\n", + " Installing build dependencies ... \u001B[?25l\u001B[?25hdone\n", + " Getting requirements to build wheel ... \u001B[?25l\u001B[?25hdone\n", + " Preparing wheel metadata ... \u001B[?25l\u001B[?25hdone\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (2.23.0)\n", + "Collecting huggingface-hub<1.0,>=0.1.0\n", + " Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)\n", + "\u001B[K |████████████████████████████████| 77 kB 4.2 MB/s \n", + "\u001B[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (2019.12.20)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (4.11.3)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (21.3)\n", + "Collecting pyyaml>=5.1\n", + " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", + "\u001B[K |████████████████████████████████| 596 kB 16.8 MB/s \n", + "\u001B[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", + " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", + "\u001B[K |████████████████████████████████| 6.6 MB 42.7 MB/s \n", + "\u001B[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (3.6.0)\n", + "Collecting sacremoses\n", + " Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)\n", + "\u001B[K |████████████████████████████████| 895 kB 44.5 MB/s \n", + "\u001B[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (4.64.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from 
transformers==4.19.0.dev0) (1.21.6)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.19.0.dev0) (4.2.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers==4.19.0.dev0) (3.0.8)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.19.0.dev0) (3.8.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (3.0.4)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (1.1.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (7.1.2)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (1.15.0)\n", + "Building wheels for collected packages: transformers\n", + " Building wheel for transformers (PEP 517) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for transformers: filename=transformers-4.19.0.dev0-py3-none-any.whl size=4040857 sha256=84073fb7ad0bd8a06a08636e44e15aadf97eb4dd26c2d2a0e3dfc90232f8158b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ii8u1on2/wheels/35/2e/a7/d819e3310040329f0f47e57c9e3e7a7338aa5e74c49acfe522\n", + "Successfully built transformers\n", + "Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n", + " Attempting uninstall: pyyaml\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + "Successfully installed huggingface-hub-0.5.1 pyyaml-6.0 sacremoses-0.0.49 tokenizers-0.12.1 transformers-4.19.0.dev0\n", + "tokenizers 0.12.1\n", + "transformers 4.19.0.dev0\n", + "Collecting nlp==0.2.0\n", + " Downloading nlp-0.2.0-py3-none-any.whl (857 kB)\n", + "\u001B[K |████████████████████████████████| 857 kB 8.9 MB/s \n", + "\u001B[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (3.6.0)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (2.23.0)\n", + "Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (6.0.1)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (1.21.6)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (4.64.0)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (0.3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2021.10.8)\n", + "Requirement already satisfied: 
chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2.10)\n", + "Installing collected packages: nlp\n", + "Successfully installed nlp-0.2.0\n", + "Collecting datasets\n", + " Downloading datasets-2.1.0-py3-none-any.whl (325 kB)\n", + "\u001B[K |████████████████████████████████| 325 kB 8.6 MB/s \n", + "\u001B[?25hRequirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n", + "Collecting fsspec[http]>=2021.05.0\n", + " Downloading fsspec-2022.3.0-py3-none-any.whl (136 kB)\n", + "\u001B[K |████████████████████████████████| 136 kB 60.7 MB/s \n", + "\u001B[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.11.3)\n", + "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.5.1)\n", + "Collecting xxhash\n", + " Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", + "\u001B[K |████████████████████████████████| 212 kB 49.9 MB/s \n", + "\u001B[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.0)\n", + "Collecting aiohttp\n", + " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", + "\u001B[K |████████████████████████████████| 1.1 MB 48.8 MB/s \n", + "\u001B[?25hRequirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n", + "Collecting responses<0.19\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.2.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.6.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n", + "Requirement already satisfied: idna<3,>=2.5 in 
/usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", + "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n", + " Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n", + "\u001B[K |████████████████████████████████| 127 kB 59.6 MB/s \n", + "\u001B[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.4.0)\n", + "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.12)\n", + "Collecting asynctest==0.13.0\n", + " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", + "Collecting multidict<7.0,>=4.5\n", + " Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n", + "\u001B[K |████████████████████████████████| 94 kB 3.7 MB/s \n", + "\u001B[?25hCollecting yarl<2.0,>=1.0\n", + " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", + "\u001B[K |████████████████████████████████| 271 kB 57.7 MB/s \n", + "\u001B[?25hCollecting async-timeout<5.0,>=4.0.0a3\n", + " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", + "Collecting frozenlist>=1.1.1\n", + " Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", + "\u001B[K |████████████████████████████████| 144 kB 62.5 MB/s \n", + "\u001B[?25hCollecting aiosignal>=1.1.2\n", + " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.8.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", + "Installing collected packages: multidict, frozenlist, yarl, urllib3, asynctest, async-timeout, aiosignal, fsspec, aiohttp, xxhash, responses, datasets\n", + " Attempting uninstall: urllib3\n", + " Found existing installation: urllib3 1.24.3\n", + " Uninstalling urllib3-1.24.3:\n", + " Successfully uninstalled urllib3-1.24.3\n", + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "kapre 0.3.7 requires tensorflow>=2.0.0, which is not installed.\n", + "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001B[0m\n", + "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-2.1.0 frozenlist-1.3.0 fsspec-2022.3.0 multidict-6.0.2 responses-0.18.0 urllib3-1.25.11 xxhash-3.0.0 yarl-1.7.2\n", + "Collecting git+https://github.com/huggingface/nlp\n", + " Cloning https://github.com/huggingface/nlp to /tmp/pip-req-build-zp5gwnk_\n", + " Running command git clone -q https://github.com/huggingface/nlp /tmp/pip-req-build-zp5gwnk_\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (1.21.6)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (6.0.1)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.3.4)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (1.3.5)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (2.23.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (4.64.0)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (3.0.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.70.12.2)\n", + "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (2022.3.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (3.8.1)\n", + "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.5.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (21.3)\n", + "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.18.0)\n", + "Requirement already satisfied: importlib_metadata in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (4.11.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (3.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (4.2.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (6.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==2.1.1.dev0) (3.0.8)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (2021.10.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in 
/usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (1.25.11)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (3.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (4.0.2)\n", + "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (2.0.12)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.3.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.2.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (21.4.0)\n", + "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (0.13.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.7.2)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (6.0.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib_metadata->datasets==2.1.1.dev0) (3.8.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.1.1.dev0) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.1.1.dev0) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==2.1.1.dev0) (1.15.0)\n", + "Building wheels for collected packages: datasets\n", + " Building wheel for datasets (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + " Created wheel for datasets: filename=datasets-2.1.1.dev0-py3-none-any.whl size=327456 sha256=d7cadb2d89edc35e658adda0904ba4c5a7a20d39e04a3060fbdc3888fc78f67f\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-yre84zb1/wheels/b7/b2/b6/a0b4e0d11cb66d705e54f7bb72fdbe910b5e9f198ada8b4347\n", + "Successfully built datasets\n", + "Installing collected packages: datasets\n", + " Attempting uninstall: datasets\n", + " Found existing installation: datasets 2.1.0\n", + " Uninstalling datasets-2.1.0:\n", + " Successfully uninstalled datasets-2.1.0\n", + "Successfully installed datasets-2.1.1.dev0\n" + ] } + ], + "source": [ + "# We won't need TensorFlow here\n", + "!pip uninstall -y tensorflow\n", + "# Install `transformers` from master\n", + "!pip install git+https://github.com/huggingface/transformers\n", + "!pip list | grep -E 'transformers|tokenizers'\n", + "!pip install nlp==0.2.0\n", + "!pip install datasets\n", + "!pip install git+https://github.com/huggingface/nlp\n", + "\n", + "# transformers version at notebook update --- 2.11.0\n", + "# tokenizers version at notebook update --- 0.8.0rc1" + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "Install the correct dependencies on HuggingFace transformer and ternsorflow" - ], - "metadata": { - "id": "xIT_uHUdThub" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "f3KWTckbTUs2", - "outputId": "2012bedd-8fb1-4909-d4bc-1f0ce0ad416e" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Found existing installation: tensorflow 2.8.0\n", - "Uninstalling tensorflow-2.8.0:\n", - " Successfully uninstalled tensorflow-2.8.0\n", - "Collecting git+https://github.com/huggingface/transformers\n", - " Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-nl4u3bjj\n", - " Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-nl4u3bjj\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing wheel metadata ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (2.23.0)\n", - "Collecting huggingface-hub<1.0,>=0.1.0\n", - " Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)\n", - "\u001b[K |████████████████████████████████| 77 kB 4.2 MB/s \n", - "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (2019.12.20)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (4.11.3)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (21.3)\n", - "Collecting pyyaml>=5.1\n", - " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", - "\u001b[K |████████████████████████████████| 596 kB 16.8 MB/s \n", - "\u001b[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", - " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", - "\u001b[K |████████████████████████████████| 6.6 MB 42.7 MB/s \n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (3.6.0)\n", - "Collecting sacremoses\n", - " Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)\n", - "\u001b[K |████████████████████████████████| 895 kB 44.5 MB/s \n", - "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (4.64.0)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.19.0.dev0) (1.21.6)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.19.0.dev0) (4.2.0)\n", - "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers==4.19.0.dev0) (3.0.8)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.19.0.dev0) (3.8.0)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (1.24.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (2021.10.8)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.19.0.dev0) (3.0.4)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (1.1.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (7.1.2)\n", - "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers==4.19.0.dev0) (1.15.0)\n", - "Building wheels for collected packages: transformers\n", - " Building wheel for transformers (PEP 517) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for transformers: filename=transformers-4.19.0.dev0-py3-none-any.whl size=4040857 sha256=84073fb7ad0bd8a06a08636e44e15aadf97eb4dd26c2d2a0e3dfc90232f8158b\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-ii8u1on2/wheels/35/2e/a7/d819e3310040329f0f47e57c9e3e7a7338aa5e74c49acfe522\n", - "Successfully built transformers\n", - "Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n", - " Attempting uninstall: pyyaml\n", - " Found existing installation: PyYAML 3.13\n", - " Uninstalling PyYAML-3.13:\n", - " Successfully uninstalled PyYAML-3.13\n", - "Successfully installed huggingface-hub-0.5.1 pyyaml-6.0 sacremoses-0.0.49 tokenizers-0.12.1 transformers-4.19.0.dev0\n", - "tokenizers 0.12.1\n", - "transformers 4.19.0.dev0\n", - "Collecting nlp==0.2.0\n", - " Downloading nlp-0.2.0-py3-none-any.whl (857 kB)\n", - "\u001b[K |████████████████████████████████| 857 kB 8.9 MB/s \n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (3.6.0)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (2.23.0)\n", - "Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (6.0.1)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (1.21.6)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (4.64.0)\n", - "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (0.3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2021.10.8)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (3.0.4)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (1.24.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2.10)\n", - "Installing collected packages: nlp\n", - "Successfully installed nlp-0.2.0\n", - "Collecting datasets\n", - " Downloading datasets-2.1.0-py3-none-any.whl (325 kB)\n", - "\u001b[K |████████████████████████████████| 325 kB 8.6 MB/s \n", - "\u001b[?25hRequirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n", - "Collecting fsspec[http]>=2021.05.0\n", - " Downloading fsspec-2022.3.0-py3-none-any.whl (136 kB)\n", - "\u001b[K |████████████████████████████████| 136 kB 60.7 MB/s \n", - "\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.11.3)\n", - "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.5.1)\n", 
- "Collecting xxhash\n", - " Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", - "\u001b[K |████████████████████████████████| 212 kB 49.9 MB/s \n", - "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.0)\n", - "Collecting aiohttp\n", - " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", - "\u001b[K |████████████████████████████████| 1.1 MB 48.8 MB/s \n", - "\u001b[?25hRequirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n", - "Collecting responses<0.19\n", - " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.2.0)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.6.0)\n", - "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.8)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", - "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n", - " Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n", - "\u001b[K |████████████████████████████████| 127 kB 59.6 MB/s \n", - "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.4.0)\n", - "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.12)\n", - "Collecting asynctest==0.13.0\n", - " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", - "Collecting multidict<7.0,>=4.5\n", - " Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n", - "\u001b[K |████████████████████████████████| 94 kB 3.7 MB/s \n", - "\u001b[?25hCollecting yarl<2.0,>=1.0\n", - " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", - "\u001b[K |████████████████████████████████| 271 kB 57.7 MB/s \n", - "\u001b[?25hCollecting async-timeout<5.0,>=4.0.0a3\n", - " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", - "Collecting frozenlist>=1.1.1\n", - " Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", - "\u001b[K |████████████████████████████████| 144 kB 62.5 MB/s \n", - "\u001b[?25hCollecting aiosignal>=1.1.2\n", - " Downloading 
aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.8.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", - "Installing collected packages: multidict, frozenlist, yarl, urllib3, asynctest, async-timeout, aiosignal, fsspec, aiohttp, xxhash, responses, datasets\n", - " Attempting uninstall: urllib3\n", - " Found existing installation: urllib3 1.24.3\n", - " Uninstalling urllib3-1.24.3:\n", - " Successfully uninstalled urllib3-1.24.3\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "kapre 0.3.7 requires tensorflow>=2.0.0, which is not installed.\n", - "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n", - "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-2.1.0 frozenlist-1.3.0 fsspec-2022.3.0 multidict-6.0.2 responses-0.18.0 urllib3-1.25.11 xxhash-3.0.0 yarl-1.7.2\n", - "Collecting git+https://github.com/huggingface/nlp\n", - " Cloning https://github.com/huggingface/nlp to /tmp/pip-req-build-zp5gwnk_\n", - " Running command git clone -q https://github.com/huggingface/nlp /tmp/pip-req-build-zp5gwnk_\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (1.21.6)\n", - "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (6.0.1)\n", - "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.3.4)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (1.3.5)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (2.23.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (4.64.0)\n", - "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (3.0.0)\n", - "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.70.12.2)\n", - "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (2022.3.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (3.8.1)\n", - "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.5.1)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (21.3)\n", - "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.7/dist-packages (from datasets==2.1.1.dev0) (0.18.0)\n", - "Requirement already satisfied: importlib_metadata in /usr/local/lib/python3.7/dist-packages (from 
datasets==2.1.1.dev0) (4.11.3)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (3.6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (4.2.0)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.1.1.dev0) (6.0)\n", - "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==2.1.1.dev0) (3.0.8)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (2021.10.8)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (1.25.11)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.1.1.dev0) (3.0.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (4.0.2)\n", - "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (2.0.12)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.3.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.2.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (21.4.0)\n", - "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (0.13.0)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (1.7.2)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.1.1.dev0) (6.0.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib_metadata->datasets==2.1.1.dev0) (3.8.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.1.1.dev0) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.1.1.dev0) (2022.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==2.1.1.dev0) (1.15.0)\n", - "Building wheels for collected packages: datasets\n", - " Building wheel for datasets (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for datasets: filename=datasets-2.1.1.dev0-py3-none-any.whl size=327456 sha256=d7cadb2d89edc35e658adda0904ba4c5a7a20d39e04a3060fbdc3888fc78f67f\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-yre84zb1/wheels/b7/b2/b6/a0b4e0d11cb66d705e54f7bb72fdbe910b5e9f198ada8b4347\n", - "Successfully built datasets\n", - "Installing collected packages: datasets\n", - " Attempting uninstall: datasets\n", - " Found existing installation: datasets 2.1.0\n", - " Uninstalling datasets-2.1.0:\n", - " Successfully uninstalled datasets-2.1.0\n", - "Successfully installed datasets-2.1.1.dev0\n" - ] - } - ], - "source": [ - "# We won't need TensorFlow here\n", - "!pip uninstall -y tensorflow\n", - "# Install `transformers` from master\n", - "!pip install git+https://github.com/huggingface/transformers\n", - "!pip list | grep -E 'transformers|tokenizers'\n", - "!pip install nlp==0.2.0\n", - "!pip install datasets\n", - "!pip install git+https://github.com/huggingface/nlp\n", - "\n", - "# transformers version at notebook update --- 2.11.0\n", - "# tokenizers version at notebook update --- 0.8.0rc1" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Fetch datasets" - ], - "metadata": { - "id": "b4Jowf-vf8Ck" - } - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "import tokenize\n", - "import dis\n", - "import sys\n", - "import re\n", - "import keyword\n", - "import pandas as pd\n", - "import ast\n", - "import torch\n", - "import signal\n", - "from functools import wraps\n", - "\n", - "def multireplace(string, replacements, ignore_case=False):\n", - " \"\"\"\n", - " Given a string and a replacement map, it returns the replaced string.\n", - " :param str string: string to execute replacements on\n", - " :param dict replacements: replacement dictionary {value to find: value to replace}\n", - " :param bool ignore_case: whether the match should be case insensitive\n", - " :rtype: str\n", - " \"\"\"\n", - " # If case insensitive, we need to normalize the old string so that later a replacement\n", - " # can be found. 
For instance with {\"HEY\": \"lol\"} we should match and find a replacement for \"hey\",\n", - " # \"HEY\", \"hEy\", etc.\n", - " if ignore_case:\n", - " def normalize_old(s):\n", - " return s.lower()\n", - " re_mode = re.IGNORECASE\n", - " else:\n", - " def normalize_old(s):\n", - " return s\n", - " re_mode = 0\n", - "\n", - " replacements = {normalize_old(key): val for key, val in replacements.items()}\n", - " \n", - " # Place longer ones first to keep shorter substrings from matching where the longer ones should take place\n", - " # For instance given the replacements {'ab': 'AB', 'abc': 'ABC'} against the string 'hey abc', it should produce\n", - " # 'hey ABC' and not 'hey ABc'\n", - " rep_sorted = sorted(replacements, key=len, reverse=True)\n", - " rep_escaped = map(re.escape, rep_sorted)\n", - " \n", - " # Create a big OR regex that matches any of the substrings to replace\n", - " pattern = re.compile(\"|\".join(rep_escaped), re_mode)\n", - " \n", - " # For each match, look up the new string in the replacements, being the key the normalized old string\n", - " return pattern.sub(lambda match: replacements[normalize_old(match.group(0))], string)\n", - "\n", - "\n", - "def convert(file, output_file):\n", - " with open (file, \"r\") as f:\n", - " text = f.read() \n", - "\n", - " replacements = {}\n", - " for node in ast.iter_child_nodes(ast.parse(text)):\n", - " if isinstance(node, ast.ImportFrom):\n", - " replacements.update({node.module: 'MODULE'})\n", - " if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom):\n", - " for i, v in enumerate(node.names):\n", - " if(node.names[i].asname):\n", - " replacements.update({node.names[i].name: 'LIB'}) \n", - " replacements.update({node.names[i].asname: 'ALIAS'})\n", - " else:\n", - " replacements.update({node.names[i].name: 'LIBRARY'})\n", - "\n", - "\n", - " # reomve * from the dictionary (handle from module import * statement)\n", - " replacements.pop('*', None)\n", - " print('List of modules and libraries to replace:\\n', replacements)\n", - "\n", - " with open('med.py','w') as f:\n", - " f.write(multireplace(text, replacements, ignore_case = True))\n", - "\n", - " file = 'med.py'\n", - " with open(file,'rb') as f:\n", - " tokens = list(tokenize.tokenize(f.readline))\n", - " \n", - " ### extract important data from the output of tokenize package\n", - " toks = pd.DataFrame(columns = ['original','type','text', 'line','pos'])\n", - "\n", - " last_line = 0\n", - " last_pos = 0\n", - "\n", - " for token in tokens:\n", - " \n", - " tok_org = token.string\n", - " tok_text = token.string \n", - " tok_type = str(token).split('(')[2].split(')')[0]\n", - "\n", - " # convert keywords to upper\n", - " if keyword.iskeyword(tok_text):\n", - " tok_type = str.upper(tok_text)\n", - " \n", - " #extract operations\n", - " # if tok_type == 'OP':\n", - " # tok_type = tok_text\n", - "\n", - "\n", - " # getting rid of comments and empty lines\n", - " if tok_type in ['NL','NEWLINE','COMMENT']:\n", - " continue\n", - " \n", - " #retrieve the position\n", - " tok_line = token.start[0]\n", - " \n", - " if last_line == tok_line:\n", - " last_pos += 1\n", - " else:\n", - " last_pos = 1\n", - " tok_pos = last_pos\n", - " last_line = tok_line\n", - " \n", - " toks = toks.append({'type':tok_type,\n", - " 'original':tok_org,\n", - " 'text':tok_text,\n", - " 'line':tok_line,\n", - " 'pos':tok_pos},ignore_index=True)\n", - "\n", - "\n", - " # remove encoding lines and end of file\n", - " toks.line = toks.line.astype('int')\n", - " toks.pos = 
toks.pos.astype('int')\n", - " toks = toks.loc[~((toks.type == 'ENCODING') | (toks.type == 'ENDMARKER'))]\n", - " toks['doc'] = (toks.text.str.contains('\"\"\"') | toks.text.str.contains(\"'''\"))\n", - " toks = toks.loc[~(toks.doc)].drop(['doc'],axis=1)\n", - "\n", - " toks.head(20)\n", - "\n", - " indent = 0\n", - " last_line = 0\n", - "\n", - " for index,row in toks.iterrows():\n", - " if row.type == \"INDENT\":\n", - " indent +=1\n", - " continue\n", - " if row.type == \"DEDENT\":\n", - " indent -=1\n", - " continue\n", - " if row.line != last_line:\n", - " last_line = row.line\n", - " toks = toks.append({'type':'\\n'+indent*'\\t',\n", - " 'text':'\\n'+indent*'\\t',\n", - " 'line':row.line,\n", - " 'pos':row.pos-1},ignore_index=True)\n", - "\n", - " toks = toks.loc[~((toks.type=='INDENT') | (toks.type=='DEDENT'))]\n", - " toks = toks.sort_values(['line','pos']).reset_index(drop=True)\n", - "\n", - "\n", - " # drop the first row (empty line)\n", - " toks.drop(toks.index[:1], inplace=True)\n", - "\n", - " toks.head(20)\n", - "\n", - " with open(file,'r') as f:\n", - " src = f.read()\n", - "\n", - " stdout_backup = sys.stdout\n", - " sys.stdout = open('dis.txt','w')\n", - " dis.dis(src)\n", - " sys.stdout = stdout_backup\n", - "\n", - " with open('dis.txt','r') as f:\n", - " lines = f.readlines()\n", - "\n", - " # find global variables\n", - " glbls = [].copy() \n", - " for l in lines:\n", - " clean = l.replace('>>',' ').strip().split()\n", - " if len(clean):\n", - " try:\n", - " int(clean[1])\n", - " line = int(clean[0])\n", - " except:\n", - " clean = [str(line)]+clean\n", - " if 'LOAD_GLOBAL' in clean:\n", - " print('found a global!')\n", - " glbls.append((int(clean[0]),clean[-1].replace('(','').replace(')','')))\n", - "\n", - " for l,n in glbls:\n", - " toks.loc[(toks.line==l) & (toks.text==n),'type'] = 'GLOBAL_VARIABLE'\n", - "\n", - " toks .head(10) \n", - "\n", - " text_imports = ' '.join(list(toks.text)).replace('\\n ','\\n').replace(' \\n','\\n').replace('\\t ','\\t').replace(' . ','.').replace(' (','(')\n", - " text_imports = multireplace(text_imports, replacements, ignore_case = True)\n", - "\n", - " with open('normalized_textual_file.py','w') as f:\n", - " f.write(text_imports)\n", - "\n", - " toks.type = toks.apply(lambda x: x['text'] if str(x['text']) in ['LIBRARY','LIB','ALIAS','MODULE'] else x['type'], axis = 1)\n", - " code_converted = ' '.join(list(toks.type)).replace('\\n ','\\n').replace(' \\n','\\n').replace('\\t ','\\t').replace(' . 
','.').replace(' (','(')\n", - "\n", - " final_replacements = {'GLOBAL_VARIABLE(':'FUNCTION_CALL(', \n", - " # 'NAME.NAME':'NAME',\n", - " 'NAME(':'FUNCTION_CALL(',\n", - " 'NAME':'LOCAL_VARIABLE'}\n", - "\n", - " code_converted = multireplace(code_converted, final_replacements, ignore_case = False)\n", - "\n", - " with open(output_file,'w') as f:\n", - " f.write(code_converted)\n", - "\n", - "\n", - "WEIGHT_MATRIX = {\n", - " 'NUMBER' : [1.625, 1.25, 1.125],\n", - " 'NAME' : [1.625, 1.125, 1.5],\n", - " 'LOCAL_VARIABLE' : [1.625, 1.125, 1.5],\n", - " 'FUNCTION_NAME' : [1.625, 1.25, 1.5]\n", - " }\n", - "\n", - "input_file = \"/tmp/input_file.txt\"\n", - "output_file = \"/tmp/output_file.txt\"\n", - "def reranking_layer(outputs, context, tokenizer):\n", - "\n", - " with open(input_file, 'w') as f:\n", - " f.write(context);\n", - " \n", - " convert(file_path=input_file, output_file=output_file)\n", - " with open(output_file, 'rb') as context:\n", - " inputs = list(zip(tokenizer(input_file), tokenizer(output_file)))\n", - " for item in inputs:\n", - " loss_fct = nn.CrossEntropyLoss(weight=torch.tensor(WEIGHT_MATRIX[item[1]]))\n", - "\n" - ], - "metadata": { - "id": "PGHNJPreFmOw" - }, - "execution_count": 13, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "convert(\"./sample_data/data/peakfinder.py\", \"./converted_train.txt\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cNqN20xrhkGq", - "outputId": "7e041a8c-eb0a-4648-9de0-3c1ffc6440e6" - }, - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "List of modules and libraries to replace:\n", - " {'pylab': 'MODULE', 'rtlsdr': 'MODULE'}\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# pretrain dataset\n", - "#!wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/pretrain_dataset.zip\n", - "#!unzip 'pretrain_dataset.zip'\n", - "\n", - "# converted dataset\n", - "#! wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/converted_dataset.zip\n", - "#! unzip 'converted_dataset.zip'\n", - "\n", - "# test dataset\n", - "#!wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/finetune_eval_dataset.zip\n", - "#!unzip 'finetune_eval_dataset.zip'" - ], - "metadata": { - "id": "sEmqUukXf__k" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Train a customised python byte-level Byte-pair encoding tokenizer. 
" - ], - "metadata": { - "id": "M0wmpgCxUIF3" - } + { + "cell_type": "markdown", + "source": [ + "Fetch datasets" + ], + "metadata": { + "id": "b4Jowf-vf8Ck" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import tokenize\n", + "import dis\n", + "import sys\n", + "import re\n", + "import keyword\n", + "import pandas as pd\n", + "import ast\n", + "import torch\n", + "import signal\n", + "from functools import wraps\n", + "\n", + "def multireplace(string, replacements, ignore_case=False):\n", + " \"\"\"\n", + " Given a string and a replacement map, it returns the replaced string.\n", + " :param str string: string to execute replacements on\n", + " :param dict replacements: replacement dictionary {value to find: value to replace}\n", + " :param bool ignore_case: whether the match should be case insensitive\n", + " :rtype: str\n", + " \"\"\"\n", + " # If case insensitive, we need to normalize the old string so that later a replacement\n", + " # can be found. For instance with {\"HEY\": \"lol\"} we should match and find a replacement for \"hey\",\n", + " # \"HEY\", \"hEy\", etc.\n", + " if ignore_case:\n", + " def normalize_old(s):\n", + " return s.lower()\n", + " re_mode = re.IGNORECASE\n", + " else:\n", + " def normalize_old(s):\n", + " return s\n", + " re_mode = 0\n", + "\n", + " replacements = {normalize_old(key): val for key, val in replacements.items()}\n", + " \n", + " # Place longer ones first to keep shorter substrings from matching where the longer ones should take place\n", + " # For instance given the replacements {'ab': 'AB', 'abc': 'ABC'} against the string 'hey abc', it should produce\n", + " # 'hey ABC' and not 'hey ABc'\n", + " rep_sorted = sorted(replacements, key=len, reverse=True)\n", + " rep_escaped = map(re.escape, rep_sorted)\n", + " \n", + " # Create a big OR regex that matches any of the substrings to replace\n", + " pattern = re.compile(\"|\".join(rep_escaped), re_mode)\n", + " \n", + " # For each match, look up the new string in the replacements, being the key the normalized old string\n", + " return pattern.sub(lambda match: replacements[normalize_old(match.group(0))], string)\n", + "\n", + "\n", + "def convert(file, output_file):\n", + " with open (file, \"r\") as f:\n", + " text = f.read() \n", + "\n", + " replacements = {}\n", + " for node in ast.iter_child_nodes(ast.parse(text)):\n", + " if isinstance(node, ast.ImportFrom):\n", + " replacements.update({node.module: 'MODULE'})\n", + " if isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom):\n", + " for i, v in enumerate(node.names):\n", + " if(node.names[i].asname):\n", + " replacements.update({node.names[i].name: 'LIB'}) \n", + " replacements.update({node.names[i].asname: 'ALIAS'})\n", + " else:\n", + " replacements.update({node.names[i].name: 'LIBRARY'})\n", + "\n", + "\n", + " # reomve * from the dictionary (handle from module import * statement)\n", + " replacements.pop('*', None)\n", + " print('List of modules and libraries to replace:\\n', replacements)\n", + "\n", + " with open('med.py','w') as f:\n", + " f.write(multireplace(text, replacements, ignore_case = True))\n", + "\n", + " file = 'med.py'\n", + " with open(file,'rb') as f:\n", + " tokens = list(tokenize.tokenize(f.readline))\n", + " \n", + " ### extract important data from the output of tokenize package\n", + " toks = pd.DataFrame(columns = ['original','type','text', 'line','pos'])\n", + "\n", + " last_line = 0\n", + " last_pos = 0\n", + "\n", + " for token in tokens:\n", + " \n", + " tok_org = token.string\n", + " 
tok_text = token.string \n", + " tok_type = str(token).split('(')[2].split(')')[0]\n", + "\n", + " # convert keywords to upper\n", + " if keyword.iskeyword(tok_text):\n", + " tok_type = str.upper(tok_text)\n", + " \n", + " #extract operations\n", + " # if tok_type == 'OP':\n", + " # tok_type = tok_text\n", + "\n", + "\n", + " # getting rid of comments and empty lines\n", + " if tok_type in ['NL','NEWLINE','COMMENT']:\n", + " continue\n", + " \n", + " #retrieve the position\n", + " tok_line = token.start[0]\n", + " \n", + " if last_line == tok_line:\n", + " last_pos += 1\n", + " else:\n", + " last_pos = 1\n", + " tok_pos = last_pos\n", + " last_line = tok_line\n", + " \n", + " toks = toks.append({'type':tok_type,\n", + " 'original':tok_org,\n", + " 'text':tok_text,\n", + " 'line':tok_line,\n", + " 'pos':tok_pos},ignore_index=True)\n", + "\n", + "\n", + " # remove encoding lines and end of file\n", + " toks.line = toks.line.astype('int')\n", + " toks.pos = toks.pos.astype('int')\n", + " toks = toks.loc[~((toks.type == 'ENCODING') | (toks.type == 'ENDMARKER'))]\n", + " toks['doc'] = (toks.text.str.contains('\"\"\"') | toks.text.str.contains(\"'''\"))\n", + " toks = toks.loc[~(toks.doc)].drop(['doc'],axis=1)\n", + "\n", + " toks.head(20)\n", + "\n", + " indent = 0\n", + " last_line = 0\n", + "\n", + " for index,row in toks.iterrows():\n", + " if row.type == \"INDENT\":\n", + " indent +=1\n", + " continue\n", + " if row.type == \"DEDENT\":\n", + " indent -=1\n", + " continue\n", + " if row.line != last_line:\n", + " last_line = row.line\n", + " toks = toks.append({'type':'\\n'+indent*'\\t',\n", + " 'text':'\\n'+indent*'\\t',\n", + " 'line':row.line,\n", + " 'pos':row.pos-1},ignore_index=True)\n", + "\n", + " toks = toks.loc[~((toks.type=='INDENT') | (toks.type=='DEDENT'))]\n", + " toks = toks.sort_values(['line','pos']).reset_index(drop=True)\n", + "\n", + "\n", + " # drop the first row (empty line)\n", + " toks.drop(toks.index[:1], inplace=True)\n", + "\n", + " toks.head(20)\n", + "\n", + " with open(file,'r') as f:\n", + " src = f.read()\n", + "\n", + " stdout_backup = sys.stdout\n", + " sys.stdout = open('dis.txt','w')\n", + " dis.dis(src)\n", + " sys.stdout = stdout_backup\n", + "\n", + " with open('dis.txt','r') as f:\n", + " lines = f.readlines()\n", + "\n", + " # find global variables\n", + " glbls = [].copy() \n", + " for l in lines:\n", + " clean = l.replace('>>',' ').strip().split()\n", + " if len(clean):\n", + " try:\n", + " int(clean[1])\n", + " line = int(clean[0])\n", + " except:\n", + " clean = [str(line)]+clean\n", + " if 'LOAD_GLOBAL' in clean:\n", + " print('found a global!')\n", + " glbls.append((int(clean[0]),clean[-1].replace('(','').replace(')','')))\n", + "\n", + " for l,n in glbls:\n", + " toks.loc[(toks.line==l) & (toks.text==n),'type'] = 'GLOBAL_VARIABLE'\n", + "\n", + " toks .head(10) \n", + "\n", + " text_imports = ' '.join(list(toks.text)).replace('\\n ','\\n').replace(' \\n','\\n').replace('\\t ','\\t').replace(' . ','.').replace(' (','(')\n", + " text_imports = multireplace(text_imports, replacements, ignore_case = True)\n", + "\n", + " with open('normalized_textual_file.py','w') as f:\n", + " f.write(text_imports)\n", + "\n", + " toks.type = toks.apply(lambda x: x['text'] if str(x['text']) in ['LIBRARY','LIB','ALIAS','MODULE'] else x['type'], axis = 1)\n", + " code_converted = ' '.join(list(toks.type)).replace('\\n ','\\n').replace(' \\n','\\n').replace('\\t ','\\t').replace(' . 
','.').replace(' (','(')\n", + "\n", + " final_replacements = {'GLOBAL_VARIABLE(':'FUNCTION_CALL(', \n", + " # 'NAME.NAME':'NAME',\n", + " 'NAME(':'FUNCTION_CALL(',\n", + " 'NAME':'LOCAL_VARIABLE'}\n", + "\n", + " code_converted = multireplace(code_converted, final_replacements, ignore_case = False)\n", + "\n", + " with open(output_file,'w') as f:\n", + " f.write(code_converted)\n", + "\n", + "\n", + "WEIGHT_MATRIX = {\n", + " 'NUMBER' : [1.625, 1.25, 1.125],\n", + " 'NAME' : [1.625, 1.125, 1.5],\n", + " 'LOCAL_VARIABLE' : [1.625, 1.125, 1.5],\n", + " 'FUNCTION_NAME' : [1.625, 1.25, 1.5]\n", + " }\n", + "\n", + "input_file = \"/tmp/input_file.txt\"\n", + "output_file = \"/tmp/output_file.txt\"\n", + "def reranking_layer(outputs, context, tokenizer):\n", + "\n", + " with open(input_file, 'w') as f:\n", + " f.write(context);\n", + " \n", + " # convert() takes (file, output_file); pass the keyword that matches its signature\n", + " convert(file=input_file, output_file=output_file)\n", + " with open(output_file, 'rb') as context:\n", + " inputs = list(zip(tokenizer(input_file), tokenizer(output_file)))\n", + " for item in inputs:\n", + " loss_fct = torch.nn.CrossEntropyLoss(weight=torch.tensor(WEIGHT_MATRIX[item[1]]))\n", + "\n" + ], + "metadata": { + "id": "PGHNJPreFmOw" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "convert(\"./sample_data/data/peakfinder.py\", \"./converted_train.txt\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "cNqN20xrhkGq", + "outputId": "7e041a8c-eb0a-4648-9de0-3c1ffc6440e6" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "List of modules and libraries to replace:\n", + " {'pylab': 'MODULE', 'rtlsdr': 'MODULE'}\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# pretrain dataset\n", + "#!wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/pretrain_dataset.zip\n", + "#!unzip 'pretrain_dataset.zip'\n", + "\n", + "# converted dataset\n", + "#! wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/converted_dataset.zip\n", + "#! unzip 'converted_dataset.zip'\n", + "\n", + "# test dataset\n", + "#!wget https://huggingface.co/rgismondi/python-50k-dedup/blob/main/finetune_eval_dataset.zip\n", + "#!unzip 'finetune_eval_dataset.zip'" + ], + "metadata": { + "id": "sEmqUukXf__k" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Train a customised Python byte-level Byte-pair encoding tokenizer. 
" + ], + "metadata": { + "id": "M0wmpgCxUIF3" + } + }, + { + "cell_type": "code", + "source": [ + "from pathlib import Path\n", + "from transformers import AutoTokenizer,TextDataset,DataCollatorForLanguageModeling\n", + "import glob\n", + "import random \n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n" + ], + "metadata": { + "id": "oPq1Bau8UbpB" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "paths = [str(x) for x in Path(\".\").glob(\"./sample_data/data/*.py\")]\n", + "converted_paths = []\n", + "for path in paths:\n", + " converted_path = \"./sample_data/converted/\"+ path.split(\"/\").pop().split(\".\")[0] + \".txt\"\n", + " print(converted_path)\n", + " try:\n", + " convert(path, converted_path)\n", + " converted_paths.append(converted_path)\n", + " except:\n", + " pass\n", + "\n", + " \n", + "with open(\"./train.txt\", \"wb\") as train_outfile:\n", + " with open(\"./test.txt\", \"wb\") as test_outfile:\n", + " for f in paths:\n", + " choice = random.random()\n", + " with open(f, \"rb\") as infile:\n", + " if choice > 0.1:\n", + " train_outfile.write(infile.read())\n", + " else:\n", + " test_outfile.write(infile.read())\n", + "\n", + "with open(\"./converted_train.txt\", \"wb\") as train_outfile:\n", + " with open(\"./converted_test.txt\", \"wb\") as test_outfile:\n", + " for f in converted_paths:\n", + " choice = random.random()\n", + " with open(f, \"rb\") as infile:\n", + " if choice > 0.1:\n", + " train_outfile.write(infile.read())\n", + " else:\n", + " test_outfile.write(infile.read())\n" + ], + "metadata": { + "id": "vOXkK5bWYNXz", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "768058fc-877f-4d58-bea9-59933c15e9ad" + }, + "execution_count": 26, + "outputs": [ { - "cell_type": "code", - "source": [ - "paths = [str(x) for x in Path(\".\").glob(\"./sample_data/data/*.py\")]\n", - "converted_paths = []\n", - "for path in paths:\n", - " converted_path = \"./sample_data/converted/\"+ path.split(\"/\").pop().split(\".\")[0] + \".txt\"\n", - " print(converted_path)\n", - " try:\n", - " convert(path, converted_path)\n", - " converted_paths.append(converted_path)\n", - " except:\n", - " pass\n", - "\n", - " \n", - "with open(\"./train.txt\", \"wb\") as train_outfile:\n", - " with open(\"./test.txt\", \"wb\") as test_outfile:\n", - " for f in paths:\n", - " choice = random.random()\n", - " with open(f, \"rb\") as infile:\n", - " if choice > 0.1:\n", - " train_outfile.write(infile.read())\n", - " else:\n", - " test_outfile.write(infile.read())\n", - "\n", - "with open(\"./converted_train.txt\", \"wb\") as train_outfile:\n", - " with open(\"./converted_test.txt\", \"wb\") as test_outfile:\n", - " for f in converted_paths:\n", - " choice = random.random()\n", - " with open(f, \"rb\") as infile:\n", - " if choice > 0.1:\n", - " train_outfile.write(infile.read())\n", - " else:\n", - " test_outfile.write(infile.read())\n" - ], - "metadata": { - "id": "vOXkK5bWYNXz", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "768058fc-877f-4d58-bea9-59933c15e9ad" - }, - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "./sample_data/converted/peakfinder.txt\n", - "List of modules and libraries to replace:\n", - " {'pylab': 'MODULE', 'rtlsdr': 'MODULE'}\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - 
"found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n", - "found a global!\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "./sample_data/converted/peakfinder.txt\n", + "List of modules and libraries to replace:\n", + " {'pylab': 'MODULE', 'rtlsdr': 'MODULE'}\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n", + "found a global!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def load_dataset(train_path,test_path,tokenizer):\n", + " train_dataset = TextDataset(\n", + " tokenizer=tokenizer,\n", + " file_path=train_path,\n", + " block_size=128)\n", + " \n", + " test_dataset = TextDataset(\n", + " tokenizer=tokenizer,\n", + " file_path=test_path,\n", + " block_size=128) \n", + " \n", + " data_collator = DataCollatorForLanguageModeling(\n", + " tokenizer=tokenizer, mlm=False,\n", + " )\n", + " return train_dataset,test_dataset,data_collator\n", + "\n", + "train_dataset,test_dataset,data_collator = load_dataset(\"./train.txt\", \"./test.txt\",tokenizer)\n", + "converted_train_dataset, converted_test_dataset, converted_datacollator = load_dataset(\"./converted_train.txt\", \"./converted_test.txt\",tokenizer)\n", + "#pretrain_raw_files = glob.glob(\"./pretrain_dataset\" + '/**/*.py', recursive=True)\n", + "#pretrain_converted_files = glob.glob(\"./pretrain_converted_dataset\" + '/**/*.py', recursive=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "5Q4kLrTZXTjZ", + "outputId": "15e23b41-9245-454f-d9d6-c169eae0f943" + }, + "execution_count": 16, + "outputs": [ { - "cell_type": "code", - "source": [ - "def load_dataset(train_path,test_path,tokenizer):\n", - " train_dataset = TextDataset(\n", - " tokenizer=tokenizer,\n", - " file_path=train_path,\n", - " block_size=128)\n", - " \n", - " test_dataset = TextDataset(\n", - " tokenizer=tokenizer,\n", - " file_path=test_path,\n", - " block_size=128) \n", - " \n", - " data_collator = DataCollatorForLanguageModeling(\n", - " tokenizer=tokenizer, mlm=False,\n", - " )\n", - " return train_dataset,test_dataset,data_collator\n", - "\n", - "train_dataset,test_dataset,data_collator = load_dataset(\"./train.txt\", \"./test.txt\",tokenizer)\n", - "converted_train_dataset, converted_test_dataset, converted_datacollator = load_dataset(\"./converted_train.txt\", \"./converted_test.txt\",tokenizer)\n", - "#pretrain_raw_files = glob.glob(\"./pretrain_dataset\" + '/**/*.py', recursive=True)\n", - "#pretrain_converted_files = glob.glob(\"./pretrain_converted_dataset\" + '/**/*.py', recursive=True)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5Q4kLrTZXTjZ", - "outputId": "15e23b41-9245-454f-d9d6-c169eae0f943" - }, - "execution_count": 16, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.7/dist-packages/transformers/data/datasets/language_modeling.py:58: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. 
You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_mlm.py\n", - " FutureWarning,\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (1381 > 1024). Running this sequence through the model will result in indexing errors\n" - ] - } - ] + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/data/datasets/language_modeling.py:58: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_mlm.py\n", + " FutureWarning,\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (1381 > 1024). Running this sequence through the model will result in indexing errors\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tokenizer(\"for i in range(10)\")[\"input_ids\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "0AraoltupXmb", + "outputId": "c90124e7-3695-44af-827d-c355632ec2af" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "tokenizer(\"for i in range(10)\")[\"input_ids\"]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0AraoltupXmb", - "outputId": "c90124e7-3695-44af-827d-c355632ec2af" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1640, 1312, 287, 2837, 7, 940, 8]" - ] - }, - "metadata": {}, - "execution_count": 10 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1640, 1312, 287, 2837, 7, 940, 8]" ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn\n", + "import transformers\n", + "import nlp\n", + "import logging\n", + "from datasets import load_dataset\n", + "from transformers import TextDataset,DataCollatorForLanguageModeling\n", + "\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "\n", + "dataset_dict = {\n", + " \"token\": train_dataset,\n", + " \"token_type\": train_dataset,\n", + " \"line\": train_dataset,\n", + "}\n", + "\n", + "print(dataset_dict[\"token\"])\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "4ePqOauaqjnZ", + "outputId": "b6959f0d-812a-4cf2-95e4-2718a5be58bd" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "import transformers\n", - "import nlp\n", - "import logging\n", - "from datasets import load_dataset\n", - "from transformers import TextDataset,DataCollatorForLanguageModeling\n", - "\n", - "\n", - "logging.basicConfig(level=logging.INFO)\n", - "\n", - "dataset_dict = {\n", - " \"token\": train_dataset,\n", - " \"token_type\": train_dataset,\n", - " \"line\": train_dataset,\n", - "}\n", - "\n", - "print(dataset_dict[\"token\"])\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4ePqOauaqjnZ", - "outputId": "b6959f0d-812a-4cf2-95e4-2718a5be58bd" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - 
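Since the FutureWarning above points to the 🤗 Datasets library as the replacement for the deprecated TextDataset, here is a rough sketch of the equivalent preprocessing; the block size of 128 and the train/test file names mirror the cell above, but the rest is an assumption, not code from this notebook:

from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
block_size = 128

raw = load_dataset("text", data_files={"train": "./train.txt", "test": "./test.txt"})
tokenized = raw.map(lambda batch: tokenizer(batch["text"]), batched=True,
                    remove_columns=["text"])

def group_texts(examples):
    # Concatenate all token ids and cut them into fixed-size blocks,
    # mirroring what TextDataset(block_size=128) used to do.
    ids = sum(examples["input_ids"], [])
    total = (len(ids) // block_size) * block_size
    blocks = [ids[i:i + block_size] for i in range(0, total, block_size)]
    return {"input_ids": blocks}

lm_datasets = tokenized.map(group_texts, batched=True,
                            remove_columns=tokenized["train"].column_names)
# DataCollatorForLanguageModeling(tokenizer, mlm=False) can then supply the labels.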
"\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers.utils.dummy_pt_objects import GPT2LMHeadModel\n", + "from transformers import GPT2Tokenizer\n", + "from transformers import GPT2Config, EncoderDecoderConfig, EncoderDecoderModel\n", + "\n", + "\n", + "class MultitaskModel(transformers.PreTrainedModel):\n", + " def __init__(self, encoder, taskmodels_dict):\n", + " \"\"\"\n", + " Setting MultitaskModel up as a PretrainedModel allows us\n", + " to take better advantage of Trainer features\n", + " \"\"\"\n", + " super().__init__(transformers.PretrainedConfig())\n", + "\n", + " self.encoder = encoder\n", + " self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)\n", + "\n", + " def _get_models(self):\n", + " return self.taskmodels_dict\n", + "\n", + " @classmethod\n", + " def create(cls, model_name, model_type_dict, model_config_dict):\n", + " \"\"\"\n", + " This creates a MultitaskModel using the model class and config objects\n", + " from single-task models. \n", + "\n", + " We do this by creating each single-task model, and having them share\n", + " the same encoder transformer.\n", + " \"\"\"\n", + " shared_encoder = None\n", + " taskmodels_dict = {}\n", + " for task_name, model_type in model_type_dict.items():\n", + " model = model_type.from_pretrained( \"gpt2\",\n", + " config=model_config_dict[task_name],\n", + " )\n", + " if shared_encoder is None:\n", + " shared_encoder = cls.get_encoder(model)\n", + " else:\n", + " setattr(model, \"encoder\", shared_encoder)\n", + " taskmodels_dict[task_name] = model\n", + " return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)\n", + " \n", + "\n", + " @classmethod\n", + " def get_encoder(cls, model):\n", + " \"\"\"\n", + " The encoder transformer is named differently in each model \"architecture\".\n", + " This method lets us get the name of the encoder attribute\n", + " \"\"\"\n", + " model_class_name = model.__class__.__name__\n", + " if model_class_name.startswith(\"Roberta\"):\n", + " return \"roberta-base\"\n", + " elif model_class_name.startswith(\"GPT2\"):\n", + " config = EncoderDecoderConfig.from_encoder_decoder_configs(model.config, model.config) \n", + " encoder_decoder = EncoderDecoderModel(config=config)\n", + " return encoder_decoder.config.encoder\n", + " else:\n", + " raise KeyError(f\"Add support for new model {model_class_name}\")\n", + " \n", + " def forward(self, task_name, **kwargs):\n", + " return self.taskmodels_dict[task_name](**kwargs)" + ], + "metadata": { + "id": "jJLxsM_H4A6z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model_name = \"gpt2\"\n", + "multitask_model = MultitaskModel.create(\n", + " model_name=model_name,\n", + " model_type_dict={\n", + " \"token\": transformers.AutoModelWithLMHead,\n", + " \"token_type\": transformers.AutoModelWithLMHead,\n", + " \"line\": transformers.AutoModelForSequenceClassification,\n", + " },\n", + " model_config_dict={\n", + " \"token\": transformers.AutoConfig.from_pretrained(model_name),\n", + " \"token_type\": transformers.AutoConfig.from_pretrained(model_name),\n", + " \"line\": transformers.AutoConfig.from_pretrained(model_name),\n", + " },\n", + ")" + ], + "metadata": { + "id": "9bbwa7E74Q0x", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "4aa35f34-7709-464a-8c6c-bc966a7f3be9" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "from 
transformers.utils.dummy_pt_objects import GPT2LMHeadModel\n", - "from transformers import GPT2Tokenizer\n", - "from transformers import GPT2Config, EncoderDecoderConfig, EncoderDecoderModel\n", - "\n", - "\n", - "class MultitaskModel(transformers.PreTrainedModel):\n", - " def __init__(self, encoder, taskmodels_dict):\n", - " \"\"\"\n", - " Setting MultitaskModel up as a PretrainedModel allows us\n", - " to take better advantage of Trainer features\n", - " \"\"\"\n", - " super().__init__(transformers.PretrainedConfig())\n", - "\n", - " self.encoder = encoder\n", - " self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)\n", - "\n", - " def _get_models(self):\n", - " return self.taskmodels_dict\n", - "\n", - " @classmethod\n", - " def create(cls, model_name, model_type_dict, model_config_dict):\n", - " \"\"\"\n", - " This creates a MultitaskModel using the model class and config objects\n", - " from single-task models. \n", - "\n", - " We do this by creating each single-task model, and having them share\n", - " the same encoder transformer.\n", - " \"\"\"\n", - " shared_encoder = None\n", - " taskmodels_dict = {}\n", - " for task_name, model_type in model_type_dict.items():\n", - " model = model_type.from_pretrained( \"gpt2\",\n", - " config=model_config_dict[task_name],\n", - " )\n", - " if shared_encoder is None:\n", - " shared_encoder = cls.get_encoder(model)\n", - " else:\n", - " setattr(model, \"encoder\", shared_encoder)\n", - " taskmodels_dict[task_name] = model\n", - " return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)\n", - " \n", - "\n", - " @classmethod\n", - " def get_encoder(cls, model):\n", - " \"\"\"\n", - " The encoder transformer is named differently in each model \"architecture\".\n", - " This method lets us get the name of the encoder attribute\n", - " \"\"\"\n", - " model_class_name = model.__class__.__name__\n", - " if model_class_name.startswith(\"Roberta\"):\n", - " return \"roberta-base\"\n", - " elif model_class_name.startswith(\"GPT2\"):\n", - " config = EncoderDecoderConfig.from_encoder_decoder_configs(model.config, model.config) \n", - " encoder_decoder = EncoderDecoderModel(config=config)\n", - " return encoder_decoder.config.encoder\n", - " else:\n", - " raise KeyError(f\"Add support for new model {model_class_name}\")\n", - " \n", - " def forward(self, task_name, **kwargs):\n", - " return self.taskmodels_dict[task_name](**kwargs)" - ], - "metadata": { - "id": "jJLxsM_H4A6z" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stderr", + "text": [ + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " 
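Purely as an illustration of how the shared-encoder wrapper dispatches on task names once `multitask_model` has been built by the cell that calls `MultitaskModel.create` (the token ids below are dummy values, not data from the notebook):

import torch

# Dummy GPT-2 token ids, just to show the per-task dispatch.
dummy_ids = torch.tensor([[50256, 1640, 1312, 287]])

lm_out = multitask_model("token", input_ids=dummy_ids, labels=dummy_ids)
cls_out = multitask_model("line", input_ids=dummy_ids)

print(lm_out.loss)           # causal-LM loss from the "token" head
print(cls_out.logits.shape)  # sequence-classification logits from the "line" head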
\"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.19.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.19.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.19.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:915: 
FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", + " FutureWarning,\n", + "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", + "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", + "\n", + "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", + "Set `config.is_decoder=True` and `config.add_cross_attention=True` for decoder_config\n", + "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", + "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", + "\n", + "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", + "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", + "All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n", + "\n", + "Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Check that we have a GPU\n", + "!nvidia-smi\n", + "# Check that PyTorch sees it\n", + "import torch\n", + "torch.cuda.is_available()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "CNu7NKoYpvCP", + "outputId": "ae4d73f1-4657-4580-b85a-bfd1410bee7b" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "model_name = \"gpt2\"\n", - "multitask_model = MultitaskModel.create(\n", - " model_name=model_name,\n", - " model_type_dict={\n", - " \"token\": transformers.AutoModelWithLMHead,\n", - " \"token_type\": transformers.AutoModelWithLMHead,\n", - " \"line\": transformers.AutoModelForSequenceClassification,\n", - " },\n", - " model_config_dict={\n", - " \"token\": transformers.AutoConfig.from_pretrained(model_name),\n", - " \"token_type\": transformers.AutoConfig.from_pretrained(model_name),\n", - " \"line\": transformers.AutoConfig.from_pretrained(model_name),\n", - " },\n", - ")" - ], - "metadata": { - "id": "9bbwa7E74Q0x", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "4aa35f34-7709-464a-8c6c-bc966a7f3be9" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": 
[ - "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", - "Model config GPT2Config {\n", - " \"_name_or_path\": \"gpt2\",\n", - " \"activation_function\": \"gelu_new\",\n", - " \"architectures\": [\n", - " \"GPT2LMHeadModel\"\n", - " ],\n", - " \"attn_pdrop\": 0.1,\n", - " \"bos_token_id\": 50256,\n", - " \"embd_pdrop\": 0.1,\n", - " \"eos_token_id\": 50256,\n", - " \"initializer_range\": 0.02,\n", - " \"layer_norm_epsilon\": 1e-05,\n", - " \"model_type\": \"gpt2\",\n", - " \"n_ctx\": 1024,\n", - " \"n_embd\": 768,\n", - " \"n_head\": 12,\n", - " \"n_inner\": null,\n", - " \"n_layer\": 12,\n", - " \"n_positions\": 1024,\n", - " \"reorder_and_upcast_attn\": false,\n", - " \"resid_pdrop\": 0.1,\n", - " \"scale_attn_by_inverse_layer_idx\": false,\n", - " \"scale_attn_weights\": true,\n", - " \"summary_activation\": null,\n", - " \"summary_first_dropout\": 0.1,\n", - " \"summary_proj_to_labels\": true,\n", - " \"summary_type\": \"cls_index\",\n", - " \"summary_use_proj\": true,\n", - " \"task_specific_params\": {\n", - " \"text-generation\": {\n", - " \"do_sample\": true,\n", - " \"max_length\": 50\n", - " }\n", - " },\n", - " \"transformers_version\": \"4.19.0.dev0\",\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 50257\n", - "}\n", - "\n", - "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", - "Model config GPT2Config {\n", - " \"_name_or_path\": \"gpt2\",\n", - " \"activation_function\": \"gelu_new\",\n", - " \"architectures\": [\n", - " \"GPT2LMHeadModel\"\n", - " ],\n", - " \"attn_pdrop\": 0.1,\n", - " \"bos_token_id\": 50256,\n", - " \"embd_pdrop\": 0.1,\n", - " \"eos_token_id\": 50256,\n", - " \"initializer_range\": 0.02,\n", - " \"layer_norm_epsilon\": 1e-05,\n", - " \"model_type\": \"gpt2\",\n", - " \"n_ctx\": 1024,\n", - " \"n_embd\": 768,\n", - " \"n_head\": 12,\n", - " \"n_inner\": null,\n", - " \"n_layer\": 12,\n", - " \"n_positions\": 1024,\n", - " \"reorder_and_upcast_attn\": false,\n", - " \"resid_pdrop\": 0.1,\n", - " \"scale_attn_by_inverse_layer_idx\": false,\n", - " \"scale_attn_weights\": true,\n", - " \"summary_activation\": null,\n", - " \"summary_first_dropout\": 0.1,\n", - " \"summary_proj_to_labels\": true,\n", - " \"summary_type\": \"cls_index\",\n", - " \"summary_use_proj\": true,\n", - " \"task_specific_params\": {\n", - " \"text-generation\": {\n", - " \"do_sample\": true,\n", - " \"max_length\": 50\n", - " }\n", - " },\n", - " \"transformers_version\": \"4.19.0.dev0\",\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 50257\n", - "}\n", - "\n", - "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", - "Model config GPT2Config {\n", - " \"_name_or_path\": \"gpt2\",\n", - " \"activation_function\": \"gelu_new\",\n", - " \"architectures\": [\n", - " \"GPT2LMHeadModel\"\n", - " ],\n", - " \"attn_pdrop\": 0.1,\n", - " \"bos_token_id\": 50256,\n", - " \"embd_pdrop\": 0.1,\n", - " \"eos_token_id\": 50256,\n", - " 
\"initializer_range\": 0.02,\n", - " \"layer_norm_epsilon\": 1e-05,\n", - " \"model_type\": \"gpt2\",\n", - " \"n_ctx\": 1024,\n", - " \"n_embd\": 768,\n", - " \"n_head\": 12,\n", - " \"n_inner\": null,\n", - " \"n_layer\": 12,\n", - " \"n_positions\": 1024,\n", - " \"reorder_and_upcast_attn\": false,\n", - " \"resid_pdrop\": 0.1,\n", - " \"scale_attn_by_inverse_layer_idx\": false,\n", - " \"scale_attn_weights\": true,\n", - " \"summary_activation\": null,\n", - " \"summary_first_dropout\": 0.1,\n", - " \"summary_proj_to_labels\": true,\n", - " \"summary_type\": \"cls_index\",\n", - " \"summary_use_proj\": true,\n", - " \"task_specific_params\": {\n", - " \"text-generation\": {\n", - " \"do_sample\": true,\n", - " \"max_length\": 50\n", - " }\n", - " },\n", - " \"transformers_version\": \"4.19.0.dev0\",\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 50257\n", - "}\n", - "\n", - "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:915: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", - " FutureWarning,\n", - "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", - "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", - "\n", - "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", - "Set `config.is_decoder=True` and `config.add_cross_attention=True` for decoder_config\n", - "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", - "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", - "\n", - "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", - "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", - "All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n", - "\n", - "Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. 
Make sure that the latest NVIDIA driver is installed and running.\n", + "\n" + ] }, { - "cell_type": "code", - "source": [ - "# Check that we have a GPU\n", - "!nvidia-smi\n", - "# Check that PyTorch sees it\n", - "import torch\n", - "torch.cuda.is_available()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "CNu7NKoYpvCP", - "outputId": "ae4d73f1-4657-4580-b85a-bfd1410bee7b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.\n", - "\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "False" - ] - }, - "metadata": {}, - "execution_count": 35 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "False" ] + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import dataclasses\n", + "from torch.utils.data.dataloader import DataLoader\n", + "from transformers.data.data_collator import DataCollatorForLanguageModeling, InputDataClass, DefaultDataCollator\n", + "from torch.utils.data.distributed import DistributedSampler\n", + "from torch.utils.data.sampler import RandomSampler\n", + "from typing import List, Union, Dict\n", + "from transformers import Trainer\n", + "from random import random\n", + "\n", + "\n", + "class NLPDataCollator(DataCollatorForLanguageModeling):\n", + " \"\"\"\n", + " Extending the existing DataCollator to work with NLP dataset batches\n", + " \"\"\"\n", + " def collate_batch(self, features: List[Union[InputDataClass, Dict]]) -> Dict[str, torch.Tensor]:\n", + " first = features[0]\n", + " if isinstance(first, dict):\n", + " # NLP data sets current works presents features as lists of dictionary\n", + " # (one per example), so we will adapt the collate_batch logic for that\n", + " if \"labels\" in first and first[\"labels\"] is not None:\n", + " if first[\"labels\"].dtype == torch.int64:\n", + " labels = torch.tensor([f[\"labels\"] for f in features], dtype=torch.long)\n", + " else:\n", + " labels = torch.tensor([f[\"labels\"] for f in features], dtype=torch.float)\n", + " batch = {\"labels\": labels}\n", + " for k, v in first.items():\n", + " if k != \"labels\" and v is not None and not isinstance(v, str):\n", + " batch[k] = torch.stack([f[k] for f in features])\n", + " return batch\n", + " else:\n", + " # otherwise, revert to using the default collate_batch\n", + " return DefaultDataCollator().collate_batch(features)\n", + "\n", + "\n", + "class StrIgnoreDevice(str):\n", + " \"\"\"\n", + " This is a hack. 
The Trainer is going call .to(device) on every input\n", + " value, but we need to pass in an additional `task_name` string.\n", + " This prevents it from throwing an error\n", + " \"\"\"\n", + " def to(self, device):\n", + " return self\n", + "\n", + "class DataLoaderWithTaskname:\n", + " \"\"\"\n", + " Wrapper around a DataLoader to also yield a task name\n", + " \"\"\"\n", + " def __init__(self, task_name, data_loader):\n", + " self.task_name = task_name\n", + " self.data_loader = data_loader\n", + "\n", + " self.batch_size = data_loader.batch_size\n", + " self.dataset = data_loader.dataset\n", + "\n", + " def __len__(self):\n", + " return len(self.data_loader)\n", + " \n", + " def __iter__(self):\n", + " for batch in self.data_loader:\n", + " batch[\"task_name\"] = StrIgnoreDevice(self.task_name)\n", + " yield batch\n", + "\n", + "\n", + "class MultitaskDataloader:\n", + " \"\"\"\n", + " Data loader that combines and samples from multiple single-task\n", + " data loaders.\n", + " \"\"\"\n", + " def __init__(self, dataloader_dict):\n", + " self.dataloader_dict = dataloader_dict\n", + " self.num_batches_dict = {\n", + " task_name: len(dataloader) \n", + " for task_name, dataloader in self.dataloader_dict.items()\n", + " }\n", + " self.task_name_list = list(self.dataloader_dict)\n", + " self.dataset = [None] * sum(\n", + " len(dataloader.dataset) \n", + " for dataloader in self.dataloader_dict.values()\n", + " )\n", + "\n", + " def __len__(self):\n", + " return sum(self.num_batches_dict.values())\n", + "\n", + " def __iter__(self):\n", + " \"\"\"\n", + " For each batch, sample a task, and yield a batch from the respective\n", + " task Dataloader.\n", + "\n", + " We use size-proportional sampling, but you could easily modify this\n", + " to sample from some-other distribution.\n", + " \"\"\"\n", + " task_choice_list = []\n", + " for i, task_name in enumerate(self.task_name_list):\n", + " task_choice_list += [i] * self.num_batches_dict[task_name]\n", + " task_choice_list = np.array(task_choice_list)\n", + " np.random.shuffle(task_choice_list)\n", + " dataloader_iter_dict = {\n", + " task_name: iter(dataloader) \n", + " for task_name, dataloader in self.dataloader_dict.items()\n", + " }\n", + " for task_choice in task_choice_list:\n", + " task_name = self.task_name_list[task_choice]\n", + " yield next(dataloader_iter_dict[task_name]) \n", + "\n", + "class MultitaskTrainer(transformers.Trainer):\n", + "\n", + " def get_single_train_dataloader(self, task_name, train_dataset):\n", + " \"\"\"\n", + " Create a single-task data loader that also yields task names\n", + " \"\"\"\n", + " if self.train_dataset is None:\n", + " raise ValueError(\"Trainer: training requires a train_dataset.\")\n", + " \n", + " train_sampler = (\n", + " RandomSampler(train_dataset)\n", + " if self.args.local_rank == -1\n", + " else DistributedSampler(train_dataset)\n", + " )\n", + "\n", + " data_loader = DataLoaderWithTaskname(\n", + " task_name=task_name,\n", + " data_loader=DataLoader(\n", + " train_dataset,\n", + " batch_size=self.args.train_batch_size,\n", + " sampler=train_sampler\n", + " ),\n", + " )\n", + "\n", + " return data_loader\n", + "\n", + " def get_train_dataloader(self):\n", + " \"\"\"\n", + " Returns a MultitaskDataloader, which is not actually a Dataloader\n", + " but an iterable that returns a generator that samples from each \n", + " task Dataloader\n", + " \"\"\"\n", + " return MultitaskDataloader({\n", + " task_name: self.get_single_train_dataloader(task_name, task_dataset)\n", + " for task_name, 
task_dataset in self.train_dataset.items()\n", + " })\n", + " \n", + " def train(self):\n", + " config = transformers.AutoConfig.from_pretrained(\"gpt2\")\n", + " model = transformers.AutoModelWithLMHead.from_pretrained(\"gpt2\", config=config)\n", + " self.trainer = Trainer(\n", + " model=model,\n", + " args=transformers.TrainingArguments(\n", + " output_dir=\"./models/multitask_model\",\n", + " overwrite_output_dir=True,\n", + " learning_rate=1e-5,\n", + " do_train=True,\n", + " num_train_epochs=100,\n", + " # Adjust batch size if this doesn't fit on the Colab GPU\n", + " per_device_train_batch_size=8, \n", + " save_steps=3000,\n", + " ),\n", + " data_collator=data_collator,\n", + " train_dataset=train_dataset,\n", + " )\n", + " self.trainer.train()\n", + "\n", + " def prediction_loop(self):\n", + " return self.trainer.predict()\n", + "\n", + " def compute_loss(self, model, inputs, return_outputs=True):\n", + " labels = inputs.get(\"labels\")\n", + " # forward pass\n", + " outputs = model(**inputs)\n", + " reranking_layer(outputs, inputs._get_value(), tokenizer=tokenizer) #input value is tensor\n", + " logits = outputs.get(\"logits\")\n", + " loss_fct = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 2.0, 3.0]))\n", + " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", + " return (loss, outputs) if return_outputs else loss" + ], + "metadata": { + "id": "dzVHNee8EP-T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "tzNxUL7gLsYV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "trainer = MultitaskTrainer(\n", + " model=multitask_model,\n", + " args=transformers.TrainingArguments(\n", + " output_dir=\"./models/multitask_model\",\n", + " overwrite_output_dir=True,\n", + " learning_rate=1e-5,\n", + " do_train=True,\n", + " num_train_epochs=100,\n", + " # Adjust batch size if this doesn't fit on the Colab GPU\n", + " per_device_train_batch_size=8, \n", + " save_steps=3000,\n", + " ),\n", + " data_collator=data_collator,\n", + ")\n", + "trainer.train()\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, + "id": "TJ3CrZfgHGKq", + "outputId": "30aa520a-c8c4-4b53-8659-9f30b068562c" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "import dataclasses\n", - "from torch.utils.data.dataloader import DataLoader\n", - "from transformers.data.data_collator import DataCollatorForLanguageModeling, InputDataClass, DefaultDataCollator\n", - "from torch.utils.data.distributed import DistributedSampler\n", - "from torch.utils.data.sampler import RandomSampler\n", - "from typing import List, Union, Dict\n", - "from transformers import Trainer\n", - "from random import random\n", - "\n", - "\n", - "class NLPDataCollator(DataCollatorForLanguageModeling):\n", - " \"\"\"\n", - " Extending the existing DataCollator to work with NLP dataset batches\n", - " \"\"\"\n", - " def collate_batch(self, features: List[Union[InputDataClass, Dict]]) -> Dict[str, torch.Tensor]:\n", - " first = features[0]\n", - " if isinstance(first, dict):\n", - " # NLP data sets current works presents features as lists of dictionary\n", - " # (one per example), so we will adapt the collate_batch logic for that\n", - " if \"labels\" in first and first[\"labels\"] is not None:\n", - " if first[\"labels\"].dtype == torch.int64:\n", - " labels = torch.tensor([f[\"labels\"] for f in features], 
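The MultitaskDataloader docstring above notes that size-proportional sampling could be swapped for another distribution; a small, hypothetical sketch of temperature-scaled task sampling (tau=1 reproduces the size-proportional schedule, smaller values flatten it towards uniform) could look like this:

import numpy as np

def sample_task_schedule(num_batches_dict, tau=0.5, seed=0):
    # Draw one task name per step with probability proportional to size**tau.
    rng = np.random.default_rng(seed)
    names = list(num_batches_dict)
    sizes = np.array([num_batches_dict[n] for n in names], dtype=float)
    probs = sizes ** tau / (sizes ** tau).sum()
    steps = int(sizes.sum())
    return [names[i] for i in rng.choice(len(names), size=steps, p=probs)]

# e.g. sample_task_schedule({"token": 36, "token_type": 36, "line": 36})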
dtype=torch.long)\n", - " else:\n", - " labels = torch.tensor([f[\"labels\"] for f in features], dtype=torch.float)\n", - " batch = {\"labels\": labels}\n", - " for k, v in first.items():\n", - " if k != \"labels\" and v is not None and not isinstance(v, str):\n", - " batch[k] = torch.stack([f[k] for f in features])\n", - " return batch\n", - " else:\n", - " # otherwise, revert to using the default collate_batch\n", - " return DefaultDataCollator().collate_batch(features)\n", - "\n", - "\n", - "class StrIgnoreDevice(str):\n", - " \"\"\"\n", - " This is a hack. The Trainer is going call .to(device) on every input\n", - " value, but we need to pass in an additional `task_name` string.\n", - " This prevents it from throwing an error\n", - " \"\"\"\n", - " def to(self, device):\n", - " return self\n", - "\n", - "class DataLoaderWithTaskname:\n", - " \"\"\"\n", - " Wrapper around a DataLoader to also yield a task name\n", - " \"\"\"\n", - " def __init__(self, task_name, data_loader):\n", - " self.task_name = task_name\n", - " self.data_loader = data_loader\n", - "\n", - " self.batch_size = data_loader.batch_size\n", - " self.dataset = data_loader.dataset\n", - "\n", - " def __len__(self):\n", - " return len(self.data_loader)\n", - " \n", - " def __iter__(self):\n", - " for batch in self.data_loader:\n", - " batch[\"task_name\"] = StrIgnoreDevice(self.task_name)\n", - " yield batch\n", - "\n", - "\n", - "class MultitaskDataloader:\n", - " \"\"\"\n", - " Data loader that combines and samples from multiple single-task\n", - " data loaders.\n", - " \"\"\"\n", - " def __init__(self, dataloader_dict):\n", - " self.dataloader_dict = dataloader_dict\n", - " self.num_batches_dict = {\n", - " task_name: len(dataloader) \n", - " for task_name, dataloader in self.dataloader_dict.items()\n", - " }\n", - " self.task_name_list = list(self.dataloader_dict)\n", - " self.dataset = [None] * sum(\n", - " len(dataloader.dataset) \n", - " for dataloader in self.dataloader_dict.values()\n", - " )\n", - "\n", - " def __len__(self):\n", - " return sum(self.num_batches_dict.values())\n", - "\n", - " def __iter__(self):\n", - " \"\"\"\n", - " For each batch, sample a task, and yield a batch from the respective\n", - " task Dataloader.\n", - "\n", - " We use size-proportional sampling, but you could easily modify this\n", - " to sample from some-other distribution.\n", - " \"\"\"\n", - " task_choice_list = []\n", - " for i, task_name in enumerate(self.task_name_list):\n", - " task_choice_list += [i] * self.num_batches_dict[task_name]\n", - " task_choice_list = np.array(task_choice_list)\n", - " np.random.shuffle(task_choice_list)\n", - " dataloader_iter_dict = {\n", - " task_name: iter(dataloader) \n", - " for task_name, dataloader in self.dataloader_dict.items()\n", - " }\n", - " for task_choice in task_choice_list:\n", - " task_name = self.task_name_list[task_choice]\n", - " yield next(dataloader_iter_dict[task_name]) \n", - "\n", - "class MultitaskTrainer(transformers.Trainer):\n", - "\n", - " def get_single_train_dataloader(self, task_name, train_dataset):\n", - " \"\"\"\n", - " Create a single-task data loader that also yields task names\n", - " \"\"\"\n", - " if self.train_dataset is None:\n", - " raise ValueError(\"Trainer: training requires a train_dataset.\")\n", - " \n", - " train_sampler = (\n", - " RandomSampler(train_dataset)\n", - " if self.args.local_rank == -1\n", - " else DistributedSampler(train_dataset)\n", - " )\n", - "\n", - " data_loader = DataLoaderWithTaskname(\n", - " task_name=task_name,\n", - " 
data_loader=DataLoader(\n", - " train_dataset,\n", - " batch_size=self.args.train_batch_size,\n", - " sampler=train_sampler\n", - " ),\n", - " )\n", - "\n", - " return data_loader\n", - "\n", - " def get_train_dataloader(self):\n", - " \"\"\"\n", - " Returns a MultitaskDataloader, which is not actually a Dataloader\n", - " but an iterable that returns a generator that samples from each \n", - " task Dataloader\n", - " \"\"\"\n", - " return MultitaskDataloader({\n", - " task_name: self.get_single_train_dataloader(task_name, task_dataset)\n", - " for task_name, task_dataset in self.train_dataset.items()\n", - " })\n", - " \n", - " def train(self):\n", - " config = transformers.AutoConfig.from_pretrained(\"gpt2\")\n", - " model = transformers.AutoModelWithLMHead.from_pretrained(\"gpt2\", config=config)\n", - " trainer = Trainer(\n", - " model=model,\n", - " args=transformers.TrainingArguments(\n", - " output_dir=\"./models/multitask_model\",\n", - " overwrite_output_dir=True,\n", - " learning_rate=1e-5,\n", - " do_train=True,\n", - " num_train_epochs=100,\n", - " # Adjust batch size if this doesn't fit on the Colab GPU\n", - " per_device_train_batch_size=8, \n", - " save_steps=3000,\n", - " ),\n", - " data_collator=data_collator,\n", - " train_dataset=train_dataset,\n", - " )\n", - " trainer.train()\n", - "\n", - " def compute_loss(self, model, inputs, return_outputs=True):\n", - " labels = inputs.get(\"labels\")\n", - " # forward pass\n", - " outputs = model(**inputs)\n", - " reranking_layer(outputs, inputs._get_value(), tokenizer=tokenizer) #input value is tensor\n", - " logits = outputs.get(\"logits\")\n", - " loss_fct = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 2.0, 3.0]))\n", - " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", - " return (loss, outputs) if return_outputs else loss" - ], - "metadata": { - "id": "dzVHNee8EP-T" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "" - ], - "metadata": { - "id": "tzNxUL7gLsYV" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. 
You should start updating your code and make this info disappear :-).\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.19.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:915: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", + " FutureWarning,\n", + "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", + "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", + "\n", + "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n", + "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " FutureWarning,\n", + "***** Running training *****\n", + " Num examples = 288\n", + " Num Epochs = 100\n", + " Instantaneous batch size per device = 8\n", + " Total train batch size (w. 
parallel, distributed & accumulation) = 8\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 3600\n" + ] }, { - "cell_type": "code", - "source": [ - "trainer = MultitaskTrainer(\n", - " model=multitask_model,\n", - " args=transformers.TrainingArguments(\n", - " output_dir=\"./models/multitask_model\",\n", - " overwrite_output_dir=True,\n", - " learning_rate=1e-5,\n", - " do_train=True,\n", - " num_train_epochs=100,\n", - " # Adjust batch size if this doesn't fit on the Colab GPU\n", - " per_device_train_batch_size=8, \n", - " save_steps=3000,\n", - " ),\n", - " data_collator=data_collator,\n", - ")\n", - "trainer.train()\n", - "\n" + "output_type": "display_data", + "data": { + "text/plain": [ + "" ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "TJ3CrZfgHGKq", - "outputId": "30aa520a-c8c4-4b53-8659-9f30b068562c" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "PyTorch: setting up devices\n", - "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n", - "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", - "Model config GPT2Config {\n", - " \"_name_or_path\": \"gpt2\",\n", - " \"activation_function\": \"gelu_new\",\n", - " \"architectures\": [\n", - " \"GPT2LMHeadModel\"\n", - " ],\n", - " \"attn_pdrop\": 0.1,\n", - " \"bos_token_id\": 50256,\n", - " \"embd_pdrop\": 0.1,\n", - " \"eos_token_id\": 50256,\n", - " \"initializer_range\": 0.02,\n", - " \"layer_norm_epsilon\": 1e-05,\n", - " \"model_type\": \"gpt2\",\n", - " \"n_ctx\": 1024,\n", - " \"n_embd\": 768,\n", - " \"n_head\": 12,\n", - " \"n_inner\": null,\n", - " \"n_layer\": 12,\n", - " \"n_positions\": 1024,\n", - " \"reorder_and_upcast_attn\": false,\n", - " \"resid_pdrop\": 0.1,\n", - " \"scale_attn_by_inverse_layer_idx\": false,\n", - " \"scale_attn_weights\": true,\n", - " \"summary_activation\": null,\n", - " \"summary_first_dropout\": 0.1,\n", - " \"summary_proj_to_labels\": true,\n", - " \"summary_type\": \"cls_index\",\n", - " \"summary_use_proj\": true,\n", - " \"task_specific_params\": {\n", - " \"text-generation\": {\n", - " \"do_sample\": true,\n", - " \"max_length\": 50\n", - " }\n", - " },\n", - " \"transformers_version\": \"4.19.0.dev0\",\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 50257\n", - "}\n", - "\n", - "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:915: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. 
Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", - " FutureWarning,\n", - "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", - "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", - "\n", - "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", - "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", - "PyTorch: setting up devices\n", - "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n", - "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " FutureWarning,\n", - "***** Running training *****\n", - " Num examples = 288\n", - " Num Epochs = 100\n", - " Instantaneous batch size per device = 8\n", - " Total train batch size (w. parallel, distributed & accumulation) = 8\n", - " Gradient Accumulation steps = 1\n", - " Total optimization steps = 3600\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [ 383/3600 1:49:51 < 15:27:36, 0.06 it/s, Epoch 10.61/100]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StepTraining Loss

" - ] - }, - "metadata": {} - } + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [ 383/3600 1:49:51 < 15:27:36, 0.06 it/s, Epoch 10.61/100]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining Loss

" ] - }, - { - "cell_type": "code", - "source": [ - " preds_dict = {}\n", - " for task_name in [\"token\", \"token_type\", \"line\"]:\n", - " eval_dataloader = DataLoaderWithTaskname(\n", - " task_name,\n", - " trainer.get_eval_dataloader(eval_dataset=dataset_dict[task_name])\n", - " )\n", - " print(eval_dataloader.data_loader.collate_fn)\n", - " preds_dict[task_name] = trainer.prediction_loop(\n", - " eval_dataloader, \n", - " description=f\"Validation: {task_name}\",\n", - " )\n", - "\n", - "\n", - " print(preds_dict)" - ], - "metadata": { - "id": "Xgw82zyxp-_5" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from sklearn.metrics import accuracy_score, label_ranking_average_precision_score\n", - "\n", - "accuracy_dict = {}\n", - "mrr_dict = {}\n", - "\n", - "for task_name in [\"token\", \"token_type\", \"line\"]:\n", - " accuracy_dict[task_name] = accuracy_score(preds_dict[task_name].predictions.flatten(),\n", - " preds_dict[task_name].label_ids)\n", - " \n", - " mrr_dict[task_name] = label_ranking_average_precision_score(preds_dict[task_name].predictions.flatten(),\n", - " preds_dict[task_name].label_ids)\n", - " " - ], - "metadata": { - "id": "XWUxUWUVE5Dq" - }, - "execution_count": null, - "outputs": [] + }, + "metadata": {} } - ] + ] + }, + { + "cell_type": "code", + "source": [ + " preds_dict = {}\n", + " for task_name in [\"token\", \"token_type\", \"line\"]:\n", + " eval_dataloader = DataLoaderWithTaskname(\n", + " task_name,\n", + " trainer.get_eval_dataloader(eval_dataset=dataset_dict[task_name])\n", + " )\n", + " print(eval_dataloader.data_loader.collate_fn)\n", + " preds_dict[task_name] = trainer.prediction_loop(\n", + " eval_dataloader, \n", + " description=f\"Validation: {task_name}\",\n", + " )\n", + "\n", + "\n", + " print(preds_dict)" + ], + "metadata": { + "id": "Xgw82zyxp-_5" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import accuracy_score, label_ranking_average_precision_score\n", + "\n", + "accuracy_dict = {}\n", + "mrr_dict = {}\n", + "\n", + "for task_name in [\"token\", \"token_type\", \"line\"]:\n", + " accuracy_dict[task_name] = accuracy_score(preds_dict[task_name].predictions.flatten(),\n", + " preds_dict[task_name].label_ids)\n", + " \n", + " mrr_dict[task_name] = label_ranking_average_precision_score(preds_dict[task_name].predictions.flatten(),\n", + " preds_dict[task_name].label_ids)\n", + " " + ], + "metadata": { + "id": "XWUxUWUVE5Dq" + }, + "execution_count": null, + "outputs": [] + } + ] } \ No newline at end of file diff --git a/notebooks/Evaluation_of_CodeFill.ipynb b/notebooks/Evaluation_of_CodeFill.ipynb new file mode 100644 index 0000000..c31c519 --- /dev/null +++ b/notebooks/Evaluation_of_CodeFill.ipynb @@ -0,0 +1,1422 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Evaluation of CodeFill.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "03103d813d964d819b6a2c0a1afac919": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b478a5503c314e8880931716b4d3be31", + "IPY_MODEL_600a7388c66a464285e598195ef980a0", + "IPY_MODEL_c2f23a4bfec2424f8556a460757b7f7c" + ], + "layout": "IPY_MODEL_f8962c79a027458e8d282f95b9db14e6" + } + }, + "b478a5503c314e8880931716b4d3be31": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17fd247324f84aedbd3cf77d9e6a9970", + "placeholder": "​", + "style": "IPY_MODEL_a44ac743025e4b9db4161a1215b35904", + "value": "Downloading: 100%" + } + }, + "600a7388c66a464285e598195ef980a0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ca2c310b21a049aa85be1be907b14456", + "max": 548118077, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3097af23a3e547f9a34d6d5e61e189c9", + "value": 548118077 + } + }, + "c2f23a4bfec2424f8556a460757b7f7c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36fcac3bb12e4934a548fcc2e85eee8a", + "placeholder": "​", + "style": "IPY_MODEL_74028709db644e8c9ef1865cf4396e63", + "value": " 523M/523M [00:12<00:00, 41.5MB/s]" + } + }, + "f8962c79a027458e8d282f95b9db14e6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "17fd247324f84aedbd3cf77d9e6a9970": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a44ac743025e4b9db4161a1215b35904": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ca2c310b21a049aa85be1be907b14456": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3097af23a3e547f9a34d6d5e61e189c9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"36fcac3bb12e4934a548fcc2e85eee8a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "74028709db644e8c9ef1865cf4396e63": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Install the correct dependencies on HuggingFace transformer and ternsorflow" + ], + "metadata": { + "id": "xIT_uHUdThub" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "f3KWTckbTUs2", + "outputId": "3a3b09c5-4700-4c1e-8a90-5f74584fdd8c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Found existing installation: tensorflow 2.8.0\n", + "Uninstalling tensorflow-2.8.0:\n", + " Successfully uninstalled tensorflow-2.8.0\n", + "Collecting git+https://github.com/huggingface/transformers\n", + " Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-plp9s8wa\n", + " Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-plp9s8wa\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing wheel metadata ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (2.23.0)\n", + "Collecting huggingface-hub<1.0,>=0.1.0\n", + " Downloading huggingface_hub-0.6.0-py3-none-any.whl (84 kB)\n", + "\u001b[K |████████████████████████████████| 84 kB 3.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (4.11.3)\n", + "Collecting tokenizers!=0.11.3,<0.13,>=0.11.1\n", + " Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)\n", + "\u001b[K |████████████████████████████████| 6.6 MB 41.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (21.3)\n", + "Collecting pyyaml>=5.1\n", + " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", + "\u001b[K |████████████████████████████████| 596 kB 52.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (4.64.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (1.21.6)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (3.6.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers==4.20.0.dev0) (2019.12.20)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.20.0.dev0) (4.2.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers==4.20.0.dev0) (3.0.8)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers==4.20.0.dev0) (3.8.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.20.0.dev0) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.20.0.dev0) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.20.0.dev0) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers==4.20.0.dev0) (2.10)\n", + "Building wheels for collected packages: transformers\n", + " Building wheel for transformers (PEP 517) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for transformers: filename=transformers-4.20.0.dev0-py3-none-any.whl size=4171103 sha256=8b39ec060327b98dc94c8a22f4026398eecafcdceea6576419f831cee6db09b5\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0z34eq2h/wheels/35/2e/a7/d819e3310040329f0f47e57c9e3e7a7338aa5e74c49acfe522\n", + "Successfully built transformers\n", + "Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers\n", + " Attempting uninstall: pyyaml\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + "Successfully installed huggingface-hub-0.6.0 pyyaml-6.0 tokenizers-0.12.1 transformers-4.20.0.dev0\n", + "tokenizers 0.12.1\n", + "transformers 4.20.0.dev0\n", + "Collecting nlp==0.2.0\n", + " Downloading nlp-0.2.0-py3-none-any.whl (857 kB)\n", + "\u001b[K |████████████████████████████████| 857 kB 28.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (0.3.4)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (1.21.6)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (4.64.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (3.6.0)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (2.23.0)\n", + "Requirement already satisfied: pyarrow>=0.16.0 in /usr/local/lib/python3.7/dist-packages (from nlp==0.2.0) (6.0.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2021.10.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->nlp==0.2.0) (2.10)\n", + "Installing collected packages: nlp\n", + "Successfully installed nlp-0.2.0\n", + "Collecting datasets\n", + " Downloading datasets-2.2.1-py3-none-any.whl (342 kB)\n", + "\u001b[K |████████████████████████████████| 342 kB 20.2 MB/s \n", + "\u001b[?25hRequirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.6.0)\n", + "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", + "Collecting responses<0.19\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n", + 
"Collecting xxhash\n", + " Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", + "\u001b[K |████████████████████████████████| 212 kB 47.2 MB/s \n", + "\u001b[?25hRequirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.0)\n", + "Collecting aiohttp\n", + " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", + "\u001b[K |████████████████████████████████| 1.1 MB 56.8 MB/s \n", + "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.11.3)\n", + "Collecting fsspec[http]>=2021.05.0\n", + " Downloading fsspec-2022.3.0-py3-none-any.whl (136 kB)\n", + "\u001b[K |████████████████████████████████| 136 kB 72.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (6.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.2.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.8)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.10.8)\n", + "Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1\n", + " Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)\n", + "\u001b[K |████████████████████████████████| 127 kB 65.9 MB/s \n", + "\u001b[?25hRequirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.12)\n", + "Collecting frozenlist>=1.1.1\n", + " Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", + "\u001b[K |████████████████████████████████| 144 kB 43.4 MB/s \n", + "\u001b[?25hCollecting aiosignal>=1.1.2\n", + " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", + "Collecting asynctest==0.13.0\n", + " Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.4.0)\n", + "Collecting multidict<7.0,>=4.5\n", + " Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n", + "\u001b[K |████████████████████████████████| 94 kB 3.2 MB/s \n", + "\u001b[?25hCollecting yarl<2.0,>=1.0\n", + " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", + "\u001b[K |████████████████████████████████| 271 kB 52.3 MB/s \n", + "\u001b[?25hCollecting async-timeout<5.0,>=4.0.0a3\n", + " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", + 
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.8.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", + "Installing collected packages: multidict, frozenlist, yarl, urllib3, asynctest, async-timeout, aiosignal, fsspec, aiohttp, xxhash, responses, datasets\n", + " Attempting uninstall: urllib3\n", + " Found existing installation: urllib3 1.24.3\n", + " Uninstalling urllib3-1.24.3:\n", + " Successfully uninstalled urllib3-1.24.3\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "kapre 0.3.7 requires tensorflow>=2.0.0, which is not installed.\n", + "datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.\u001b[0m\n", + "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-2.2.1 frozenlist-1.3.0 fsspec-2022.3.0 multidict-6.0.2 responses-0.18.0 urllib3-1.25.11 xxhash-3.0.0 yarl-1.7.2\n", + "Collecting git+https://github.com/huggingface/nlp\n", + " Cloning https://github.com/huggingface/nlp to /tmp/pip-req-build-feh2or5e\n", + " Running command git clone -q https://github.com/huggingface/nlp /tmp/pip-req-build-feh2or5e\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (1.21.6)\n", + "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (6.0.1)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (0.3.4)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (1.3.5)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (2.23.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (4.64.0)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (3.0.0)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (0.70.12.2)\n", + "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (2022.3.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (3.8.1)\n", + "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (0.6.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (21.3)\n", + "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (0.18.0)\n", + "Requirement already satisfied: importlib_metadata in /usr/local/lib/python3.7/dist-packages (from datasets==2.2.2.dev0) (4.11.3)\n", + "Requirement already 
satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.2.2.dev0) (3.6.0)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.2.2.dev0) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==2.2.2.dev0) (4.2.0)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==2.2.2.dev0) (3.0.8)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.2.2.dev0) (2021.10.8)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.2.2.dev0) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.2.2.dev0) (1.25.11)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==2.2.2.dev0) (3.0.4)\n", + "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (2.0.12)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (21.4.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (1.7.2)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (4.0.2)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (6.0.2)\n", + "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (0.13.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (1.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==2.2.2.dev0) (1.3.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib_metadata->datasets==2.2.2.dev0) (3.8.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.2.2.dev0) (2022.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==2.2.2.dev0) (2.8.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==2.2.2.dev0) (1.15.0)\n", + "Building wheels for collected packages: datasets\n", + " Building wheel for datasets (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for datasets: filename=datasets-2.2.2.dev0-py3-none-any.whl size=344393 sha256=a978a302726ae208dbbf56e886c30425f98b51e04f16a8abf0993ea4f0f473a7\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-f7wyr7sc/wheels/b7/b2/b6/a0b4e0d11cb66d705e54f7bb72fdbe910b5e9f198ada8b4347\n", + "Successfully built datasets\n", + "Installing collected packages: datasets\n", + " Attempting uninstall: datasets\n", + " Found existing installation: datasets 2.2.1\n", + " Uninstalling datasets-2.2.1:\n", + " Successfully uninstalled datasets-2.2.1\n", + "Successfully installed datasets-2.2.2.dev0\n" + ] + } + ], + "source": [ + "# We won't need TensorFlow here\n", + "!pip uninstall -y tensorflow\n", + "# Install `transformers` from master\n", + "!pip install git+https://github.com/huggingface/transformers\n", + "!pip list | grep -E 'transformers|tokenizers'\n", + "!pip install nlp==0.2.0\n", + "!pip install datasets\n", + "!pip install git+https://github.com/huggingface/nlp\n", + "\n", + "# transformers version at notebook update --- 2.11.0\n", + "# tokenizers version at notebook update --- 0.8.0rc1" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Train a customised python byte-level Byte-pair encoding tokenizer. " + ], + "metadata": { + "id": "M0wmpgCxUIF3" + } + }, + { + "cell_type": "code", + "source": [ + "from pathlib import Path\n", + "from transformers import AutoTokenizer,TextDataset,DataCollatorForLanguageModeling\n", + "import glob\n", + "import random \n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n" + ], + "metadata": { + "id": "oPq1Bau8UbpB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "84eaefed-3eee-452f-fa68-c43cc1052d3c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Could not locate the tokenizer configuration file, will try to use the model config instead.\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.20.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at 
/root/.cache/huggingface/transformers/684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n", + "loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n", + "loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n", + "loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n", + "loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n", + "loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.20.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def compute_metric():\n", + "\n", + " nlp.load_metric('accuracy', name=\"token\").compute(\n", + " np.argmax(preds_dict[\"token\"].predictions, axis=1),\n", + " preds_dict[\"token\"].label_ids,\n", + " )\n", + "\n", + " nlp.load_metric('average_precision_score', name=\"token\").compute(\n", + " np.argmax(preds_dict[\"token\"].predictions, axis=1),\n", + " preds_dict[\"token\"].label_ids,\n", + " )\n", + "\n", + " nlp.load_metric('bleu', name=\"line\").compute(\n", + " np.argmax(preds_dict[\"line\"].predictions, axis=1),\n", + " preds_dict[\"line\"].label_ids,\n", + " )\n", + "\n", + " nlp.load_metric('meteor', name=\"line\").compute(\n", + " np.argmax(preds_dict[\"line\"].predictions, axis=1),\n", + " preds_dict[\"line\"].label_ids,\n", + " )\n", + "\n", + " nlp.load_metric('rouge', name=\"line\").compute(\n", + " np.argmax(preds_dict[\"line\"].predictions, axis=1),\n", + " preds_dict[\"line\"].label_ids,\n", + " )\n" + ], + "metadata": { + "id": "aJkbrM1h2gH0" + }, + "execution_count": 2, + "outputs": [] + }, + { 
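+ "cell_type": "markdown", + "source": [ + "A minimal sketch, assuming token-level logits and integer label ids as numpy arrays: compute_metric above calls nlp.load_metric(...).compute(...) but never returns the scores, while transformers.Trainer expects a compute_metrics(eval_pred) callback that returns a dict of metric values. The helper name compute_metrics_sketch in the next cell is illustrative only, not part of the training pipeline." + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "# Hedged sketch: a Trainer-compatible metric callback that returns its scores.\n", + "# Assumes eval_pred is a (logits, label_ids) pair of numpy arrays.\n", + "import numpy as np\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "def compute_metrics_sketch(eval_pred):\n", + "    logits, label_ids = eval_pred\n", + "    preds = np.argmax(logits, axis=-1)\n", + "    mask = label_ids != -100  # skip padded positions, if any\n", + "    return {\"accuracy\": accuracy_score(label_ids[mask], preds[mask])}\n" + ], + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { 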
+ "cell_type": "code", + "source": [ + "paths = [str(x) for x in Path(\".\").glob(\"./sample_data/test_data/*.py\")]\n", + "converted_paths = []\n", + "for path in paths:\n", + " converted_path = \"./sample_data/test_data/\"+ path.split(\"/\").pop().split(\".\")[0] + \".txt\"\n", + " print(converted_path)\n", + " try:\n", + " convert(path, converted_path)\n", + " converted_paths.append(converted_path)\n", + " except:\n", + " pass\n", + "\n", + "\n", + "with open(\"./test.txt\", \"wb\") as test_outfile:\n", + " for f in paths:\n", + " with open(f, \"rb\") as infile:\n", + " test_outfile.write(infile.read())\n", + "\n", + "with open(\"./converted_test.txt\", \"wb\") as test_outfile:\n", + " for f in converted_paths:\n", + " with open(f, \"rb\") as infile:\n", + " test_outfile.write(infile.read())\n", + "\n" + ], + "metadata": { + "id": "vOXkK5bWYNXz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def load_dataset(test_path,tokenizer):\n", + " \n", + " test_dataset = TextDataset(\n", + " tokenizer=tokenizer,\n", + " file_path=test_path,\n", + " block_size=128) \n", + " \n", + " data_collator = DataCollatorForLanguageModeling(\n", + " tokenizer=tokenizer, mlm=False,\n", + " )\n", + " return test_dataset,data_collator\n", + "\n", + "test_dataset,data_collator = load_dataset(\"./train.txt\", \"./test.txt\",tokenizer)\n", + "converted_test_dataset, converted_datacollator = load_dataset(\"./converted_train.txt\", \"./converted_test.txt\",tokenizer)\n", + "#pretrain_raw_files = glob.glob(\"./pretrain_dataset\" + '/**/*.py', recursive=True)\n", + "#pretrain_converted_files = glob.glob(\"./pretrain_converted_dataset\" + '/**/*.py', recursive=True)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5Q4kLrTZXTjZ", + "outputId": "254ec48b-7aa2-49b6-9b92-7c5d6c673794" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/data/datasets/language_modeling.py:58: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. 
You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_mlm.py\n", + " FutureWarning,\n", + "Loading features from cached file ./cached_lm_GPT2TokenizerFast_128_train.txt [took 0.000 s]\n", + "Loading features from cached file ./cached_lm_GPT2TokenizerFast_128_test.txt [took 0.000 s]\n", + "Loading features from cached file ./cached_lm_GPT2TokenizerFast_128_converted_train.txt [took 0.000 s]\n", + "Loading features from cached file ./cached_lm_GPT2TokenizerFast_128_converted_test.txt [took 0.000 s]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "tokenizer(\"for i in range(10)\")[\"input_ids\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0AraoltupXmb", + "outputId": "c90124e7-3695-44af-827d-c355632ec2af" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[1640, 1312, 287, 2837, 7, 940, 8]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn\n", + "import transformers\n", + "import nlp\n", + "import logging\n", + "from datasets import load_dataset\n", + "from transformers import TextDataset,DataCollatorForLanguageModeling\n", + "\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "\n", + "dataset_dict = {\n", + " \"token\": train_dataset,\n", + " \"token_type\": train_dataset,\n", + " \"line\": train_dataset,\n", + "}\n", + "\n", + "print(dataset_dict[\"token\"])\n" + ], + "metadata": { + "id": "4ePqOauaqjnZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from transformers.utils.dummy_pt_objects import GPT2LMHeadModel\n", + "from transformers import GPT2Tokenizer\n", + "from transformers import GPT2Config, EncoderDecoderConfig, EncoderDecoderModel\n", + "\n", + "\n", + "class MultitaskModel(transformers.PreTrainedModel):\n", + " def __init__(self, encoder, taskmodels_dict):\n", + " \"\"\"\n", + " Setting MultitaskModel up as a PretrainedModel allows us\n", + " to take better advantage of Trainer features\n", + " \"\"\"\n", + " super().__init__(transformers.PretrainedConfig())\n", + "\n", + " self.encoder = encoder\n", + " self.taskmodels_dict = nn.ModuleDict(taskmodels_dict)\n", + "\n", + " def _get_models(self):\n", + " return self.taskmodels_dict\n", + "\n", + " @classmethod\n", + " def create(cls, model_name, model_type_dict, model_config_dict):\n", + " \"\"\"\n", + " This creates a MultitaskModel using the model class and config objects\n", + " from single-task models. 
\n", + "\n", + " We do this by creating each single-task model, and having them share\n", + " the same encoder transformer.\n", + " \"\"\"\n", + " shared_encoder = None\n", + " taskmodels_dict = {}\n", + " for task_name, model_type in model_type_dict.items():\n", + " model = model_type.from_pretrained( \"gpt2\",\n", + " config=model_config_dict[task_name],\n", + " )\n", + " if shared_encoder is None:\n", + " shared_encoder = cls.get_encoder(model)\n", + " else:\n", + " setattr(model, \"encoder\", shared_encoder)\n", + " taskmodels_dict[task_name] = model\n", + " return cls(encoder=shared_encoder, taskmodels_dict=taskmodels_dict)\n", + " \n", + "\n", + " @classmethod\n", + " def get_encoder(cls, model):\n", + " \"\"\"\n", + " The encoder transformer is named differently in each model \"architecture\".\n", + " This method lets us get the name of the encoder attribute\n", + " \"\"\"\n", + " model_class_name = model.__class__.__name__\n", + " if model_class_name.startswith(\"Roberta\"):\n", + " return \"roberta-base\"\n", + " elif model_class_name.startswith(\"GPT2\"):\n", + " config = EncoderDecoderConfig.from_encoder_decoder_configs(model.config, model.config) \n", + " encoder_decoder = EncoderDecoderModel(config=config)\n", + " return encoder_decoder.config.encoder\n", + " else:\n", + " raise KeyError(f\"Add support for new model {model_class_name}\")\n", + " \n", + " def forward(self, task_name, **kwargs):\n", + " return self.taskmodels_dict[task_name](**kwargs)" + ], + "metadata": { + "id": "jJLxsM_H4A6z" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model_name = \"gpt2\"\n", + "multitask_model = MultitaskModel.create(\n", + " model_name=model_name,\n", + " model_type_dict={\n", + " \"token\": transformers.AutoModelWithLMHead,\n", + " \"token_type\": transformers.AutoModelWithLMHead,\n", + " \"line\": transformers.AutoModelForSequenceClassification,\n", + " },\n", + " model_config_dict={\n", + " \"token\": transformers.AutoConfig.from_pretrained(model_name),\n", + " \"token_type\": transformers.AutoConfig.from_pretrained(model_name),\n", + " \"line\": transformers.AutoConfig.from_pretrained(model_name),\n", + " },\n", + ")" + ], + "metadata": { + "id": "9bbwa7E74Q0x", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 140, + "referenced_widgets": [ + "03103d813d964d819b6a2c0a1afac919", + "b478a5503c314e8880931716b4d3be31", + "600a7388c66a464285e598195ef980a0", + "c2f23a4bfec2424f8556a460757b7f7c", + "f8962c79a027458e8d282f95b9db14e6", + "17fd247324f84aedbd3cf77d9e6a9970", + "a44ac743025e4b9db4161a1215b35904", + "ca2c310b21a049aa85be1be907b14456", + "3097af23a3e547f9a34d6d5e61e189c9", + "36fcac3bb12e4934a548fcc2e85eee8a", + "74028709db644e8c9ef1865cf4396e63" + ] + }, + "outputId": "776e3562-c46f-47da-d4b9-217b85d9cb79" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:925: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. 
Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", + " FutureWarning,\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/523M [00:00 Dict[str, torch.Tensor]:\n", + " first = features[0]\n", + " if isinstance(first, dict):\n", + " # NLP data sets current works presents features as lists of dictionary\n", + " # (one per example), so we will adapt the collate_batch logic for that\n", + " if \"labels\" in first and first[\"labels\"] is not None:\n", + " if first[\"labels\"].dtype == torch.int64:\n", + " labels = torch.tensor([f[\"labels\"] for f in features], dtype=torch.long)\n", + " else:\n", + " labels = torch.tensor([f[\"labels\"] for f in features], dtype=torch.float)\n", + " batch = {\"labels\": labels}\n", + " for k, v in first.items():\n", + " if k != \"labels\" and v is not None and not isinstance(v, str):\n", + " batch[k] = torch.stack([f[k] for f in features])\n", + " return batch\n", + " else:\n", + " # otherwise, revert to using the default collate_batch\n", + " return DefaultDataCollator().collate_batch(features)\n", + "\n", + "\n", + "class StrIgnoreDevice(str):\n", + " \"\"\"\n", + " This is a hack. The Trainer is going call .to(device) on every input\n", + " value, but we need to pass in an additional `task_name` string.\n", + " This prevents it from throwing an error\n", + " \"\"\"\n", + " def to(self, device):\n", + " return self\n", + "\n", + "class DataLoaderWithTaskname:\n", + " \"\"\"\n", + " Wrapper around a DataLoader to also yield a task name\n", + " \"\"\"\n", + " def __init__(self, task_name, data_loader):\n", + " self.task_name = task_name\n", + " self.data_loader = data_loader\n", + "\n", + " self.batch_size = data_loader.batch_size\n", + " self.dataset = data_loader.dataset\n", + "\n", + " def __len__(self):\n", + " return len(self.data_loader)\n", + " \n", + " def __iter__(self):\n", + " for batch in self.data_loader:\n", + " batch[\"task_name\"] = StrIgnoreDevice(self.task_name)\n", + " yield batch\n", + "\n", + "\n", + "class MultitaskDataloader:\n", + " \"\"\"\n", + " Data loader that combines and samples from multiple single-task\n", + " data loaders.\n", + " \"\"\"\n", + " def __init__(self, dataloader_dict):\n", + " self.dataloader_dict = dataloader_dict\n", + " self.num_batches_dict = {\n", + " task_name: len(dataloader) \n", + " for task_name, dataloader in self.dataloader_dict.items()\n", + " }\n", + " self.task_name_list = list(self.dataloader_dict)\n", + " self.dataset = [None] * sum(\n", + " len(dataloader.dataset) \n", + " for dataloader in self.dataloader_dict.values()\n", + " )\n", + "\n", + " def __len__(self):\n", + " return sum(self.num_batches_dict.values())\n", + "\n", + " def __iter__(self):\n", + " \"\"\"\n", + " For each batch, sample a task, and yield a batch from the respective\n", + " task Dataloader.\n", + "\n", + " We use size-proportional sampling, but you could easily modify this\n", + " to sample from some-other distribution.\n", + " \"\"\"\n", + " task_choice_list = []\n", + " for i, task_name in enumerate(self.task_name_list):\n", + " task_choice_list += [i] * self.num_batches_dict[task_name]\n", + " task_choice_list = np.array(task_choice_list)\n", + " np.random.shuffle(task_choice_list)\n", + " dataloader_iter_dict = {\n", + " task_name: iter(dataloader) \n", + " for task_name, dataloader in self.dataloader_dict.items()\n", + " }\n", + " for task_choice 
in task_choice_list:\n", + " task_name = self.task_name_list[task_choice]\n", + " yield next(dataloader_iter_dict[task_name]) \n", + "\n", + "class MultitaskTrainer(transformers.Trainer):\n", + "\n", + " def get_single_train_dataloader(self, task_name, train_dataset):\n", + " \"\"\"\n", + " Create a single-task data loader that also yields task names\n", + " \"\"\"\n", + " if self.train_dataset is None:\n", + " raise ValueError(\"Trainer: training requires a train_dataset.\")\n", + " \n", + " train_sampler = (\n", + " RandomSampler(train_dataset)\n", + " if self.args.local_rank == -1\n", + " else DistributedSampler(train_dataset)\n", + " )\n", + "\n", + " data_loader = DataLoaderWithTaskname(\n", + " task_name=task_name,\n", + " data_loader=DataLoader(\n", + " train_dataset,\n", + " batch_size=self.args.train_batch_size,\n", + " sampler=train_sampler\n", + " ),\n", + " )\n", + "\n", + " return data_loader\n", + "\n", + " def get_train_dataloader(self):\n", + " \"\"\"\n", + " Returns a MultitaskDataloader, which is not actually a Dataloader\n", + " but an iterable that returns a generator that samples from each \n", + " task Dataloader\n", + " \"\"\"\n", + " return MultitaskDataloader({\n", + " task_name: self.get_single_train_dataloader(task_name, task_dataset)\n", + " for task_name, task_dataset in self.train_dataset.items()\n", + " })\n", + " \n", + " def train(self):\n", + " config = transformers.AutoConfig.from_pretrained(\"gpt2\")\n", + " model = transformers.AutoModelWithLMHead.from_pretrained(\"gpt2\", config=config)\n", + " self.trainer = Trainer(\n", + " model=model,\n", + " args=transformers.TrainingArguments(\n", + " output_dir=\"./models/multitask_model\",\n", + " overwrite_output_dir=True,\n", + " learning_rate=1e-5,\n", + " do_train=True,\n", + " num_train_epochs=100,\n", + " # Adjust batch size if this doesn't fit on the Colab GPU\n", + " per_device_train_batch_size=8, \n", + " save_steps=3000,\n", + " ),\n", + " data_collator=data_collator,\n", + " train_dataset=train_dataset,\n", + " )\n", + " self.trainer.train()\n", + " \n", + " def evaluate(self):\n", + " config = transformers.AutoConfig.from_pretrained(\"gpt2\")\n", + " model = transformers.AutoModelWithLMHead.from_pretrained(\"gpt2\", config=config)\n", + " self.trainer = Trainer(\n", + " model=model,\n", + " args=transformers.TrainingArguments(\n", + " output_dir=\"./models/multitask_model\",\n", + " overwrite_output_dir=True,\n", + " learning_rate=1e-5,\n", + " do_train=True,\n", + " num_train_epochs=100,\n", + " # Adjust batch size if this doesn't fit on the Colab GPU\n", + " per_device_train_batch_size=8, \n", + " save_steps=3000,\n", + " compute_metrics=compute_metrics\n", + " ),\n", + " data_collator=data_collator,\n", + " train_dataset=train_dataset,\n", + " )\n", + " self.trainer.train()\n", + "\n", + " #run evaluation first to set the compute metrics to compute full set of metrics \n", + " def prediction_loop(self):\n", + " return self.trainer.predict()\n", + "\n", + " def compute_loss(self, model, inputs, return_outputs=True):\n", + " labels = inputs.get(\"labels\")\n", + " # forward pass\n", + " outputs = model(**inputs)\n", + " reranking_layer(outputs, inputs._get_value(), tokenizer=tokenizer) #input value is tensor\n", + " logits = outputs.get(\"logits\")\n", + " loss_fct = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 2.0, 3.0]))\n", + " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", + " return (loss, outputs) if return_outputs else loss" + ], + "metadata": { + "id": 
"dzVHNee8EP-T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# point to training folder \n", + "model_path = \"./models/multitask_model\"\n", + "model = AutoModelWithLMHead.from_pretrained(model_path)\n", + "\n", + "# Define test trainer\n", + "test_trainer = Trainer(model)\n", + "trainer.train()\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "TJ3CrZfgHGKq", + "outputId": "3d86f277-b512-4468-f736-242baf745030" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n", + "loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n", + "Model config GPT2Config {\n", + " \"_name_or_path\": \"gpt2\",\n", + " \"activation_function\": \"gelu_new\",\n", + " \"architectures\": [\n", + " \"GPT2LMHeadModel\"\n", + " ],\n", + " \"attn_pdrop\": 0.1,\n", + " \"bos_token_id\": 50256,\n", + " \"embd_pdrop\": 0.1,\n", + " \"eos_token_id\": 50256,\n", + " \"initializer_range\": 0.02,\n", + " \"layer_norm_epsilon\": 1e-05,\n", + " \"model_type\": \"gpt2\",\n", + " \"n_ctx\": 1024,\n", + " \"n_embd\": 768,\n", + " \"n_head\": 12,\n", + " \"n_inner\": null,\n", + " \"n_layer\": 12,\n", + " \"n_positions\": 1024,\n", + " \"reorder_and_upcast_attn\": false,\n", + " \"resid_pdrop\": 0.1,\n", + " \"scale_attn_by_inverse_layer_idx\": false,\n", + " \"scale_attn_weights\": true,\n", + " \"summary_activation\": null,\n", + " \"summary_first_dropout\": 0.1,\n", + " \"summary_proj_to_labels\": true,\n", + " \"summary_type\": \"cls_index\",\n", + " \"summary_use_proj\": true,\n", + " \"task_specific_params\": {\n", + " \"text-generation\": {\n", + " \"do_sample\": true,\n", + " \"max_length\": 50\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.20.0.dev0\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50257\n", + "}\n", + "\n", + "/usr/local/lib/python3.7/dist-packages/transformers/models/auto/modeling_auto.py:925: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. 
Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", + " FutureWarning,\n", + "loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n", + "All model checkpoint weights were used when initializing GPT2LMHeadModel.\n", + "\n", + "All the weights of GPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use GPT2LMHeadModel for predictions without further training.\n", + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" + ] + }, + { + "output_type": "error", + "ename": "ValueError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mdata_collator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata_collator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m )\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0mtrain_dataset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m )\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1322\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1323\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1324\u001b[0;31m 
\u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1325\u001b[0m )\n\u001b[1;32m 1326\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_batch_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1331\u001b[0m \u001b[0;31m# Data loader and number of training steps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1332\u001b[0;31m \u001b[0mtrain_dataloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_train_dataloader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1333\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1334\u001b[0m \u001b[0;31m# Setting up training control variables:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mget_train_dataloader\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 767\u001b[0m )\n\u001b[1;32m 768\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 769\u001b[0;31m \u001b[0mtrain_sampler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_train_sampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 770\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 771\u001b[0m return DataLoader(\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_get_train_sampler\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 708\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mworld_size\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_torch_generator_available\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 710\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mRandomSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgenerator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mgenerator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 711\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mRandomSampler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain_dataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m elif (\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/torch/utils/data/sampler.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data_source, replacement, num_samples, generator)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_samples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_samples\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m raise ValueError(\"num_samples should be a positive integer \"\n\u001b[0;32m---> 98\u001b[0;31m \"value, but got num_samples={}\".format(self.num_samples))\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: num_samples should be a positive integer value, but got num_samples=0" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "preds_dict = {}\n", + "for task_name in [\"token\", \"token_type\", \"line\"]:\n", + " eval_dataloader = DataLoaderWithTaskname(\n", + " task_name,\n", + " trainer.get_eval_dataloader(eval_dataset=dataset_dict[task_name])\n", + " )\n", + " print(eval_dataloader.data_loader.collate_fn)\n", + " preds_dict[task_name] = trainer.prediction_loop(\n", + " eval_dataloader, \n", + " description=f\"Validation: {task_name}\",\n", + " )\n", + "\n", + "\n", + "print(preds_dict)" + ], + "metadata": { + "id": "Xgw82zyxp-_5", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6be34973-95c4-4acf-d3a4-1a072e36373a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Validation: token *****\n", + " Num examples = 0\n", + " Batch size = 8\n", + "/usr/local/lib/python3.7/dist-packages/transformers/trainer_pt_utils.py:395: FutureWarning: DistributedTensorGatherer is deprecated and will be removed in v5 of Transformers.\n", + " FutureWarning,\n", + "***** Running Validation: token_type *****\n", + " Num examples = 0\n", + " Batch size = 8\n", + "***** Running Validation: line *****\n", + " Num examples = 0\n", + " Batch size = 8\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import accuracy_score, label_ranking_average_precision_score\n", + "\n", + "accuracy_dict = {}\n", + "mrr_dict = {}\n", + "\n", + "for task_name in [\"token\", \"token_type\", \"line\"]:\n", + " accuracy_dict[task_name] = accuracy_score(preds_dict[task_name].predictions.flatten(),\n", + " preds_dict[task_name].label_ids)\n", + " \n", + " mrr_dict[task_name] = label_ranking_average_precision_score(preds_dict[task_name].predictions.flatten(),\n", + " preds_dict[task_name].label_ids)\n", + " \n", + " " + ], + "metadata": { + "id": "XWUxUWUVE5Dq" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file