Data processing and model training started

parent 8c0fd60c
......@@ -79,6 +79,109 @@
"source": [
"train_data_file = '/content/drive/MyDrive/RP_GoviMitura/Kavee - ChatBot/DataSets/intents.json'"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## 💬 Data Preprocessing and Model Training"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Used in the TensorFlow model\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import tflearn\n",
"import random\n",
"\n",
"# Used for contextualisation and other NLP tasks.\n",
"import nltk\n",
"from nltk.stem.lancaster import LancasterStemmer\n",
"stemmer = LancasterStemmer()\n",
"\n",
"#Other\n",
"import json\n",
"import pickle\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Processing the Intents.....\")\n",
"with open(train_data_file) as json_data:\n",
" intents = json.load(json_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nltk.download('punkt')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"words = []\n",
"classes = []\n",
"documents = []\n",
"ignore_words = ['?']\n",
"print(\"Looping through the Intents to Convert them to words, classes, documents and ignore_words.......\")\n",
"for intent in intents['intents']:\n",
" for pattern in intent['patterns']:\n",
" # tokenize each word in the sentence\n",
" w = nltk.word_tokenize(pattern)\n",
" # add to our words list\n",
" words.extend(w)\n",
" # add to documents in our corpus\n",
" documents.append((w, intent['tag']))\n",
" # add to our classes list\n",
" if intent['tag'] not in classes:\n",
" classes.append(intent['tag'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Stemming, Lowering and Removing Duplicates.......\")\n",
"words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words]\n",
"words = sorted(list(set(words)))\n",
"\n",
"# remove duplicates\n",
"classes = sorted(list(set(classes)))\n",
"\n",
"print (len(documents), \"documents\")\n",
"print (len(classes), \"classes\", classes)\n",
"print (len(words), \"unique stemmed words\", words)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(documents)"
]
}
],
"metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment