Commit 08de5812 authored by S.T. Galappaththi

Merge branch 'master' of http://gitlab.sliit.lk/2023-118/2023-118 into IT20167264

# Conflicts:
#	OuterScopeTesting.ipynb
parents af11c45e ef835ac5
@@ -66,3 +66,5 @@ env/
# Vector files
*.vec
README.md
\ No newline at end of file
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Testing from outside"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"from sinhala_data_processor.singlish.hybrid_transliterator import HybridTransliterator, RuleBasedTransliterator"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
"\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_21020/1015352440.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m 3\u001B[0m \u001B[0mtext\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m\"mama oyaata aadareyi\"\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 4\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 5\u001B[1;33m \u001B[0mo\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mtransliterator\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtext\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mtext\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
"\u001B[1;32m~\\PycharmProjects\\SinhalaDataProcessor\\sinhala_data_processor\\singlish\\hybrid_transliterator.py\u001B[0m in \u001B[0;36mtransliterator\u001B[1;34m(self, text)\u001B[0m\n\u001B[0;32m 117\u001B[0m \u001B[1;32mcontinue\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 118\u001B[0m \u001B[1;31m# Get GPT response for level 0\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 119\u001B[1;33m \u001B[0mgpt_response_1\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m__get_gpt_response\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtext\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0msentence\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mlevel\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 120\u001B[0m \u001B[0mdictionary_1\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mjson\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mloads\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mgpt_response_1\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 121\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;32m~\\PycharmProjects\\SinhalaDataProcessor\\sinhala_data_processor\\singlish\\hybrid_transliterator.py\u001B[0m in \u001B[0;36m__get_gpt_response\u001B[1;34m(self, text, level, word)\u001B[0m\n\u001B[0;32m 78\u001B[0m \u001B[0mresult\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mcompletion\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mchoices\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmessage\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcontent\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 79\u001B[0m \u001B[0msleep_time\u001B[0m \u001B[1;33m=\u001B[0m \u001B[1;36m2\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 80\u001B[1;33m \u001B[0mtime\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0msleep\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0msleep_time\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;31m# To avoid rate limit\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 81\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mresult\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 82\u001B[0m \u001B[1;32mexcept\u001B[0m \u001B[0mException\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;31mKeyboardInterrupt\u001B[0m: "
]
}
],
"source": [
"o = HybridTransliterator()\n",
"\n",
"text=\"mama oyaata aadareyi\"\n",
"\n",
"o.transliterator(text=text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"i = RuleBasedTransliterator()\n",
"\n",
"text=\"mama oyaata aadareyi\"\n",
"\n",
"i.transliterator(text=text)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from sinhala_data_processor.singlish.machine_transliterator import MachineTransliterator\n",
"\n",
"transliterator = MachineTransliterator(model=\"resources/models/cc.si.300.vec\")\n",
"input_word = \"oyaata kohomada\"\n",
"\n",
"out = transliterator.transliterator(input_word)\n",
"print(out)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from sinhala_data_processor.singlish.hybrid_transliterator import HybridTransliterator\n",
"\n",
"a = HybridTransliterator()\n",
"\n",
"a.view_prompt(0)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"ename": "AttributeError",
"evalue": "'GrammarMain' object has no attribute 'mapper'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mAttributeError\u001B[0m Traceback (most recent call last)",
"\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_27136\\1852559308.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m 4\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5\u001B[0m \u001B[0msentence\u001B[0m \u001B[1;33m=\u001B[0m \u001B[1;34m\"මම මේ ටික හොඳට බලන\"\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 6\u001B[1;33m \u001B[0mout\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mobj\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmapper\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0msentence\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0msentence\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 7\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mout\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
"\u001B[1;31mAttributeError\u001B[0m: 'GrammarMain' object has no attribute 'mapper'"
]
}
],
"source": [
"from sinhala_data_processor.grammar_rule.grammar_main import GrammarMain\n",
"\n",
"obj = GrammarMain()\n",
"\n",
"sentence = \"දරුවා වෙහෙස මහන්සියෙන් ඉගෙන ගන්නවා\"\n",
"out = obj.mapper(sentence=sentence)\n",
"print(out)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-22T22:31:14.201083400Z",
"start_time": "2023-08-22T22:31:14.170322400Z"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching File\n",
"Converting audio transcripts into text ...\n",
"කුඹුර ගොවියාට වී ලබාගැනීමට උපකාරී වීම වශයෙන් පිහිටවන්න කි\n"
]
}
],
"source": [
"from sinhala_data_processor.sinhala_audio.audio_to_text import conversion\n",
"path=\"resources/datasets/IT20167264/test_audio/pn_sin_01_00001.wav\"\n",
"\n",
"text = conversion(path=path)\n",
"print(text)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-22T11:28:38.382431Z",
"start_time": "2023-08-22T11:28:37.738017500Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# 2023-118
# Sinhala Data Cleaning and Preprocessing Library
Welcome to the Sinhala Data Cleaning and Preprocessing Library! This comprehensive Python library is designed to streamline the process of cleaning and preprocessing Sinhala text data for various natural language processing (NLP) tasks. Our library addresses the unique challenges posed by the Sinhala language, providing efficient solutions for data cleaning, text summarization, and more.
## Objective
Our primary objective is to create an all-encompassing library that enhances the quality and accessibility of Sinhala text data. Through machine learning techniques and innovative approaches, we aim to empower users across industries to efficiently process, understand, and utilize Sinhala language content.
## Key Research Questions
### Sameera W.G.G.A.S - IT20146924
- How can Singlish text be accurately and efficiently converted to Sinhala language using machine learning techniques?
- What approaches can be taken to support different variations of Singlish text and real-time conversion?
- How can integration with other systems be seamlessly achieved to enhance user experience?
### Galappaththi S.T - IT20167264
- What are the key challenges in accurately converting Sinhala speech to text while maintaining grammar rules?
- How can verbal Sinhala text be effectively converted to written text while adhering to the language's specific grammar?
- What machine learning and deep learning models can be utilized for optimal performance in Sinhala language processing?
### Yasodya P.B.B - IT20227586
- What are the limitations of existing tools for Sinhala text cleaning and preprocessing, and how can they be addressed?
- How can hybrid approaches combining rule-based and machine learning methods improve the accuracy and scalability of text processing?
- What strategies can be implemented to support various Sinhala text variations and promote integration with other systems?
### Wijesinghe W.R.A.S.S - IT20181406
- What are the current challenges in summarizing Sinhala news articles, and how can they be overcome?
- How can the library effectively combine extractive and abstractive summarization techniques to generate high-quality summaries?
- What methods can be employed to provide users with customizable summarization lengths, formats, and translation options?
## Individual Objectives
### Sameera W.G.G.A.S - IT20146924
Develop a Python library for converting Singlish text to Sinhala language. Our library leverages deep learning models for accurate and real-time conversion, supporting various Singlish variations and integration options.
### Galappaththi S.T - IT20167264
Create a comprehensive Sinhala Data Cleaning and Preprocessing Library. Focus areas include accurate Sinhala speech-to-text and verbal text-to-written text conversion, machine learning integration, and real-time performance optimization.
### Yasodya P.B.B - IT20227586
Develop a robust Python library for cleaning and preprocessing Sinhala text data. Employ hybrid approaches combining rule-based and machine learning methods for accuracy and scalability. Prioritize support for various Sinhala text variations and integration with other systems.
### Wijesinghe W.R.A.S.S - IT20181406
Design an advanced Sinhala text summarization library with extractive and abstractive techniques. Offer customizable summarization lengths and formats, and integrate translation features for wider accessibility of Sinhala news content.
## Contact
For inquiries or feedback, please contact us at sssbprojects@gmail.com.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import openai\n",
"import json\n",
"import time\n",
"\n",
"def read_json_config(file_path):\n",
" try:\n",
" with open(file_path, 'r') as json_file:\n",
" json_data = json.load(json_file)\n",
" return json_data\n",
" except Exception as e:\n",
" print(f\"Error while reading JSON configuration file '{file_path}': {str(e)}\")\n",
" return {}\n",
"\n",
"def get_gpt_response(text: str, json_data: dict, level: int, word: str = \"\"):\n",
" completion = None\n",
" try:\n",
" openai.api_key = json_data['api_key']\n",
" openai.organization = json_data['org_key']\n",
" user_prompt = json_data['Prompts'][level]['content'].replace(\"{{masked-sentence}}\", text).replace(\"{{misspelled-word}}\", word)\n",
" if not text.strip():\n",
" raise ValueError(\"Text is empty. Please provide a valid text string.\")\n",
"\n",
" success = False\n",
" while not success:\n",
" try:\n",
" completion = openai.ChatCompletion.create(\n",
" model=json_data['model'],\n",
" messages=[\n",
" {\n",
" \"role\": json_data['Prompts'][level]['role'],\n",
" \"content\": user_prompt\n",
" }\n",
" ],\n",
" n=1,\n",
" temperature=json_data['temperature'],\n",
" max_tokens=json_data['max_tokens'],\n",
" top_p=json_data['Top_P'],\n",
" frequency_penalty=json_data['Frequency_penalty'],\n",
" presence_penalty=json_data['Presence_penalty']\n",
" )\n",
" success = True\n",
" except Exception as e:\n",
" sleep_time = 2\n",
" time.sleep(sleep_time)\n",
" print(\"Error:\", e)\n",
" print(\"Retrying...\")\n",
"\n",
" result = completion.choices[0].message.content\n",
" sleep_time = 2\n",
" time.sleep(sleep_time) # to avoid rate limit\n",
" return result\n",
" except Exception as e:\n",
" print(f\"Error in GPT response for text '{text}': {str(e)}\")\n",
" return \"\"\n",
"\n",
"def remove_duplicates(input_list):\n",
" unique_words = set()\n",
" result = []\n",
"\n",
" for word in input_list:\n",
" if word not in unique_words:\n",
" unique_words.add(word)\n",
" result.append(word)\n",
"\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"def main(config_file: str, text: str):\n",
" json_data = read_json_config(config_file)\n",
"\n",
" gpt_response_1 = get_gpt_response(text=text, json_data=json_data, level=0)\n",
" dictionary_1 = json.loads(gpt_response_1)\n",
" print(f\"Sentence: {text}\")\n",
"\n",
" primary_list_item = 0 # Initialize the index for the while loop\n",
" dictionary_list = dictionary_1[\"word_list\"]\n",
" dictionary_list = remove_duplicates(dictionary_list)\n",
" print(f\"Misspelled Word List: {dictionary_list}\")\n",
" while primary_list_item < len(dictionary_1[\"word_list\"]):\n",
" for list_item in dictionary_list:\n",
" print(f\"Word to Mask: {list_item}\")\n",
" if text.count(list_item) > 1:\n",
" text = text.replace(list_item, \"<mask>\", 1)\n",
" print(f\"Masked Sentence: {text}\")\n",
" dictionary_1[\"word_list\"].append(list_item)\n",
" dictionary_list.remove(list_item)\n",
" dictionary_list.append(list_item)\n",
" gpt_response_2 = get_gpt_response(text=text, json_data=json_data, level=1, word=list_item)\n",
" print(f\"Predicted Word for Mask: {gpt_response_2}\")\n",
" dictionary_2 = json.loads(gpt_response_2)\n",
" if list_item in dictionary_2:\n",
" text = text.replace(\"<mask>\", dictionary_2[list_item])\n",
" print(f\"Mask Replaced Sentence: {text}\")\n",
" break\n",
" elif text.count(list_item) == 1:\n",
" text = text.replace(list_item, \"<mask>\", 1)\n",
" print(f\"Masked Sentence: {text}\")\n",
" gpt_response_2 = get_gpt_response(text=text, json_data=json_data, level=1, word=list_item)\n",
" print(f\"Predicted Word for Mask: {gpt_response_2}\")\n",
" dictionary_2 = json.loads(gpt_response_2)\n",
" if list_item in dictionary_2:\n",
" text = text.replace(\"<mask>\", dictionary_2[list_item])\n",
" print(f\"Mask Replaced Sentence: {text}\")\n",
" break\n",
"\n",
" primary_list_item += 1\n",
" return text"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"# conf_file_path = \"../../resources/configuration/IT20146924/config.json\"\n",
"# masked_sentence = 'මම ඔයාට <mask>'\n",
"# masked_word = 'ආඩරෙයි'\n",
"# main(conf_file_path, masked_sentence, masked_word)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# from sinhala_data_processor.singlish.rulebased_transliterator import RuleBasedTransliterator\n",
"#\n",
"# transliterator = RuleBasedTransliterator()\n",
"# input_word = \"kohomada oyaata. mamanam hondhin\"\n",
"#\n",
"# out = transliterator.transliterator(input_word)\n",
"# print(out)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sentence: කොහොමඩ ඔයාට. කොහොමඩ\n",
"Misspelled Word List: ['කොහොමඩ']\n",
"Word to Mask: කොහොමඩ\n",
"Masked Sentence: <mask> ඔයාට. කොහොමඩ\n",
"Predicted Word for Mask: {\n",
" \"කොහොමඩ\": \"කොහෟඩ්\",\n",
" \"category\": \"complement\"\n",
"}\n",
"Mask Replaced Sentence: කොහෟඩ් ඔයාට. කොහොමඩ\n",
"Word to Mask: කොහොමඩ\n",
"Masked Sentence: කොහෟඩ් ඔයාට. <mask>\n",
"Predicted Word for Mask: {\n",
" \"කොහොමඩ\": \"කොහොමද්දී\",\n",
" \"category\": \"object\"\n",
"}\n",
"Mask Replaced Sentence: කොහෟඩ් ඔයාට. කොහොමද්දී\n",
"Word to Mask: කොහොමඩ\n",
"කොහෟඩ් ඔයාට. කොහොමද්දී\n"
]
}
],
"source": [
"conf_file_path = \"../../resources/configuration/IT20146924/config.json\"\n",
"masked_sentence = \"කොහොමඩ ඔයාට. කොහොමඩ\"\n",
"sen = main(conf_file_path, masked_sentence)\n",
"print(sen)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-05-20T15:34:17.370768300Z",
"start_time": "2023-05-20T15:34:17.186405800Z"
}
},
"outputs": [],
"source": [
"# import library\n",
"import speech_recognition as sr"
]
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# Initialize recognizer class (for recognizing the speech)\n",
"r = sr.Recognizer()\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T15:35:10.086838500Z",
"start_time": "2023-05-20T15:35:10.067440800Z"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [],
"source": [
"# Reading Audio file as source\n",
"# listening the audio file and store in audio_text variable\n",
"def startConvertion(path='recorded_1.wav', lang='si-LK'):\n",
" with sr.AudioFile(path) as source:\n",
" print('Fetching File')\n",
" audio_text = r.listen(source)\n",
" # recoginize_() method will throw a request error if the API is unreachable, hence using exception handling\n",
" try:\n",
"\n",
" # using google speech recognition\n",
" print('Converting audio transcripts into text ...')\n",
" text = r.recognize_google(audio_text, language=lang)\n",
" print(text)\n",
"\n",
" except:\n",
" print('Sorry.. run again...')\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T15:37:39.259626200Z",
"start_time": "2023-05-20T15:37:39.231404100Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Here your text : \n",
"Fetching File\n",
"Converting audio transcripts into text ...\n",
"ගොවියා ගේ ප්‍රයෝජනය පිණිස අල්ප ව්‍යායාම් එකතු කුඹුර නොකරන්නේය\n"
]
}
],
"source": [
"print('Here your text : ')\n",
"# calling startConvertion method to start process\n",
"startConvertion(r\"../../resources/datasets/IT20167264/test_audio/pn_sin_01_00003.wav\", 'si-LK')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T15:42:33.788572600Z",
"start_time": "2023-05-20T15:42:29.682423300Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-05-20T15:45:27.459402100Z",
"start_time": "2023-05-20T15:45:27.348747700Z"
}
},
"outputs": [],
"source": [
"import speech_recognition as sr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"def startConversion():\n",
" # Initialize recognizer class (for recognizing the speech)\n",
" recognizer = sr.Recognizer()\n",
"\n",
" # Read the audio file path from user input\n",
" audio_file = input(\"Enter the path to the audio file: \")\n",
"\n",
" # Perform speech recognition\n",
" with sr.AudioFile(audio_file) as source:\n",
" print('Fetching File')\n",
" audio_text = recognizer.listen(source)\n",
" try:\n",
" print('Converting audio transcripts into text ...')\n",
" text = recognizer.recognize_google(audio_text, language='si-LK')\n",
" print(text)\n",
" except sr.UnknownValueError:\n",
" print('Speech recognition could not understand audio')\n",
" except sr.RequestError as e:\n",
" print(f'Error occurred during speech recognition: {e}')\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T15:45:58.749059400Z",
"start_time": "2023-05-20T15:45:58.737592100Z"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Speech-to-Text Conversion\n",
"Fetching File\n",
"Converting audio transcripts into text ...\n",
"ගොවියා ගේ ප්‍රයෝජනය පිණිස අල්ප ව්‍යායාම් එකතු කුඹුර නොකරන්නේය\n"
]
}
],
"source": [
"# Prompt the user to start the conversion\n",
"print('Speech-to-Text Conversion')\n",
"startConversion()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T15:48:39.991168500Z",
"start_time": "2023-05-20T15:48:30.395209400Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-05-20T17:17:27.353830200Z",
"start_time": "2023-05-20T17:17:27.329849800Z"
}
},
"outputs": [],
"source": [
"import re"
]
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [],
"source": [
"def conjugate_sentence(sentence):\n",
" # Define the pattern to match Sinhala verbs\n",
" verb_pattern = r\"([\\u0D80-\\u0DFF]+)\"\n",
"\n",
" # Check if the sentence starts with \"මම\"\n",
" if sentence.startswith(\"මම\"):\n",
" # Find all verbs in the sentence\n",
" verbs = re.findall(verb_pattern, sentence)\n",
"\n",
" # Check if there is at least one verb in the sentence\n",
" if verbs:\n",
" # Get the last verb in the sentence\n",
" last_verb = verbs[-1]\n",
"\n",
" # Extract the verb stem\n",
" verb_stem = last_verb[:2]\n",
"\n",
" # Conjugate the verb stem with \"මි\"\n",
" conjugated_verb = verb_stem + \"මි\"\n",
"\n",
" # Remove the last word (verb) from the sentence\n",
" words = sentence.split()\n",
" words.pop()\n",
"\n",
" # Append the conjugated verb to the sentence\n",
" words.append(conjugated_verb)\n",
"\n",
" # Reconstruct the sentence\n",
" conjugated_sentence = \" \".join(words)\n",
"\n",
" return conjugated_sentence\n",
"\n",
" return sentence"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T17:24:31.851227400Z",
"start_time": "2023-05-20T17:24:31.835226600Z"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"මම ඉතා හොදින් පීමි\n"
]
}
],
"source": [
"# Example usage\n",
"sentence = \"මම ඉතා හොදින් පීනනවා\"\n",
"conjugated_sentence = conjugate_sentence(sentence)\n",
"print(conjugated_sentence) # Output: මම සෙල්ලම් කරමි"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T17:24:34.100069200Z",
"start_time": "2023-05-20T17:24:34.074817400Z"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of Sinhala letters: 30\n"
]
}
],
"source": [
"import regex\n",
"\n",
"def count_sinhala_letters(text):\n",
" # Define the pattern for Sinhala letters using Unicode properties\n",
" pattern = regex.compile(r'\\p{Script=Sinhala}')\n",
"\n",
" # Find all matches of Sinhala letters in the text\n",
" matches = pattern.findall(text)\n",
"\n",
" # Return the count of Sinhala letters\n",
" return len(matches)\n",
"\n",
"\n",
"# Example usage\n",
"text = \"මම සිංහල බස්නාහිර භාෂාවෙන් කතා කරයි\"\n",
"letter_count = count_sinhala_letters(text)\n",
"print(\"Number of Sinhala letters:\", letter_count)\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-20T17:37:09.749693300Z",
"start_time": "2023-05-20T17:37:09.695584900Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-08-08T23:13:59.733165Z",
"start_time": "2023-08-08T23:13:54.502417700Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prediction: [0]\n"
]
}
],
"source": [
"import pickle\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"# Load the trained model\n",
"with open('../../resources/pickels/svm_singular_plural.pkl', 'rb') as file:\n",
" saved_data = pickle.load(file)\n",
"\n",
"svm_model = saved_data['model']\n",
"vectorizer = saved_data['vectorizer']\n",
"\n",
"# Prepare input data\n",
"input_data = [\"සමනලයා\"] # Example input data as a list\n",
"\n",
"# Transform the input data features using the same vectorizer\n",
"input_data_features = vectorizer.transform(input_data)\n",
"\n",
"# Pass input data to the model for prediction\n",
"prediction = svm_model.predict(input_data_features)\n",
"\n",
"# Use the model's output\n",
"print(f\"Prediction: {prediction}\")\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-08T23:13:59.754162300Z",
"start_time": "2023-08-08T23:13:59.733165Z"
}
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-08T23:13:59.828562600Z",
"start_time": "2023-08-08T23:13:59.758670400Z"
}
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-08T23:13:59.829579200Z",
"start_time": "2023-08-08T23:13:59.773617700Z"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "sinlingua_test"
version = "0.0.4"
authors = [
{ name="Supun Gurusinghe", email="supunsameeran@gmail.com" },
{ name="Sandaruwini Galappaththi", email="sandaruwinigalappaththi@gmail.com" },
{ name="Supun Sarada Wijesinghe", email="saradawijesinghe@gmail.com" },
{ name="Binura Yasodya", email="binurayasodya24@gmail.com" },
]
description = "Package for Sinhala data processing"
readme = "README.md"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
[project.urls]
"Homepage" = "https://github.com/SupunGurusinghe/SinlinguaDocumentation/blob/main/README.md"
[tool.poetry.dependencies]
python = "^3.7"
chardet = "^3.0.4"
click = "^8.1.7"
colorama = "0.4.6"
gensim = "4.3.1"
joblib = "1.2.0"
nltk = "3.8.1"
numpy = "^1.22.4"
regex = "^2022.10.31"
scipy = "^1.9.3"
tqdm = "^4.64.1"
urllib3 = "^1.26.12"
pip = "^21.2.4"
wheel = "^0.37.1"
cryptography = "^38.0.3"
py = "^1.10.0"
lxml = "^4.9.2"
future = "^0.18.2"
matplotlib = "^3.5.3"
pytest = "^6.2.4"
sklearn = "^0.0"
scikit-learn = "^1.1.3"
requests = "^2.27.1"
pyparsing = "^2.4.7"
keras = "^2.11.0"
ipywidgets = "^7.6.3"
ipython = "^7.26.0"
notebook = "^6.5.2"
hypothesis = "^6.56.4"
setuptools = "^56.0.0"
pytz = "^2022.5"
cffi = "^1.14.6"
mpmath = "^1.3.0"
sympy = "^1.12"
fasttext = "^0.9.2"
psutil = "^5.9.3"
boto3 = "^1.26.43"
botocore = "^1.29.43"
pandas = "^1.3.5"
pygtrie = "^2.5.0"
fuzzywuzzy = "^0.18.0"
asyncio = "^3.4.3"
Levenshtein = "^0.21.0"
idna = "^2.10"
multidict = "^6.0.4"
attrs = "^21.2.0"
openai = "^0.27.2"
aiohttp = "^3.8.3"
plotly = "^5.5.0"
tenacity = "^8.1.0"
yarl = "^1.8.2"
aiosignal = "^1.3.1"
frozenlist = "^1.3.3"
python-multipart = "^0.0.6"
certifi = "^2021.5.30"
simplejson = "^3.18.1"
sinling = "^0.3.6"
hpack = "^3.0.0"
hyperframe = "^5.2.0"
h2 = "^3.2.0"
h11 = "^0.9.0"
hstspreload = "^2023.1.1"
httpcore = "^0.9.1"
rfc3986 = "^1.5.0"
sniffio = "^1.3.0"
httpx = "^0.13.3"
googletrans = "^3.0.0"
torch = "^2.0.1"
transformers = "^4.29.1"
\ No newline at end of file
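With this metadata, the distribution can be built with a PEP 517 frontend such as `python -m build` and installed with `pip install dist/sinlingua_test-0.0.4-py3-none-any.whl` (the wheel file name is illustrative). Note that the `[tool.poetry.dependencies]` table is read only by Poetry; the declared setuptools backend ignores it, so those version pins take effect only under a Poetry workflow.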
කෑවා
නෑවා
හෑවා
ආවා
ගියා
බිව්වා
කැඩුවා
නැටුවා
සේදුවා
හේදුවා
මැරුවා
පැගුවා
හැකිලුවා
දිව්වා
කැපුවා
කලා
කෙරුවා
කිව්වා
කැවුනා
දැනුනා
බැලුවා
නිවුනා
පියෑඹුවා
පිලිස්සුවා
හිනැහුනා
ගත්තා
උනා
වුනා
පෑව්වා
හැපුවා
පීරුවා
නැරඹුවා
ගැයුවා
වැයුවා
දැමුවා
කෙරුවා
වැන්දා
නැමුනා
පිම්බා
නැග්ගා
බැස්සා
නැග්ගුවා
ගෑවා
රිංගුවා
පීනුවා
හෑරුවා
හිතුවා
සිතුවා
ඇන්දා
පැලදුවා
හැදුවා
මැසුවා
ගෙතුවා
ඉව්වා
ඇරියා
වැහුවා
පැන්නා
මැහුවා
\ No newline at end of file
from resources.removables.core.tokenizer import *
__all__ = [
'Stemmer'
]
class Stemmer:
def __init__(self):
pass
def stem(self, text):
raise NotImplementedError
\ No newline at end of file
from typing import Text, List
__all__ = [
'StopRemover'
]
# noinspection SpellCheckingInspection
class StopRemover:
def stop_word_remove(self, sentence: Text) -> List[Text]:
raise NotImplementedError()
\ No newline at end of file
from typing import Text, List
__all__ = [
'Tokenizer'
]
# noinspection SpellCheckingInspection
class Tokenizer:
def tokenize(self, sentence: Text) -> List[Text]:
raise NotImplementedError()
\ No newline at end of file
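For orientation, a minimal concrete implementation of this interface could look like the sketch below; WhitespaceTokenizer is a hypothetical name and plain whitespace splitting is an assumption, not necessarily the package's actual algorithm:
class WhitespaceTokenizer(Tokenizer):
    def tokenize(self, sentence: Text) -> List[Text]:
        # Naive whitespace split; real Sinhala tokenization may also need
        # script-aware handling of punctuation and zero-width joiners
        return sentence.split()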
from os.path import dirname, abspath, join
PROJECT_PATH = join(dirname(abspath(__file__)), '..')
RESOURCE_PATH = join(PROJECT_PATH, 'sinhala_data_processor', 'resources')
\ No newline at end of file
from sinhala_data_processor.preprocessor.tokenizer import *
\ No newline at end of file
ගුරුවරු
ළමයි
ශිෂ්‍යයෝ
මව්වරු
පියවරු
දෙමව්පියෝ
සත්තු
දොස්තරවරු
ගායකයො
ප්‍රේක්ශකයො
නලුවො
සන්ගීතකාරයො
මනුස්ස්‍යො
මනුශ්‍යයෝ
වදුරෝ
ගොවියෝ
රජවරු
නායකයෝ
මිනිස්සු
සාමාජිකයෝ
සමූහයෝ
දරුවෝ
පාත්තයෝ
කුරුල්ලෝ
බල්ලෝ
පටව්
බබාලා
මීයෝ
පූසෝ
මාළු
දෙවියෝ
වැද්දෝ
කපුටෝ
දිම්යෝ
කඩියෝ
අලි
යාලුවො
යාළුවො
කැල
සමූහය
රැල
රන්චුව
පෙල
රචකයො
ලේඛකයෝ
ඇදුරෝ
කථිකාචාර්යවරු
නිලියෝ
නිළයෝ
ඉන්ජිනේරුවරු
කොන්දොස්තරවරු
නිලධාරියෝ
සර්පයෝ
කතුවරු
මනාලියෝ
මනාලයෝ
කුමාරියෝ
කුමාරයෝ
කැරපොත්තො
වැඩිහිටියෝ
කොල්ලො
කෙල්ලො
යක්කු
හිමිවරු
ශ්‍රමිකයෝ
කම්කරුවෝ
ශ්‍රමනයෝ
ශ්‍රමණයෝ
ගණිකාවො
කාන්තාවෝ
පිරිමි
අය
නාටිකාන්ගනාවෝ
අන්ගනාවො
කසකරුවෝ
ගිනිබෝලකරුවෝ
විදේශිකයෝ
පෙම්වත්තු
පෙම්වතියෝ
ප්‍රේමවන්තයෝ
ධීවරයෝ
පෙදරේරුවෝ
කාර්මිකයෝ
තරඟකරුවෝ
ජයග්‍රාහකයො
පරාජිකයෝ
ක්‍රීඩකයෝ
දායකයෝ
පාලකයෝ
දේශපාලකයෝ
සිරකරුවෝ
නිලදාරීවරු
වරු
පිරිස
පාහරයෝ
ආක්‍රමණිකයෝ
ද්‍රෝහියෝ
සේවකයෝ
සහෝදරියෝ
සහෝදරයෝ
අම්මලා
තාත්තලා
කුරුමිනියෝ
\ No newline at end of file
import pkg_resources
PROJECT_PATH = pkg_resources.resource_filename('sinlingua', '')
RESOURCE_PATH = pkg_resources.resource_filename('sinlingua', 'resources')
\ No newline at end of file
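pkg_resources works here but has been deprecated in recent setuptools releases; an equivalent lookup with the standard library, assuming Python 3.9+, would be:
from importlib.resources import files

PROJECT_PATH = str(files('sinlingua'))
RESOURCE_PATH = str(files('sinlingua') / 'resources')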
@@ -4,7 +4,7 @@ import openai
import json
import time
import requests
from sinhala_data_processor.config import RESOURCE_PATH
from sinlingua.config import RESOURCE_PATH
class LLMConfig:
......
from sinhala_data_processor.grammar_rule.grammar_rules import GrammarRules
from sinhala_data_processor.grammar_rule.rule_based_1 import FirstPerson
from sinhala_data_processor.grammar_rule.rule_based_2 import SecondPersonSingular
from sinhala_data_processor.grammar_rule.rule_based_3 import SecondPersonPlural
from sinhala_data_processor.grammar_rule.rule_based_4 import FourthPerson
from sinhala_data_processor.grammar_rule.rule_based_future_1 import FirstPersonFuture
from sinhala_data_processor.grammar_rule.rule_based_plural import PluralSubject
from sinhala_data_processor.grammar_rule.rule_based_plural_past import PluralSubjectPast
from sinhala_data_processor.grammar_rule.rule_based_singular import SingularSubject
from sinhala_data_processor.grammar_rule.rule_based_past_1 import PastFirstPerson
from sinhala_data_processor.grammar_rule.rule_based_past_2 import PastSecondPersonSingular
from sinhala_data_processor.grammar_rule.rule_based_past_3 import PastSecondPersonPlural
from sinhala_data_processor.grammar_rule.mask import PredictNoun
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.grammar_rule.rule_based_1 import FirstPerson
from sinlingua.grammar_rule.rule_based_2 import SecondPersonSingular
from sinlingua.grammar_rule.rule_based_3 import SecondPersonPlural
from sinlingua.grammar_rule.rule_based_4 import FourthPerson
from sinlingua.grammar_rule.rule_based_future_1 import FirstPersonFuture
from sinlingua.grammar_rule.rule_based_plural import PluralSubject
from sinlingua.grammar_rule.rule_based_plural_past import PluralSubjectPast
from sinlingua.grammar_rule.rule_based_singular import SingularSubject
from sinlingua.grammar_rule.rule_based_past_1 import PastFirstPerson
from sinlingua.grammar_rule.rule_based_past_2 import PastSecondPersonSingular
from sinlingua.grammar_rule.rule_based_past_3 import PastSecondPersonPlural
from sinlingua.grammar_rule.mask import PredictNoun
class GrammarMain:
......