Commit a014a7ef authored by W.G.G.A. Supun Sameera

Merge branch 'IT20146924' into 'master'

It20146924

See merge request !94
parents c23ae5d3 b82596a0
......@@ -4,28 +4,27 @@ import openai
import json
import time
import requests
from sinlingua.config import RESOURCE_PATH
from sinlingua.src.grammar_rule_resources import grammar_rule_llm_config
class LLMConfig:
def __init__(self, api_key: str = None, org_key: str = None):
    """Prepare the LLM settings used by this instance.

    The settings start from the bundled ``grammar_rule_llm_config`` resource.
    When both ``api_key`` and ``org_key`` are given they replace the
    corresponding entries; if either is missing the bundled values are kept.

    :param api_key: API key to store under ``"api_key"``.
    :param org_key: organisation key to store under ``"org_key"``.
    """
    # Copy the bundled settings: writing caller-supplied credentials into the
    # module-level dict would leak them into every later LLMConfig instance.
    # The previous read of conversion_config.json is dropped because its
    # result was overwritten on the very next line.
    self.json_data = dict(grammar_rule_llm_config)
    if api_key is not None and org_key is not None:
        self.json_data["api_key"] = api_key
        self.json_data["org_key"] = org_key
@staticmethod
def __read_json_config(file_path: str) -> dict:
    """Load a JSON configuration file located under ``RESOURCE_PATH``.

    :param file_path: path of the file relative to ``RESOURCE_PATH``.
    :return: the parsed JSON content, or an empty dict when reading or
        parsing fails (the failure is reported on stdout).
    """
    try:
        config_path = os.path.join(RESOURCE_PATH, file_path)
        with open(config_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as err:
        # Best-effort loader: report the problem and fall back to no settings.
        print(f"Error while reading JSON configuration file '{file_path}': {str(err)}")
        return {}
# @staticmethod
# def __read_json_config(file_path: str) -> dict:
# try:
# # Read JSON configuration file and return the data as dictionary
# with open(os.path.join(RESOURCE_PATH, file_path), 'r', encoding='utf-8') as json_file:
# json_data_c = json.load(json_file)
# return json_data_c
# except Exception as e:
# # Handle exceptions while reading JSON configuration
# print(f"Error while reading JSON configuration file '{file_path}': {str(e)}")
# return {}
def __get_llm_response(self, text: str, level: int) -> str:
completion = None
......
......@@ -160,7 +160,9 @@ class GrammarMain:
# if __name__ == "__main__":
# obj = GrammarMain()
# obj = GrammarMain()
# out = obj.mapper(sentence="ඇය මල් නෙලුවා")
# print(out)
# sent = "මා කෝප්පෙන් වතුර බිව්වා"
# out = obj.mapper(sentence=sent)
# print(out)
......
import os
from fuzzywuzzy import fuzz
from googletrans import Translator
from sinlingua.config import RESOURCE_PATH
class GrammarRules:
@staticmethod
def find_similar_words(list_items, input_string, threshold=75):
    """Find the candidate that best matches any word of ``input_string``.

    Every whitespace-separated word of ``input_string`` is scored against
    every entry of ``list_items`` with ``fuzz.ratio``. A pair only counts
    when its score reaches ``threshold``; among those, the highest score
    wins and the earliest pair is kept on ties.

    :param list_items: iterable of candidate strings (surrounding whitespace
        is ignored).
    :param input_string: sentence whose words are compared to the candidates.
    :param threshold: minimum score a pair needs to be accepted.
    :return: ``(candidate, word, score)`` for the best pair, or
        ``(None, None, 0)`` when nothing reaches the threshold.
    """
    words = input_string.split()
    best_candidate = None
    best_word = None
    best_score = 0

    # Nothing to compare: skip the scan instead of walking the whole list.
    if not words:
        return best_candidate, best_word, best_score

    for raw_item in list_items:
        candidate = raw_item.strip()  # stripped once per candidate, not once per word
        for word in words:
            score = fuzz.ratio(word, candidate)
            # Strict '>' keeps the first pair found when scores are equal.
            if score >= threshold and score > best_score:
                best_score = score
                best_candidate = candidate
                best_word = word

    return best_candidate, best_word, best_score
def common_function(self, sentence):
......
import requests
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.grammar_rule.LLM_config import LLMConfig
from sinlingua.src.grammar_rule_resources import verbs
class PredictNoun(GrammarRules):
......@@ -10,8 +11,7 @@ class PredictNoun(GrammarRules):
grammar_obj = GrammarRules()
global conjugated_sentence
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
ratio = returned_string_verb[2]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs
class FirstPerson(GrammarRules):
def common_function(self, sentence):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
# call the function find verb of sentence
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs
class SecondPersonSingular(GrammarRules):
......@@ -8,8 +9,7 @@ class SecondPersonSingular(GrammarRules):
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
# call the function find verb of sentence
# returned_string_verb = find_similar_words(file_path_for_verb, sentence)
verb_checked = returned_string_verb[0]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs
class SecondPersonPlural(GrammarRules):
......@@ -7,8 +8,7 @@ class SecondPersonPlural(GrammarRules):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
ratio = returned_string_verb[2]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs
class FourthPerson(GrammarRules):
......@@ -7,8 +8,7 @@ class FourthPerson(GrammarRules):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/question_verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
# call the function find verb of sentence
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs_2f
class FirstPersonFuture(GrammarRules):
......@@ -7,8 +8,7 @@ class FirstPersonFuture(GrammarRules):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs_2f.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs_2f, sentence)
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
ratio = returned_string_verb[2]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import past_verbs
class PastFirstPerson(GrammarRules):
......@@ -7,8 +8,7 @@ class PastFirstPerson(GrammarRules):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb_past = 'IT20167264/word_set/past_verbs.text'
returned_string_verb_past = grammar_obj.find_similar_words(file_path_for_verb_past, sentence)
returned_string_verb_past = grammar_obj.find_similar_words(past_verbs, sentence)
# call the function find verb of sentence
verb_checked_past = returned_string_verb_past[0]
actual_word_past = returned_string_verb_past[1]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import past_verbs
class PastSecondPersonSingular(GrammarRules):
......@@ -8,8 +9,7 @@ class PastSecondPersonSingular(GrammarRules):
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/past_verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(past_verbs, sentence)
# call the function find verb of sentence
# returned_string_verb = find_similar_words(file_path_for_verb, sentence)
verb_checked = returned_string_verb[0]
......
from fuzzywuzzy import fuzz
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import past_verbs
class PastSecondPersonPlural(GrammarRules):
......@@ -7,8 +8,7 @@ class PastSecondPersonPlural(GrammarRules):
global conjugated_sentence
grammar_obj = GrammarRules()
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/past_verbs.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(past_verbs, sentence)
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
ratio = returned_string_verb[2]
......
from sinlingua.grammar_rule.grammar_rules import GrammarRules, translate_sinhala_to_english
from googletrans import Translator
from sinlingua.src.grammar_rule_resources import verbs, nouns_subject_plural
class PluralSubject(GrammarRules):
......@@ -8,13 +9,11 @@ class PluralSubject(GrammarRules):
global conjugated_sentence
prefixes = ["මා", "අපි", "මම", "ම", "අප", "ඔහු", "ඇය", "ඈ", "ඔවුන්", "ඔවුහු"]
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
file_path_for_subject = 'IT20167264/word_set/nouns_subject_plural.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
newsentence = sentence
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
returned_string_subject = grammar_obj.find_similar_words(file_path_for_subject, sentence)
returned_string_subject = grammar_obj.find_similar_words(nouns_subject_plural, sentence)
subject_checked = returned_string_subject[0]
actual_subject = returned_string_subject[1]
ratio = returned_string_subject[2]
......
from sinlingua.grammar_rule.grammar_rules import GrammarRules, translate_sinhala_to_english
from googletrans import Translator
from sinlingua.src.grammar_rule_resources import past_verbs, nouns_subject_plural
class PluralSubjectPast(GrammarRules):
......@@ -8,13 +9,11 @@ class PluralSubjectPast(GrammarRules):
global conjugated_sentence
prefixes = ["මා", "අපි", "මම", "ම", "අප", "ඔහු", "ඇය", "ඈ", "ඔවුන්", "ඔවුහු"]
conjugated_verb = ''
file_path_for_verb_past = 'IT20167264/word_set/past_verbs.text'
file_path_for_subject_past = 'IT20167264/word_set/nouns_subject_plural.text'
wordlist = sentence.split()
returned_string_verb_past = grammar_obj.find_similar_words(file_path_for_verb_past, wordlist[-1])
returned_string_verb_past = grammar_obj.find_similar_words(past_verbs, wordlist[-1])
verb_checked_past = returned_string_verb_past[0]
actual_word_past = returned_string_verb_past[1]
returned_string_subject_past = grammar_obj.find_similar_words(file_path_for_subject_past, wordlist[0])
returned_string_subject_past = grammar_obj.find_similar_words(nouns_subject_plural, wordlist[0])
subject_checked_past = returned_string_subject_past[0]
actual_subject_past = returned_string_subject_past[1]
ratio = returned_string_subject_past[2]
......
from sinlingua.grammar_rule.grammar_rules import GrammarRules
from sinlingua.src.grammar_rule_resources import verbs, nouns_subject_singular
class SingularSubject(GrammarRules):
......@@ -7,12 +8,10 @@ class SingularSubject(GrammarRules):
global conjugated_sentence
prefixes = ["මා", "අපි", "මම", "ම", "අප", "ඔහු", "ඇය", "ඈ", "ඔවුන්", "ඔවුහු"]
conjugated_verb = ''
file_path_for_verb = 'IT20167264/word_set/verbs.text'
file_path_for_subject = 'IT20167264/word_set/nouns_subject_singular.text'
returned_string_verb = grammar_obj.find_similar_words(file_path_for_verb, sentence)
returned_string_verb = grammar_obj.find_similar_words(verbs, sentence)
verb_checked = returned_string_verb[0]
actual_word = returned_string_verb[1]
returned_string_subject = grammar_obj.find_similar_words(file_path_for_subject, sentence)
returned_string_subject = grammar_obj.find_similar_words(nouns_subject_singular, sentence)
subject_checked = returned_string_subject[0]
actual_subject = returned_string_subject[1]
ratio = returned_string_subject[2]
......
......@@ -3,6 +3,7 @@ import functools
from typing import Union
import pygtrie as trie
from sinlingua.config import RESOURCE_PATH
from sinlingua.src.preprocessor_resources import suffix_list_dependent_vowels, suffixes_list, stem_dictionary
dependent_vowels = {
"අ": "ා",
......@@ -50,10 +51,10 @@ def _load_lists(file: str) -> list:
class SinhalaStemmer:
def __init__(self):
    """Attach the lookup tables used by the stemming steps.

    The stem table and both suffix lists come from the in-memory
    preprocessor resources; only ``self.suffixes`` is still built by
    ``_load_suffixes``.
    """
    super().__init__()
    # These three were previously loaded from resource files and then
    # immediately replaced by the imported resources; the redundant loads
    # are gone, the final values are unchanged.
    self.stem_list = stem_dictionary
    self.suffixes_normal = suffixes_list
    self.suffixes_dependent_vowels = suffix_list_dependent_vowels
    # NOTE(review): this load also appeared commented out further down in the
    # original body. It is kept so the attribute still exists - confirm
    # whether anything reads self.suffixes before removing it.
    self.suffixes = _load_suffixes(file='suffixes_list.txt')
def step_one(self, text: str):
for items in self.stem_list:
......
from sinlingua.src.preprocessor_resources import stop_words
import os
class StopWordRemover:
def __init__(self):
    """Load the stop words from the bundled preprocessor resources.

    The words are stored as a set in ``self.stop_words``, the same container
    type the earlier file-based loading produced.
    """
    # The read of resources/stop_words.txt is dropped because its result was
    # overwritten straight away by the imported list. Building a set gives
    # constant-time membership checks and keeps the instance from sharing
    # the module-level list.
    self.stop_words = set(stop_words)
def remove_stop_words(self, text):
words = text.split()
......
This diff is collapsed.
# In-memory resources for the preprocessor.
# NOTE(review): the two suffix lists and the stem dictionary are empty in this
# view - confirm whether their contents are expected to be filled in elsewhere.
suffixes_list = []
suffix_list_dependent_vowels = []

# Sinhala stop words. Every entry is a quoted string literal: the bare tokens
# and empty list slots used before cannot be parsed as Python. The empty slots
# are dropped; duplicates and order are kept as they were.
stop_words = [
    "සහ",
    "සමග",
    "සමඟ",
    "අහා",
    "ආහ්",
    "ඕහෝ",
    "අනේ",
    "අඳෝ",
    "අපොයි",
    "අපෝ",
    "අයියෝ",
    "ආයි",
    "ඌයි",
    "චී",
    "චිහ්",
    "චික්",
    "හෝ‍",
    "දෝ",
    "දෝහෝ",
    "මෙන්",
    "සේ",
    "වැනි",
    "බඳු",
    "වන්",
    "අයුරු",
    "අයුරින්",
    "ලෙස",
    "වැඩි",
    "ශ්‍රී",
    "හා",
    "නිසා",
    "නිසාවෙන්",
    "බවට",
    "බව",
    "බවෙන්",
    "නම්",
    "වැඩි",
    "සිට",
    "දී",
    "මහා",
    "මහ",
    "පමණ",
    "පමණින්",
    "පමන",
    "වන",
    "විට",
    "විටින්",
    "මේ",
    "මෙලෙස",
    "මෙයින්",
    "ඇති",
    "ලෙස",
    "සිදු",
    "වශයෙන්",
    "යන",
    "සඳහා",
    "මගින්",
    "හෝ‍",
    "ඉතා",
    "එම",
    "අතර",
    "විසින්",
    "සමග",
    "පිළිබඳව",
    "පිළිබඳ",
    "තුළ",
    "බව",
    "වැනි",
    "මහ",
    "මෙම",
    "මෙහි",
    "මේ",
    "වෙත",
    "වෙතින්",
    "වෙතට",
    "වෙනුවෙන්",
    "වෙනුවට",
    "වෙන",
    "ගැන",
    "නෑ",
    "අනුව",
    "නව",
    "පිළිබඳ",
    "විශේෂ",
    "දැනට",
    "එහෙන්",
    "මෙහෙන්",
    "එහේ",
    "මෙහේ",
    "තවත්",
    "තව",
    "සහ",
    "දක්වා",
    "ගේ",
    "ක්",
    "බවත්",
    "බවද",
    "මත",
    "ඇතුලු",
    "ඇතුළු",
    "මෙසේ",
    "වඩා",
    "වඩාත්ම",
    "නිති",
    "නිතිත්",
    "නිතොර",
    "නිතර",
    "ඉක්බිති",
    "දැන්",
    "යලි",
    "පුන",
    "ඉතින්",
    "සිට",
    "සිටන්",
    "පටන්",
    "තෙක්",
    "දක්වා",
    "සා",
    "තාක්",
    "තුවක්",
    "පවා",
    "හෝ‍",
    "වත්",
    "විනා",
    "හැර",
    "මිස",
    "මුත්",
    "කිම",
    "කිම්",
    "ඇයි",
    "මන්ද",
    "හෙවත්",
    "නොහොත්",
    "පතා",
    "පාසා",
    "ගානෙ",
    "තව",
    "ඉතා",
    "බොහෝ",
    "වහා",
    "සෙද",
    "සැනින්",
    "හනික",
    "එම්බා",
    "එම්බල",
    "බොල",
    "නම්",
    "වනාහි",
    "කලී",
    "ඉඳුරා",
    "අන්න",
    "ඔන්න",
    "මෙන්න",
    "උදෙසා",
    "පිණිස",
    "සඳහා",
    "අරබයා",
    "නිසා",
    "එනිසා",
    "එබැවින්",
    "බැවින්",
    "හෙයින්",
    "සේක්",
    "සේක",
    "ගැන",
    "අනුව",
    "පරිදි",
    "විට",
    "තෙක්",
    "මෙතෙක්",
    "මේතාක්",
    "තුරු",
    "තුරා",
    "තුරාවට",
    "තුලින්",
    "නමුත්",
    "එනමුත්",
    "වස්",
    "මෙන්",
    "ලෙස",
    "පරිදි",
    "එහෙත්",
]

stem_dictionary = {}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment