Changes for publish errors

parent 51db8e8b
......@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "sinlingua_test"
version = "0.0.1"
version = "0.0.4"
authors = [
{ name="Supun Gurusinghe", email="supunsameeran@gmail.com" },
{ name="Sandaruwini Galappaththi", email="sandaruwinigalappaththi@gmail.com" },
......@@ -22,3 +22,74 @@ classifiers = [
[project.urls]
"Homepage" = "https://github.com/SupunGurusinghe/SinlinguaDocumentation/blob/main/README.md"
# Dependency constraints written in Poetry's syntax ("^" caret ranges and exact pins).
# NOTE(review): the build backend declared for this project is
# "setuptools.build_meta"; a [tool.poetry.*] table is Poetry-specific, so
# presumably these entries are not what ends up as the published package's
# install requirements — TODO confirm, and if so mirror the runtime ones under
# [project] `dependencies`.
# NOTE(review): several entries look like build/test/dev tooling (pip, wheel,
# setuptools, pytest, hypothesis, notebook, ipython) rather than runtime
# requirements — TODO confirm whether they belong here.
[tool.poetry.dependencies]
python = "^3.7"
chardet = "^3.0.4"
click = "^8.1.7"
colorama = "0.4.6"
gensim = "4.3.1"
joblib = "1.2.0"
nltk = "3.8.1"
numpy = "^1.22.4"
regex = "^2022.10.31"
scipy = "^1.9.3"
tqdm = "^4.64.1"
urllib3 = "^1.26.12"
pip = "^21.2.4"
wheel = "^0.37.1"
cryptography = "^38.0.3"
py = "^1.10.0"
lxml = "^4.9.2"
future = "^0.18.2"
matplotlib = "^3.5.3"
pytest = "^6.2.4"
# NOTE(review): "sklearn" appears to be a placeholder distribution for
# scikit-learn, which is already listed on the next line — TODO confirm it can
# be dropped.
sklearn = "^0.0"
scikit-learn = "^1.1.3"
requests = "^2.27.1"
pyparsing = "^2.4.7"
keras = "^2.11.0"
ipywidgets = "^7.6.3"
ipython = "^7.26.0"
notebook = "^6.5.2"
hypothesis = "^6.56.4"
setuptools = "^56.0.0"
pytz = "^2022.5"
cffi = "^1.14.6"
mpmath = "^1.3.0"
sympy = "^1.12"
fasttext = "^0.9.2"
psutil = "^5.9.3"
boto3 = "^1.26.43"
botocore = "^1.29.43"
pandas = "^1.3.5"
pygtrie = "^2.5.0"
fuzzywuzzy = "^0.18.0"
asyncio = "^3.4.3"
Levenshtein = "^0.21.0"
idna = "^2.10"
multidict = "^6.0.4"
attrs = "^21.2.0"
openai = "^0.27.2"
aiohttp = "^3.8.3"
plotly = "^5.5.0"
tenacity = "^8.1.0"
yarl = "^1.8.2"
aiosignal = "^1.3.1"
frozenlist = "^1.3.3"
python-multipart = "^0.0.6"
certifi = "^2021.5.30"
simplejson = "^3.18.1"
sinling = "^0.3.6"
hpack = "^3.0.0"
hyperframe = "^5.2.0"
h2 = "^3.2.0"
h11 = "^0.9.0"
hstspreload = "^2023.1.1"
httpcore = "^0.9.1"
rfc3986 = "^1.5.0"
sniffio = "^1.3.0"
httpx = "^0.13.3"
googletrans = "^3.0.0"
torch = "^2.0.1"
transformers = "^4.29.1"
\ No newline at end of file
from os.path import dirname, abspath, join
import pkg_resources
PROJECT_PATH = join(dirname(abspath(__file__)), '..')
RESOURCE_PATH = join(PROJECT_PATH, 'sinlingua', 'resources')
\ No newline at end of file
PROJECT_PATH = pkg_resources.resource_filename('sinlingua', '')
RESOURCE_PATH = pkg_resources.resource_filename('sinlingua', 'resources')
\ No newline at end of file
......@@ -3,13 +3,14 @@ import openai
import json
import time
from sinlingua.singlish.rulebased_transliterator import RuleBasedTransliterator
from sinlingua.config import RESOURCE_PATH
from sinlingua.src.singlish_resources import config_data
class HybridTransliterator:
def __init__(self, api_key: str = None, org_key: str = None, prompt_masking: str = None, prompt_suggestion: str = None):
config_file = "config.json"
self.json_data = self.__read_json_config(file_path=config_file)
# config_file = "config.json"
# self.json_data = self.__read_json_config(file_path=config_file)
self.json_data = config_data
if api_key is not None and org_key is not None:
self.json_data["api_key"] = api_key
self.json_data["org_key"] = org_key
......@@ -21,17 +22,17 @@ class HybridTransliterator:
def view_prompt(self, level: int):
    """Print the text of the prompt stored at position ``level``.

    Args:
        level: Index into the ``"Prompts"`` list of ``self.json_data``.
    """
    prompt_entry = self.json_data["Prompts"][level]
    print(prompt_entry["content"])
@staticmethod
def __read_json_config(file_path: str) -> dict:
try:
# Read JSON configuration file and return the data as dictionary
with open(os.path.join(RESOURCE_PATH, file_path), 'r', encoding='utf-8') as json_file:
json_data_c = json.load(json_file)
return json_data_c
except Exception as e:
# Handle exceptions while reading JSON configuration
print(f"Error while reading JSON configuration file '{file_path}': {str(e)}")
return {}
# @staticmethod
# def __read_json_config(file_path: str) -> dict:
# try:
# # Read JSON configuration file and return the data as dictionary
# with open(os.path.join(RESOURCE_PATH, file_path), 'r', encoding='utf-8') as json_file:
# json_data_c = json.load(json_file)
# return json_data_c
# except Exception as e:
# # Handle exceptions while reading JSON configuration
# print(f"Error while reading JSON configuration file '{file_path}': {str(e)}")
# return {}
def __get_gpt_response(self, text: str, level: int, word: str = "") -> str:
completion = None
......
import json
import os
from sinlingua.config import RESOURCE_PATH
import pkg_resources
from sinlingua.src.singlish_resources import alphabet
class RuleBasedTransliterator:
class RuleBasedTransliterator():
def __init__(self):
"""
Initialize the RuleBasedTransliterator object.
......@@ -24,8 +25,9 @@ class RuleBasedTransliterator:
self.consonants = {}
self.dependent_vowels = {}
alphabet = "singlish-alphabet.json"
data = self.__read_json_config(file_path=alphabet)
# alphabet = "singlish-alphabet.json"
# data = self.__read_json_config(file_path=alphabet)
data = alphabet
self.vowels = data.get('vowels', {})
self.consonants = data.get('consonants', {})
self.dependent_vowels = data.get('dependent_vowels', {})
......@@ -110,17 +112,18 @@ class RuleBasedTransliterator:
return logical_groups
@staticmethod
def __read_json_config(file_path: str) -> dict:
try:
# Read JSON configuration file and return the data as dictionary
with open(os.path.join(RESOURCE_PATH, file_path), 'r', encoding='utf-8') as json_file:
json_data_c = json.load(json_file)
return json_data_c
except Exception as e:
# Handle exceptions while reading JSON configuration
print(f"Error while reading JSON configuration file '{file_path}': {str(e)}")
return {}
# @staticmethod
# def __read_json_config(file_path: str) -> dict:
# try:
# json_file_path = pkg_resources.resource_filename('sinlingua', os.path.join('resources', file_path))
# # Read JSON configuration file and return the data as dictionary
# with open(os.path.join(json_file_path, file_path), 'r', encoding='utf-8') as json_file:
# json_data_c = json.load(json_file)
# return json_data_c
# except Exception as e:
# # Handle exceptions while reading JSON configuration
# print(f"Error while reading JSON configuration file '{file_path}': {str(e)}")
# return {}
def transliterator(self, text: str) -> str:
"""
......
# Singlish (romanised) -> Sinhala lookup tables, embedded as a Python dict.
# RuleBasedTransliterator reads the three top-level keys below
# ("vowels", "consonants", "dependent_vowels") via dict.get().
# Keys are the Latin-letter spellings; values are the Sinhala glyphs they map to.
alphabet = {
# Romanised spelling -> independent vowel letter.
"vowels": {
"a": "අ",
"A": "අ",
"aa": "ආ",
"Aa": "ආ",
"ae": "ඇ",
"Ae": "ඇ",
"aee": "ඈ",
"Aee": "ඈ",
"i": "ඉ",
"I": "ඉ",
"ii": "ඊ",
"Ii": "ඊ",
"u": "උ",
"U": "උ",
"uu": "ඌ",
"Uu": "ඌ",
"ERU": "ඍ",
"ERU'": "ඎ",
"IRU": "ඏ",
"IRU'": "ඐ",
"e": "එ",
"E": "එ",
"ee": "ඒ",
"Ee": "ඒ",
"ai": "ඓ",
"Ai": "ඓ",
"o": "ඔ",
"O": "ඔ",
"oo": "ඕ",
"Oo": "ඕ",
"au": "ඖ",
"Au": "ඖ",
"x": "(අං)",
"X": "(අඃ)"
},
# Romanised spelling -> consonant letter. Several spellings map to the same
# glyph (e.g. "k", "c" and "K").
"consonants": {
"k": "ක",
"c": "ක",
"K": "ක",
"kh": "ඛ",
"Kh": "ඛ",
"g": "ග",
"G": "ඝ",
"gh": "ඝ",
"Gh": "ඝ",
"ng": "ඞ",
"nng": "ඟ",
"zg": "ඟ",
"ch": "ච",
"Ch": "ඡ",
"j": "ජ",
"J": "ඣ",
"jh": "ඣ",
"ngj": "ඤ",
"zk": "ඤ",
"ny": "ඤ",
"Ngj": "ඥ",
"jny": "ඥ",
"nyj": "ඦ",
"zj": "ඦ",
"t": "ට",
"T": "ඨ",
"d": "ඩ",
"D": "ඪ",
"N": "ණ",
"zd": "ඬ",
"nd": "ඬ",
"th": "ත",
"Th": "ථ",
"dh": "ද",
"Dh": "ධ",
"n": "න",
"ndh": "ඳ",
"p": "ප",
"P": "ඵ",
"ph": "ඵ",
"r": "ර",
"l": "ල",
"L": "ළ",
"s": "ස",
"sh": "ශ",
"Sh": "ෂ",
"h": "හ",
"w": "ව",
"v": "ව",
"f": "ෆ",
"b": "බ",
"B": "භ",
"bh": "භ",
"m": "ම",
"mb": "ඹ",
"y": "ය"
},
# Romanised spelling -> dependent vowel sign (combining mark).
# NOTE(review): the long "ae" sound is keyed "aee" under "vowels" but "aae"
# here — presumably one of the two spellings is unintended; TODO confirm
# against the transliteration rules.
"dependent_vowels": {
"aa": "ා",
"ae": "ැ",
"aae": "ෑ",
"i": "ි",
"ii": "ී",
"u": "ු",
"uu": "ූ",
"ERU": "ෘ",
"e": "ෙ",
"ee": "ේ",
"ai": "ෛ",
"o": "ො",
"oo": "ෝ",
"au": "ෞ",
"x": "ං"
}
}
import os

# Default settings for the GPT-backed transliterator (HybridTransliterator
# assigns this dict to ``self.json_data`` and may override "api_key" /
# "org_key" with constructor arguments).
#
# SECURITY: credentials must never be committed to source or shipped inside the
# published package. They are read from the environment instead and default to
# an empty string when unset. Any key that was previously committed in this
# file should be treated as compromised and revoked.
config_data = {
    # OpenAI credentials: taken from the environment, empty when not provided.
    "api_key": os.environ.get("OPENAI_API_KEY", ""),
    "org_key": os.environ.get("OPENAI_ORGANIZATION", ""),
    # Chat-completion request parameters.
    "model": "gpt-3.5-turbo",
    "temperature": 0,
    "max_tokens": 2000,
    "Top_P": 1,
    "Frequency_penalty": 0,
    "Presence_penalty": 0,
    "max_characters": 4000,
    "TC_Only": "NO",
    # Prompt templates, addressed by index ("level"). The placeholders
    # {{masked-sentence}} and {{misspelled-word}} are left in the text for the
    # caller to substitute.
    "Prompts": [
        {
            "role": "user",
            "content": "I can provide you a sentence with some spelling errors. Your goal is to accurately identify the misspelled Sinhala words. \n\nThe output should be presented in JSON format, structured as follows:\n\n{\n \"word_list\": [\n \"<first_incorrect_word>\",\n \"<second_incorrect_word>\",\n ...............,\n \"<last_incorrect_word>\"\n ]\n}\nMake sure to use only the words which is not exist in the Sinhala language as \"<first_incorrect_word>\", \"<second_incorrect_word>\", ..............., \"<last_incorrect_word>\".\n\nSentence: '{{masked-sentence}}'"
        },
        {
            "role": "user",
            "content": "I can provide you with a version of the word with some spelling errors. Your goal is to accurately identify the most suitable Sinhala word that matches the given misspelled version for the specified <mask>. Please note that the word you suggest should indeed exist in the Sinhala language (Real Word). Decide the category of the misspelled word as well. Word categories are subject, predicate, object, complement, and modifier. Get the help of this to decide the word as well.\n\nThe output should be presented in JSON format, structured as follows:\n\n{\n \"<misspelled_word>\": \"<correct_word>\",\n \"category\": \"<category_from_given_list>\"\n}\n\nSentence with the <mask>: '{{masked-sentence}}'\nMisspelled word: '{{misspelled-word}}'"
        },
    ],
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment