Add Flask backend files for the components in the KGQnA folder

parent 02d55174
import spacy,requests
from transformers import pipeline
class ComplexFunc:
    """Rule-based extraction of knowledge-graph tuples from spaCy parses.

    Every tuple produced by this class is a six-item list:
    ``[source, relation, aux_relation, target, time, place]``.
    """

    _SUBJECT_DEPS = ('nsubj', 'subj', 'nsubjpass')
    _OBJECT_DEPS = ('obj', 'dobj', 'pobj')
    _SUBJECT_MODIFIER_DEPS = ('compound', 'nmod', 'amod', 'poss', 'case', 'nummod')
    _COMPOUND_PART_DEPS = ('compound', 'nummod', 'quantmod')

    def __init__(self):
        self.ent_pairs = list()
        self.nlp = spacy.load('en_core_web_sm')
        self.nlp_ = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _nbor(token, offset):
        """Return ``token.nbor(offset)``, or ``None`` when it is out of range."""
        try:
            return token.nbor(offset)
        except IndexError:
            return None

    @staticmethod
    def _root_of(token):
        """Return the first ancestor of *token* labelled ``ROOT``, or ``None``."""
        for ancestor in token.ancestors:
            if ancestor.dep_ == 'ROOT':
                return ancestor
        return None

    def _compound_phrase(self, token):
        """Return *token* prefixed by the compound/number parts of its subtree.

        Parts are collected in iteration order and collection stops at the
        first coordinating conjunction (``cc``).
        """
        parts = []
        for part in token.subtree:
            if part.dep_ in self._COMPOUND_PART_DEPS:
                parts.append(str(part))
            elif part.dep_ == 'cc':
                break
        return " ".join(parts) + " " + str(token)

    @staticmethod
    def _add_plain_object(child, object_list):
        """Append a *child* that has no left children to *object_list*.

        When the child has right children, its ``conj`` children that are not
        directly preceded by punctuation are appended as well.
        """
        if list(child.rights):
            if str(child.text) not in object_list:
                object_list.append(str(child.text))
            for conjunct in child.children:
                if conjunct.dep_ == 'conj' and conjunct.nbor(-1).dep_ != 'punct':
                    object_list.append(str(conjunct.text))
        elif str(child) not in object_list:
            object_list.append(str(child))

    # ------------------------------------------------------------------
    # Statement analysis
    # ------------------------------------------------------------------
    def get_time_place_from_sent(self, sentence):
        """Return ``(dates, places)`` found among ``sentence.ents``.

        ``dates`` holds the text of every entity labelled ``DATE`` and
        ``places`` the text of every entity labelled ``GPE``.
        """
        xdate = [str(ent) for ent in sentence.ents if ent.label_ == 'DATE']
        xplace = [str(ent) for ent in sentence.ents if ent.label_ == 'GPE']
        return xdate, xplace

    def _add_statement_object(self, child, object_list):
        """Append the surface form of one object token of a statement."""
        if not list(child.lefts):
            self._add_plain_object(child, object_list)
            return

        # A token with left children is never the first token, so nbor(-1) exists.
        previous = child.nbor(-1)
        if previous.dep_ == 'nummod' and child.dep_ in self._OBJECT_DEPS:
            object_list.append(str(previous) + " " + str(child))
        elif previous.dep_ == 'punct':
            # Hyphenated forms such as "ice-cream" or "social-distancing".
            before = self._nbor(child, -2)
            if before is not None and before.dep_ in ('compound', 'amod'):
                object_list.append(str(before) + str(previous) + str(child))
        elif previous.dep_ == 'compound':
            object_list.append(self._compound_phrase(child))
        elif previous.dep_ == 'det':
            object_list.append(str(child))

    def find_obj(self, sentence, place, time):
        """Collect the object phrases of *sentence*.

        Args:
            sentence: iterable of parsed tokens.
            place: texts already recognised as places.
            time: texts already recognised as dates.

        Returns:
            ``(object_list, buffer_obj)`` where ``buffer_obj`` is the last
            token with an object dependency, or ``None`` when there is none.
        """
        object_list = []
        buffer_obj = None

        for word in sentence:
            if word.dep_ not in self._OBJECT_DEPS:
                continue
            buffer_obj = word
            text = str(word)
            previous = self._nbor(word, -1)

            if (text in place and previous is not None
                    and previous.dep_ == 'prep' and str(previous) == "of"):
                # "... of India": the place only qualifies another noun.
                continue

            if text not in time and text not in place:
                for child in word.subtree:
                    if (child.dep_ in ('conj', 'dobj', 'pobj', 'obj')
                            and str(child) not in time
                            and str(child) not in place):
                        self._add_statement_object(child, object_list)
            elif text in place and str(previous) != "of":
                if not object_list:
                    object_list.append(text)
            elif text in time and not object_list:
                object_list.append(text)

        return object_list, buffer_obj

    def find_subj(self, sentence):
        """Return the subject phrase(s) of *sentence*.

        The returned list contains the ``conj`` members of the first subject's
        subtree followed by the subject phrase itself (possibly ``""``).
        """
        deps = [word.dep_ for word in sentence]
        first_subject = next(
            (index for index, dep in enumerate(deps) if dep in self._SUBJECT_DEPS),
            None,
        )
        # Number of modifier/subject words that may still be consumed.
        budget = 1 if first_subject is None else first_subject + 1

        subject_list = []
        subject_final = ""
        for word in sentence:
            if budget <= 0:
                break
            if word.dep_ in self._SUBJECT_MODIFIER_DEPS:
                if subject_final == "":
                    subject_final = str(word)
                elif word.dep_ == 'case':
                    # Possessive marker: attach without a space ("Vibhav's").
                    subject_final += str(word)
                else:
                    subject_final += " " + str(word)
                budget -= 1
            elif word.dep_ in self._SUBJECT_DEPS:
                if subject_final == "":
                    subject_final = str(word)
                else:
                    subject_final += " " + str(word)
                subject_list.extend(
                    str(a.text) for a in word.subtree if a.dep_ == 'conj'
                )
                break

        subject_list.append(subject_final)
        return subject_list

    def find_relation(self, buffer_obj):
        """Return ``(relation, aux_relation)`` for the ROOT above *buffer_obj*.

        ``relation`` is ``'unknown'`` when *buffer_obj* is ``None`` or has no
        ROOT ancestor. Neighbours beyond the end of the document are treated
        as absent instead of raising ``IndexError``.
        """
        aux_relation = ""
        root = None if buffer_obj is None else self._root_of(buffer_obj)
        if root is None:
            return 'unknown', aux_relation

        first = self._nbor(root, 1)
        if first is None:
            return str(root), aux_relation
        second = self._nbor(root, 2)
        second_text = "" if second is None else str(second)

        if first.pos_ == 'VERB':
            if second is not None and second.dep_ == 'xcomp':
                relation = ' '.join((str(root), str(first), str(second)))
            else:
                relation = str(root)
                if second_text != 'and':
                    aux_relation = str(first) if first.dep_ == 'xcomp' else second_text
        elif first.pos_ in ('ADP', 'PART') and first.dep_ == 'aux' and str(first) == 'to':
            relation = " ".join((str(root), str(first)))
            if second_text != 'and':
                aux_relation = second_text
        elif first.dep_ == 'prep' and str(first) == 'to':
            relation = " ".join((str(root), str(first)))
        else:
            relation = str(root)

        return relation, aux_relation

    def normal_sent(self, sentence):
        """Return every subject/object tuple extracted from a statement.

        The result is also stored on ``self.ent_pairs``. It is empty when the
        sentence yields no object.
        """
        time, place = self.get_time_place_from_sent(sentence)
        subject_list = self.find_subj(sentence)
        object_list, buffer_obj = self.find_obj(sentence, place, time)
        relation, aux_relation = self.find_relation(buffer_obj)

        time = time[0] if time else ""
        place = place[0] if place else ""

        self.ent_pairs = [
            [str(subject).lower(), str(relation).lower(), str(aux_relation).lower(),
             str(target).lower(), str(time), str(place)]
            for subject in subject_list
            for target in object_list
        ]
        return self.ent_pairs

    # ------------------------------------------------------------------
    # Question analysis
    # ------------------------------------------------------------------
    def _collect_question_objects(self, token, maybe_time, maybe_place, object_list):
        """Append the object phrases governed by *token* to *object_list*."""
        text = str(token)
        previous = self._nbor(token, -1)

        if (text in maybe_place and previous is not None
                and previous.dep_ == 'prep' and str(previous) == "of"):
            # "... of India": the place only qualifies another noun.
            return

        if text not in maybe_time and text not in maybe_place:
            for child in token.subtree:
                if child.dep_ in ('conj', 'dobj', 'pobj', 'obj'):
                    if not list(child.lefts):
                        self._add_plain_object(child, object_list)
                        continue
                    before = child.nbor(-1)
                    two_before = self._nbor(child, -2)
                    if (before.dep_ == 'punct' and two_before is not None
                            and two_before.dep_ == 'compound'):
                        object_list.append(str(two_before) + str(before) + str(child))
                    elif before.dep_ == 'compound':
                        object_list.append(self._compound_phrase(child))
                    elif before.dep_ == 'det':
                        object_list.append(str(child))
                elif token.dep_ == 'xcomp':
                    object_list.append(text)
        elif text in maybe_place and str(previous) != "of":
            object_list.append(text)
        elif text in maybe_time and not object_list:
            object_list.append(text)

    def _question_relation(self, root):
        """Return ``(relation, aux_relation)`` for who/what/where questions."""
        if root is None:
            return 'unknown', ""
        first = self._nbor(root, 1)
        if first is not None and first.pos_ in ('ADP', 'PART', 'VERB'):
            second = self._nbor(root, 2)
            if second is not None and second.dep_ == 'xcomp':
                return str(root) + " " + str(first), str(second)
        return str(root), ""

    def _when_relation(self, root):
        """Return the relation text for a "when" question."""
        if root is None:
            return 'unknown'
        first = self._nbor(root, 1)
        if first is None or first.pos_ not in ('ADP', 'PART', 'VERB'):
            return str(root)
        second = self._nbor(root, 2)
        if second is not None and second.dep_ == 'xcomp':
            return ' '.join((str(root), str(first), str(second)))
        return ' '.join((str(root), str(first)))

    def _left_subject(self, root):
        """Return the subject found left of *root*, or ``'unknown'``."""
        subject = 'unknown'
        if root is None:
            return subject
        for left_word in root.lefts:
            if left_word.dep_ in self._SUBJECT_DEPS:
                subject = str(left_word)
                # The last left child wins, as a one-word modifier prefix.
                for modifier in left_word.lefts:
                    subject = str(modifier) + " " + str(left_word)
        return subject

    def _single_pair(self, subject, relation, aux_relation, target, time, place):
        """Store and return one lower-cased tuple as ``self.ent_pairs``."""
        self.ent_pairs = [[
            str(value).lower()
            for value in (subject, relation, aux_relation, target, time, place)
        ]]
        return self.ent_pairs

    def question_pairs(self, question__):
        """Turn a question into a single query tuple.

        The first token that triggers one of the question patterns decides the
        result. The unknown slot is filled with the question word
        (``what``/``when``/``where``) or, for "who" questions, the subject slot
        holds the subject found left of the ROOT.

        Returns:
            A one-element list holding the tuple, or ``None`` when no token
            matches a pattern.
        """
        questionNLPed = self.nlp(question__)
        maybe_object = [t for t in questionNLPed if t.dep_ in self._OBJECT_DEPS]
        maybe_time, maybe_place = self.get_time_place_from_sent(questionNLPed)
        time_text = maybe_time[0] if maybe_time else ""
        place_text = maybe_place[0] if maybe_place else ""
        last_object = maybe_object[-1] if maybe_object else ""
        object_list = []

        for token in questionNLPed:
            word = str(token).lower()

            # "Who ..." style: the object of the question is known.
            if token.dep_ in ('obj', 'dobj', 'pobj', 'xcomp') and word != "what":
                if token.dep_ == 'xcomp':
                    prev1 = self._nbor(token, -1)
                    prev2 = self._nbor(token, -2)
                    if (prev1 is not None and prev2 is not None
                            and prev1.dep_ == 'aux' and prev2.dep_ == 'ROOT'):
                        continue

                self._collect_question_objects(token, maybe_time, maybe_place, object_list)
                # Fall back to the token itself instead of raising IndexError
                # when no object phrase could be built.
                target = object_list[-1] if object_list else str(token)

                root = self._root_of(token)
                relation, aux_relation = self._question_relation(root)
                subject = 'unknown'
                if root is not None:
                    subjects = [a for a in root.lefts if a.dep_ in self._SUBJECT_DEPS]
                    if subjects:
                        subject = subjects[0]
                return self._single_pair(subject, relation, aux_relation,
                                         target, time_text, place_text)

            if word == "what":
                root = self._root_of(token)
                relation, aux_relation = self._question_relation(root)
                subject = self.find_subj(questionNLPed)[-1] if root is not None else 'unknown'
                return self._single_pair(subject, relation, aux_relation,
                                         token, time_text, place_text)

            if token.dep_ == 'advmod':
                if word == 'where':
                    root = self._root_of(token)
                    relation, aux_relation = self._question_relation(root)
                    subject = (self.find_subj(questionNLPed)[-1]
                               if root is not None else 'unknown')
                    return self._single_pair(subject, relation, aux_relation,
                                             last_object, time_text, "where")
                if word == 'when':
                    root = self._root_of(token)
                    return self._single_pair(self._left_subject(root),
                                             self._when_relation(root), "",
                                             last_object, "when", place_text)

        return None
# import json
# import pandas
import os
class exportToJSON:
    """Persist extracted entity pairs as ``extra/database.json``."""

    def __init__(self):
        super(exportToJSON, self).__init__()

    def dumpdata(self, pairs):
        """Write *pairs* to ``extra/database.json`` (relative to the cwd).

        *pairs* must provide ``to_json(path, orient=...)``; the ``extra``
        directory is created when it does not exist yet.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs('extra', exist_ok=True)
        pairs.to_json('extra/database.json', orient='index')
class exportToCSV:
    """Serialise extracted entity pairs as CSV text."""

    def __init__(self):
        # The original called super(exportToJSON, self), which raises
        # TypeError because an exportToCSV is not an exportToJSON.
        super().__init__()

    def dumpdata(self, pairs):
        """Return *pairs* rendered as CSV text without the index column.

        *pairs* must provide ``to_csv(index=...)``. The text used to be
        computed and discarded; it is now returned to the caller.
        """
        return pairs.to_csv(index=False)
# import re
import pandas as pd
import spacy
from KGQnA._complex import ComplexFunc
from KGQnA._resolvedep import change_nouns
class GetEntity:
    """Extract entity pairs from free text, one sentence at a time."""

    _COLUMNS = ['source', 'relation', 'aux_relation', 'target', 'time', 'place']

    def __init__(self):
        super(GetEntity, self).__init__()
        self.complex = ComplexFunc()
        self.nlp = spacy.load('en_core_web_sm')
        self.change = change_nouns()

    def preprocess_text(self, input_file):
        """Join the non-blank lines of *input_file*, resolve pronouns, and parse.

        Args:
            input_file: iterable of text lines.

        Returns:
            The result of ``self.nlp`` applied to the resolved text.
        """
        stripped = (line.strip() for line in input_file)
        text = " ".join(line for line in stripped if line)
        text = self.change.resolved(text)
        return self.nlp(text)

    def get_entity(self, text):
        """Return the entity pairs of every sentence in *text*.

        Args:
            text: a parsed document exposing ``sents``.

        Returns:
            ``([DataFrame], count)`` where the frame has the columns
            ``source, relation, aux_relation, target, time, place`` and
            ``count`` is the number of rows as a string, or ``(None, None)``
            when nothing was extracted.
        """
        ent_pairs = []
        for raw_sentence in text.sents:
            # Each sentence is re-parsed on its own, stripped of whitespace.
            sentence = self.nlp(raw_sentence.text.strip())
            ent_pairs.extend(self.complex.normal_sent(sentence) or [])

        # The result is built once after the loop, so a trailing sentence
        # without pairs no longer discards what earlier sentences produced.
        if not ent_pairs:
            return None, None

        pairs = pd.DataFrame(ent_pairs, columns=self._COLUMNS)
        return [pairs], str(len(ent_pairs))
if __name__ == '__main__':
    # Manual smoke run: extract the pairs of two short sample sentences.
    extractor = GetEntity()
    sample_doc = extractor.nlp("Vibhav ate chocolates. Vedant met Vibhav")
    entities, numbers = extractor.get_entity(sample_doc)
    # print(entities[0])
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from KGQnA._getentitypair import GetEntity
class GraphEnt:
    """Draw extracted entity pairs as a directed multigraph."""

    def __init__(self):
        super(GraphEnt, self).__init__()
        self.x = GetEntity()

    def createGraph(self, dataEntities):
        """Plot the source -> target edges found in *dataEntities*.

        Each row is read positionally: column 0 is the source, column 1 the
        relation (stored as the ``edge`` attribute) and column 3 the target.
        The figure is shown with ``plt.show()``.
        """
        rows = dataEntities.values.tolist()
        kg_df = pd.DataFrame({
            'source': [row[0] for row in rows],
            'target': [row[3] for row in rows],
            'edge': [row[1] for row in rows],
        })

        G = nx.from_pandas_edgelist(
            kg_df, "source", "target",
            edge_attr=True, create_using=nx.MultiDiGraph(),
        )

        plt.figure(figsize=(12, 12))
        # k regulates the distance between nodes.
        pos = nx.spring_layout(G, k=2)
        nx.draw(
            G,
            with_labels=True,
            node_color='skyblue',
            node_size=1500,
            edge_cmap=plt.cm.Blues,
            pos=pos,
        )
        plt.show()
if __name__ == '__main__':
    # Only checks that the class can be constructed; drawing needs real data.
    graph = GraphEnt()
    print("Can't Test directly")
import re
import json
import spacy
import inflect
import requests
from KGQnA._getentitypair import GetEntity
from KGQnA._complex import *
class QuestionAnswer:
    """Answer questions from the stored tuples or from a free-text context."""

    def __init__(self):
        super(QuestionAnswer, self).__init__()
        self.complex = ComplexFunc()
        self.nlp = spacy.load('en_core_web_sm')
        self.p = inflect.engine()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _lemmas(self, text):
        """Return the lemma of every token of *text*."""
        return [token.lemma_ for token in self.nlp(text)]

    def _lower_tokens(self, text):
        """Return the lower-cased text of every token of *text*."""
        return [str(token).lower() for token in self.nlp(text)]

    def _answer_who(self, loaded, relationQ, objectQ, timeQ):
        """Return the comma-joined sources matching relation, target and time."""
        answers = []
        for row in loaded.values():
            lemmas = self._lemmas(row["relation"])
            first_lemma = lemmas[0] if lemmas else ""

            if first_lemma == relationQ:
                objectS = re.sub('-', ' ', row["target"])
                # objectQ is deliberately normalised in place, as before: later
                # rows compare against the normalised question object.
                objectQ = re.sub('-', ' ', objectQ)
                objectS = self.p.singular_noun(objectS) or objectS
                objectQ = self.p.singular_noun(objectQ) or objectQ
            elif " ".join(lemmas) == relationQ:
                objectS = re.sub('-', ' ', row["target"])
            else:
                continue

            if objectS == objectQ and (not timeQ or timeQ == str(row["time"]).lower()):
                answers.append(row["source"])

        return ",".join(answers) or "None"

    def _answer_what(self, loaded, subjectQ, relationQ, timeQ, placeQ):
        """Return the comma-joined targets matching subject, relation, place, time."""
        answers = []
        for row in loaded.values():
            if subjectQ != row["source"]:
                continue
            if relationQ != " ".join(self._lemmas(row["relation"])):
                continue
            if placeQ and placeQ not in self._lower_tokens(row["place"]):
                continue
            if timeQ and timeQ not in self._lower_tokens(row["time"]):
                continue
            answers.append(row["target"])

        return ",".join(answers) or "None"

    def _answer_when(self, loaded, subjectQ, relationQ, placeQ):
        """Return the time of the first matching row, or ``None``."""
        for row in loaded.values():
            if subjectQ != row["source"]:
                continue
            lemmas = self._lemmas(row["relation"])
            if not lemmas:
                continue
            if len(lemmas) >= 2 and str(lemmas[1]).lower() == 'to':
                relationS = " ".join(lemmas)
            else:
                relationS = lemmas[0]
            if relationQ != relationS:
                continue
            if placeQ and placeQ not in self._lower_tokens(row["place"]):
                # Place was asked for but this row is elsewhere: keep looking.
                continue
            return row["time"] if row["time"] != '' else None
        return None

    def _answer_where(self, loaded, subjectQ, relationQ, timeQ):
        """Return the place of the first row matching subject and relation.

        NOTE(review): as in the original, the search stops at the first
        subject/relation match even when its time differs or its place is
        blank; the original's ``int(i) < len(loaded) - 1: pass`` check was a
        no-op and suggests a "keep searching" was intended - confirm.
        """
        for row in loaded.values():
            if subjectQ != row["source"]:
                continue
            lemmas = self._lemmas(row["relation"])
            if not lemmas or relationQ != lemmas[0]:
                continue
            if timeQ and timeQ not in self._lower_tokens(row["time"]):
                return None
            place = row["place"]
            return None if place in (" ", "") else place
        return None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def findanswer(self, question, c=None, con=None):
        """Answer *question*.

        Args:
            question: the question text.
            c: unused; kept so existing callers keep working.
            con: optional context passage. When given, the question-answering
                pipeline on ``self.complex.nlp_`` is used and its output is
                returned unchanged.

        Returns:
            With *con*: the pipeline output. Without it: an answer looked up
            in ``extra/database.json`` - a comma-joined string (``"None"``
            when nothing matches) for who/what questions, a single value or
            ``None`` for when/where questions, and ``"Not Applicable"`` when
            the question yields no query tuple.
        """
        if con is not None:
            return self.complex.nlp_(question=question, context=con)

        p = self.complex.question_pairs(question)
        if p == [] or p is None:
            return "Not Applicable"
        pair = p[0]

        # The context manager closes the file; the whole document is parsed
        # instead of only its first line.
        with open("extra/database.json", "r", encoding="utf8") as f:
            loaded = json.load(f)

        relationQ = " ".join(self._lemmas(pair[1]))
        timeQ = str(pair[4]).lower()
        placeQ = str(pair[5]).lower()

        # Equality, not ``in ('who')``: the latter is a substring test that
        # also matched an empty subject.
        if pair[0] == 'who':
            return self._answer_who(loaded, relationQ, pair[3], timeQ)
        if pair[3] == 'what':
            return self._answer_what(loaded, pair[0], relationQ, timeQ, placeQ)
        if pair[4] == 'when':
            return self._answer_when(loaded, pair[0], relationQ, placeQ)
        if pair[5] == 'where':
            return self._answer_where(loaded, pair[0], relationQ, timeQ)
        return None
import spacy
class change_nouns:
    """Replace subject pronouns and possessives with an earlier noun subject."""

    _SUBJECT_DEPS = ('nsubj', 'subj', 'nsubjpass')
    _COMPOUND_DEPS = ('compound', 'nmod', 'amod')
    _PRONOUNS = ('he', 'HE', 'He', 'she', 'SHE', 'She', 'it', 'IT', 'It')
    _POSSESSIVES = ('his', 'His', 'her', 'Her', 'its', 'Its')
    # Dependency labels a clause must contain before it is split at "and".
    _CLAUSE_DEPS = ("nsubj", "ROOT", "dobj")

    def __init__(self):
        super(change_nouns, self).__init__()
        self.nlp = spacy.load('en_core_web_sm')

    @staticmethod
    def _replace_word(sentence, old, new):
        """Replace whole-word occurrences of *old* in *sentence* with *new*.

        A plain ``str.replace`` also rewrote the pronoun inside longer words
        (for example "he" inside "there").
        """
        import re  # local import: this module's own imports only cover spacy

        pattern = r'(?<!\w)' + re.escape(old) + r'(?!\w)'
        # A function replacement keeps backslashes in *new* literal.
        return re.sub(pattern, lambda _match: new, sentence)

    def resolved(self, text):
        """Return *text* with subject pronouns and possessives resolved.

        Sentences are processed in order. The most recent non-pronoun subject
        replaces a later ``he/she/it`` subject, and ``<subject>'s`` replaces a
        leading ``his/her/its`` in a subject phrase. Sentences without such a
        pronoun are kept unchanged. The result is the sentences joined by a
        single space.
        """
        official_subject = "Unknown"
        sentences = []
        prev_subjs = []

        doc = self.nlp(text)
        for sent in doc.sents:
            compound_is = ""
            replaced = False
            deps = [word.dep_ for word in sent]
            subject_deps = [dep for dep in deps if dep in self._SUBJECT_DEPS]
            # Budget of compound/subject words that may be consumed: position
            # of the first token carrying the last subject's label, plus one.
            budget = deps.index(subject_deps[-1]) + 1 if subject_deps else 1
            several_subjects = len(subject_deps) > 1

            for word in sent:
                if several_subjects:
                    # With several subjects only subject tokens are examined,
                    # and a passive subject ends the scan of this sentence.
                    if word.dep_ not in self._SUBJECT_DEPS:
                        continue
                    if word.dep_ == 'nsubjpass':
                        break
                if budget <= 0:
                    continue

                if not several_subjects and word.dep_ in self._COMPOUND_DEPS:
                    # e.g. "prime minister": collect the words before the noun.
                    if compound_is == "":
                        compound_is = str(word)
                    else:
                        compound_is = compound_is + " " + str(word)
                    budget -= 1
                    continue
                if word.dep_ not in self._SUBJECT_DEPS:
                    continue

                subtree = list(word.subtree)
                word_text = str(word)
                leading = str(subtree[0]) if subtree else ""

                if compound_is == "":
                    if word_text not in self._PRONOUNS and leading not in self._POSSESSIVES:
                        prev_subjs = [word_text]
                        official_subject = word_text
                        budget -= 1
                elif 'poss' in [str(part.dep_) for part in subtree]:
                    prev_subjs = [compound_is]
                    budget -= 1
                else:
                    full_subject = compound_is + " " + word_text
                    prev_subjs = [full_subject]
                    official_subject = full_subject
                    budget -= 1

                if word_text in self._PRONOUNS:
                    # Without any earlier subject the pronoun is left as is
                    # (the original raised IndexError here).
                    new_word = prev_subjs[-1] if prev_subjs else word_text
                    sentences.append(self._replace_word(str(sent), word_text, str(new_word)))
                    replaced = True

                if leading in self._POSSESSIVES:
                    new_word = str(official_subject) + "'s"
                    sentences.append(self._replace_word(str(sent), leading, new_word))
                    replaced = True

            if not replaced:
                sentences.append(str(sent))

        return " ".join(sentences)

    def check_for_multi_and_(self, sentence):
        """Check whether *sentence* holds a full clause before a conjunction.

        Returns:
            ``(True, depen, positions)`` when the tokens before some ``cc``
            token include ``nsubj``, ``ROOT`` and ``dobj``; ``depen`` lists the
            dependency labels before each ``cc`` and ``positions`` the index of
            each ``cc``. Otherwise ``(False, [], 0)``.
        """
        positions = [index for index, word in enumerate(sentence) if word.dep_ == 'cc']
        depen = [[word.dep_ for word in sentence[:index]] for index in positions]

        for labels in depen:
            if all(item in labels for item in self._CLAUSE_DEPS):
                return True, depen, positions
        return False, [], 0

    def diff_sent_return(self, sentence, depen, pos_of_and):
        """Split *sentence* at a conjunction that follows a full clause.

        Args:
            sentence: iterable of tokens.
            depen: dependency labels before each conjunction.
            pos_of_and: index of each conjunction, parallel to *depen*.

        Returns:
            ``(before, after)`` as space-joined token texts. When several
            conjunctions qualify the last one wins; when none does, both
            strings are empty.
        """
        senten1, senten2 = "", ""
        for position, labels in zip(pos_of_and, depen):
            if all(item in labels for item in self._CLAUSE_DEPS):
                words = [str(w) for w in sentence]
                # The original passed both halves through self.nlp and took
                # str() of the result, which gives back the same text.
                senten1 = " ".join(words[:position])
                senten2 = " ".join(words[position + 1:])
        return str(senten1), str(senten2)
if __name__ == "__main__":
    # Manual smoke run on a sample paragraph about the Normans.
    resolver = change_nouns()
    sample_text = "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries."
    sentences = resolver.resolved(sample_text)
    print(sentences)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment