Commit eaef6cc0 authored by HashaniJayasinghe

add question generation component

parent 346dd83e
The island was divided into numerous kingdoms over the following centuries,
intermittently (between CE 993–1077) united under Chola rule. Sri Lanka was ruled
by 181 monarchs from the Anuradhapura to Kandy periods.[6] From the 16th century,
some coastal areas of the country were also controlled by the Portuguese, Dutch and British.
Between 1597 and 1658, a substantial part of the island was under Portuguese rule. The Portuguese
lost their possessions in Ceylon due to Dutch intervention in the Eighty Years' War. Following the
Kandyan Wars, the island was united under British rule in 1815. Armed uprisings against the British
took place in the 1818 Uva Rebellion and the 1848 Matale Rebellion. Independence was finally granted in 1948 but the country remained a Dominion of the British Empire until 1972.
Q-01: What was Sri Lanka ruled by 181 monarchs from the Anuradhapura to Kandy periods?
Q-02: Who was ruled by 181 monarchs from the Anuradhapura to Kandy periods?
Q-03: How much was Sri Lanka ruled by monarchs from the Anuradhapura to Kandy periods?
Q-04: Who was under portuguese rule?
Q-05: When did the island was under british rule?
Q-06: What did the island was under british rule in 1815?
Q-07: Who was rule under british rule in 1815?
Q-08: How much a D was Dominion of the british Empire until 1972?
Q-09: What did Independence was in 1948 but the country remained a dominion of the british Empire until 1972?
Q-10: Who was finally granted in 1948 but the country remained a D Dominion of the british Empire until 1972?
Q-11: Who remained a D Dominion of the british Empire until 1972?
Q-12: Who a D finally Dominion of the british Empire until 1972?
Q-13: How much did Independence was finally granted in but the country remained a D Dominion of the british Empire until 1972?
import spacy

import clause
import nonClause
import identification
import questionValidation
from nlpNER import nerTagger


class AutomaticQuestionGenerator():

    # AQG: parse the input text and generate candidate questions.
    def aqgParse(self, sentence):
        nlp = spacy.load('en_core_web_md')
        singleSentences = sentence.split(".")
        questionsList = []

        # Generators tried on a segment that contains a clause of its own.
        clauseGenerators = (clause.whom_1, clause.whom_2, clause.whom_3,
                            clause.whose, clause.what_to_do, clause.who)
        # Generators tried on a segment that has no clause.
        nonClauseGenerators = (nonClause.what_whom1, nonClause.what_whom2,
                               nonClause.whose, nonClause.howmany,
                               nonClause.howmuch_1)

        for singleSentence in singleSentences:
            segmentSets = singleSentence.split(",")
            ner = nerTagger(nlp, singleSentence)
            for j in range(len(segmentSets)):
                try:
                    questionsList += clause.howmuch_2(segmentSets, j, ner)
                except Exception:
                    pass

                if identification.clause_identify(segmentSets[j]) == 1:
                    # The segment carries its own subject and verb phrase.
                    for generate in clauseGenerators + (clause.howmuch_1,
                                                        clause.howmuch_3):
                        try:
                            questionsList += generate(segmentSets, j, ner)
                        except Exception:
                            pass
                else:
                    # No clause of its own: try to borrow a subject phrase
                    # from an earlier segment of the same sentence.
                    s = ""
                    try:
                        s = identification.subjectphrase_search(segmentSets, j)
                    except Exception:
                        pass
                    if len(s) != 0:
                        segmentSets[j] = s + segmentSets[j]
                        for generate in clauseGenerators:
                            try:
                                questionsList += generate(segmentSets, j, ner)
                            except Exception:
                                pass
                    else:
                        for generate in nonClauseGenerators:
                            try:
                                questionsList += generate(segmentSets, j, ner)
                            except Exception:
                                pass

        questionsList.append('\n')
        return questionsList
    # AQG: print the generated questions and write them to a file.
    def display(self, questions):
        count = 0
        out = ""
        for question in questions:
            if len(question) < 3:
                continue
            if questionValidation.hNvalidation(question) != 1:
                continue
            # Keep only strings that start like questions (Wh-, Ho-, Ha-).
            if question[:2] not in ("Wh", "Ho", "Ha"):
                continue
            WH = question.split(',')
            if len(WH) == 1:
                # Drop the trailing " ? " left by postprocess, re-attach "?".
                question = question[:-3] + "?"
                count = count + 1
                if count < 10:
                    line = "Q-0%d: %s" % (count, question)
                else:
                    line = "Q-%d: %s" % (count, question)
                print(line)
                out += line + "\n"

        output = r"Questions_generator\Outputs\questions.txt"
        with open(output, 'w+', encoding="utf8") as w:
            w.write(out)
        return 0
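# Minimal usage sketch (an illustrative assumption: the sibling modules plus
# the required NLTK data and spaCy model are installed, and the output folder
# used by display() already exists) -- drive the generator from a raw string:
if __name__ == "__main__":
    aqg = AutomaticQuestionGenerator()
    questions = aqg.aqgParse("Sri Lanka was united under British rule in 1815.")
    aqg.display(questions)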
This diff is collapsed.
import nltk


def chunk_search(segment, chunked):
    # Collect the indices of parse-tree items that form usable chunks.
    m = len(chunked)
    list1 = []
    for j in range(m):
        if len(chunked[j]) > 2 or len(chunked[j]) == 1:
            list1.append(j)
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    list1.append(j)
    return list1


def segment_identify(sen):
    segment_set = sen.split(",")
    return segment_set


def clause_identify(segment):
    # Return 1 if the segment contains a clause (a noun/pronoun group
    # followed by a verb group), 0 otherwise.
    tok = nltk.word_tokenize(segment)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?|VB.?|MD|RP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    flag = 0
    for j in range(len(chunked)):
        if len(chunked[j]) > 2:
            flag = 1
        if len(chunked[j]) == 2:
            try:
                str1 = chunked[j][0][0] + " " + chunked[j][1][0]
            except Exception:
                pass
            else:
                if str1 in segment:
                    flag = 1
        if flag == 1:
            break
    return flag
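# Illustrative behaviour (hedged; depends on the NLTK tokenizer and
# POS-tagger data): clause_identify("the island was united") should return 1,
# while clause_identify("in 1815") should return 0, since the latter has no
# verb group following a noun group.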
def verbphrase_identify(clause):
    # Split a clause into its subject phrase and verb phrase, and rebuild it
    # in question order (auxiliary first), e.g. "X was united" -> "was X united".
    tok = nltk.word_tokenize(clause)
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    str1 = ""
    str2 = ""
    str3 = ""
    list1 = chunk_search(clause, chunked)
    if len(list1) != 0:
        m = list1[len(list1) - 1]
        for j in range(len(chunked[m])):
            str1 += chunked[m][j][0]
            str1 += " "

    # Noun part of the matched chunk.
    tok1 = nltk.word_tokenize(str1)
    tag1 = nltk.pos_tag(tok1)
    gram1 = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*}"""
    chunkparser1 = nltk.RegexpParser(gram1)
    chunked1 = chunkparser1.parse(tag1)
    list2 = chunk_search(str1, chunked1)
    if len(list2) != 0:
        m = list2[0]
        for j in range(len(chunked1[m])):
            str2 += (chunked1[m][j][0] + " ")

    # Verb part of the matched chunk.
    tok1 = nltk.word_tokenize(str1)
    tag1 = nltk.pos_tag(tok1)
    gram1 = r"""chunk:{<VB.?|MD|RP>+}"""
    chunkparser1 = nltk.RegexpParser(gram1)
    chunked2 = chunkparser1.parse(tag1)
    list3 = chunk_search(str1, chunked2)
    if len(list3) != 0:
        m = list3[0]
        for j in range(len(chunked2[m])):
            str3 += (chunked2[m][j][0] + " ")

    X = ""
    str4 = ""
    st = nltk.word_tokenize(str3)
    if len(st) > 1:
        # Multi-word verb group: move the first verb (the auxiliary) forward.
        X = st[0]
        s = ""
        for k in range(1, len(st)):
            s += st[k]
            s += " "
        str3 = s
        str4 = X + " " + str2 + str3
    if len(st) == 1:
        # Single verb: insert the matching form of "do" as the auxiliary.
        tag1 = nltk.pos_tag(st)
        if tag1[0][0] not in ('are', 'were', 'is', 'am'):
            if tag1[0][1] == 'VB' or tag1[0][1] == 'VBP':
                X = 'do'
            if tag1[0][1] == 'VBD' or tag1[0][1] == 'VBN':
                X = 'did'
            if tag1[0][1] == 'VBZ':
                X = 'does'
            str4 = X + " " + str2 + str3
        else:
            str4 = tag1[0][0] + " " + str2
    return str4
def subjectphrase_search(segment_set, num):
    # Search the preceding segments (right to left) for a subject phrase
    # that can be prepended to segment_set[num].
    str2 = ""
    for j in range(num - 1, 0, -1):
        str1 = ""
        flag = 0
        tok = nltk.word_tokenize(segment_set[j])
        tag = nltk.pos_tag(tok)
        gram = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+<RB.?>*<VB.?|MD|RP>+}"""
        chunkparser = nltk.RegexpParser(gram)
        chunked = chunkparser.parse(tag)
        list1 = chunk_search(segment_set[j], chunked)
        if len(list1) != 0:
            m = list1[len(list1) - 1]
            for k in range(len(chunked[m])):
                str1 += chunked[m][k][0]
                str1 += " "
            tok1 = nltk.word_tokenize(str1)
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)
            list2 = chunk_search(str1, chunked1)
            if len(list2) != 0:
                m = list2[len(list2) - 1]
                for k in range(len(chunked1[m])):
                    str2 += (chunked1[m][k][0] + " ")
                flag = 1
        if flag == 0:
            tok1 = nltk.word_tokenize(segment_set[j])
            tag1 = nltk.pos_tag(tok1)
            gram1 = r"""chunk:{<EX>?<DT>?<JJ.?>*<NN.?|PRP|PRP\$|POS|IN|DT|CC|VBG|VBN>+}"""
            chunkparser1 = nltk.RegexpParser(gram1)
            chunked1 = chunkparser1.parse(tag1)
            list2 = chunk_search(str1, chunked1)
            st = nltk.word_tokenize(segment_set[j])
            # A segment that is one single noun phrase is itself the subject.
            if len(chunked1[list2[0]]) == len(st):
                str2 = segment_set[j]
                flag = 1
        if flag == 1:
            break
    return str2
def postprocess(string):
    # Capitalise the sentence, keep proper nouns capitalised, and flip
    # first/second-person pronouns so the question addresses the reader.
    tok = nltk.word_tokenize(string)
    tag = nltk.pos_tag(tok)
    str1 = tok[0].capitalize()
    str1 += " "
    if len(tok) != 0:
        for i in range(1, len(tok)):
            if tag[i][1] == "NNP":
                str1 += tok[i].capitalize()
            else:
                str1 += tok[i].lower()
            str1 += " "
    tok = nltk.word_tokenize(str1)
    str1 = ""
    for i in range(len(tok)):
        if tok[i] == "i" or tok[i] == "we":
            str1 += "you "
        elif tok[i] == "my" or tok[i] == "our":
            str1 += "your "
        elif tok[i] == "your":
            str1 += "my "
        elif tok[i] == "you":
            # "you" after a preposition becomes "me", otherwise "i".
            if i - 1 >= 0:
                to = nltk.word_tokenize(tok[i - 1])
                ta = nltk.pos_tag(to)
                if ta[0][1] == 'IN':
                    str1 += "me "
                else:
                    str1 += "i "
            else:
                str1 += "i "
        elif tok[i] == "am":
            str1 += "are "
        else:
            str1 += tok[i] + " "
    return str1
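# Illustrative usage (hedged; requires the NLTK 'punkt' tokenizer and the
# POS-tagger data): the pronoun flip means the call below should return
# roughly "Where did you put your keys " (note the token-joined spacing).
if __name__ == "__main__":
    print(postprocess("where did i put my keys"))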
import aqgFunction
import os

current_path = os.path.abspath(os.path.join(os.path.dirname(__file__)))


# Main function
def main():
    # Create the AQG object
    aqg = aqgFunction.AutomaticQuestionGenerator()

    inputTextPath = r"Questions_generator\Inputs\text.txt"
    readFile = open(inputTextPath, 'r+', encoding="utf8")
    inputText = readFile.read()
    readFile.close()

    questionList = aqg.aqgParse(inputText)
    aqg.display(questionList)
    return 0


# Call the main function
if __name__ == "__main__":
    main()
import spacy


def nerTagger(nlp, tokenize):
    # Pair every token with its named-entity label, or 'O' when the token
    # does not match any recognised entity.
    doc = nlp(tokenize)
    finalList = []
    for word in doc:
        matched = 0
        for ner in doc.ents:
            if ner.text == word.text:
                finalList.append((word.text, ner.label_))
                matched = 1
        if matched == 0:
            finalList.append((word.text, 'O'))
    return finalList
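# Usage sketch (hedged; assumes the spaCy 'en_core_web_md' model is
# installed): prints one (token, label) pair per token, e.g. ('1815', 'DATE'),
# with 'O' for tokens outside any entity.
if __name__ == "__main__":
    nlp = spacy.load('en_core_web_md')
    print(nerTagger(nlp, "Sri Lanka was united under British rule in 1815."))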
import nltk

import identification


def get_chunk(chunked):
    # Join the tokens of a chunk subtree back into a string.
    str1 = ""
    for j in range(len(chunked)):
        str1 += (chunked[j][0] + " ")
    return str1
def what_whom1(segment_set, num, ner):
    # Build "to whom/where/when/to what" questions from a "to <NP>" chunk.
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<TO>+<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|VBG|DT|POS|CD|VBN>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []
    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                # Text before and after the chunk being questioned.
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                if chunked[j][1][1] == 'PRP':
                    str2 = "to whom "
                else:
                    # Pick the question word from the NER label of the chunk's
                    # head noun; default to "to what".
                    str2 = "to what "
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or
                                chunked[j][x][1] == "NNS" or chunked[j][x][1] == "NN"):
                            break
                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " to whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                s.append(str4)
    return s
def what_whom2(segment_set, num, ner):
    # Build "<preposition> whom/where/when/what" questions from an
    # "<IN> <NP>" chunk.
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<DT>?<RB.?>*<JJ.?>*<NN.?|PRP|PRP\$|POS|VBG|DT|CD|VBN>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []
    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                if chunked[j][1][1] == 'PRP':
                    str2 = " " + chunked[j][0][0] + " whom "
                else:
                    # Question word from the NER label of the chunk's head
                    # noun; default to "<preposition> what".
                    str2 = " " + chunked[j][0][0] + " what "
                    for x in range(len(chunked[j])):
                        if (chunked[j][x][1] == "NNP" or chunked[j][x][1] == "NNPS" or
                                chunked[j][x][1] == "NNS" or chunked[j][x][1] == "NN"):
                            break
                    for x1 in range(len(ner)):
                        if ner[x1][0] == chunked[j][x][0]:
                            if ner[x1][1] == "PERSON":
                                str2 = " " + chunked[j][0][0] + " whom "
                            elif ner[x1][1] == "LOC" or ner[x1][1] == "ORG" or ner[x1][1] == "GPE":
                                str2 = " where "
                            elif ner[x1][1] == "TIME" or ner[x1][1] == "DATE":
                                str2 = " when "
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                s.append(str4)
    return s
def whose(segment_set, num, ner):
    # Replace a possessive ("X's", "their") with "whose".
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<NN.?>*<PRP\$|POS>+<RB.?>*<JJ.?>*<NN.?|VBG|VBN>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []
    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " whose "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                # Keep the possessed noun phrase, dropping the possessor.
                if chunked[j][1][1] == 'POS':
                    for k in range(2, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")
                else:
                    for k in range(1, len(chunked[j])):
                        str2 += (chunked[j][k][0] + " ")
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                s.append(str4)
    return s
def howmany(segment_set, num, ner):
    # Replace a numeral before a noun phrase with "how many".
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<DT>?<CD>+<RB>?<JJ|JJR|JJS>?<NN|NNS|NNP|NNPS|VBG>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []
    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how many "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                # Keep only the counted noun phrase, dropping the numeral.
                st = get_chunk(chunked[j])
                tok1 = nltk.word_tokenize(st)
                tag1 = nltk.pos_tag(tok1)
                gram1 = r"""chunk:{<RB>?<JJ|JJR|JJS>?<NN|NNS|NNP|NNPS|VBG>+}"""
                chunkparser1 = nltk.RegexpParser(gram1)
                chunked1 = chunkparser1.parse(tag1)
                list2 = identification.chunk_search(st, chunked1)
                z = ""
                for k in range(len(chunked1)):
                    if k in list2:
                        z += get_chunk(chunked1[k])
                str4 = str1 + str2 + z + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                s.append(str4)
    return s
def howmuch_1(segment_set, num, ner):
    # Replace "<preposition> <amount>" (e.g. "for $ 100") with
    # "<preposition> how much".
    tok = nltk.word_tokenize(segment_set[num])
    tag = nltk.pos_tag(tok)
    gram = r"""chunk:{<IN>+<\$>?<CD>+}"""
    chunkparser = nltk.RegexpParser(gram)
    chunked = chunkparser.parse(tag)
    list1 = identification.chunk_search(segment_set[num], chunked)
    s = []
    if len(list1) != 0:
        for j in range(len(chunked)):
            str1 = ""
            str3 = ""
            str2 = " how much "
            if j in list1:
                for k in range(j):
                    if k in list1:
                        str1 += get_chunk(chunked[k])
                    else:
                        str1 += (chunked[k][0] + " ")
                for k in range(j + 1, len(chunked)):
                    if k in list1:
                        str3 += get_chunk(chunked[k])
                    else:
                        str3 += (chunked[k][0] + " ")
                # Keep the preposition in front of "how much".
                str2 = chunked[j][0][0] + str2
                str4 = str1 + str2 + str3
                for k in range(len(segment_set)):
                    if k != num:
                        str4 += ("," + segment_set[k])
                str4 += '?'
                str4 = identification.postprocess(str4)
                s.append(str4)
    return s
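# Usage sketch (hedged; needs the NLTK data and the spaCy model): each
# generator takes the comma-split segment list, the index of the target
# segment, and the (token, label) pairs from nlpNER.nerTagger. For the
# segment below, howmany() should yield roughly
# "Sri Lanka was ruled by how many monarchs ?".
if __name__ == "__main__":
    import spacy
    from nlpNER import nerTagger
    nlp = spacy.load('en_core_web_md')
    segment = "Sri Lanka was ruled by 181 monarchs"
    print(howmany([segment], 0, nerTagger(nlp, segment)))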
# Question validation
def hNvalidation(sentence):
    # Reject (return 0) any question that contains the stray token sequence
    # " h N "; otherwise accept it (return 1).
    flag = 1
    length = len(sentence)
    if length > 4:
        for i in range(length):
            if i + 4 < length:
                if sentence[i:i + 5] == " h N ":
                    flag = 0
    return flag
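# Quick illustrative check of the validator:
if __name__ == "__main__":
    print(hNvalidation("Who ruled the h N island?"))  # -> 0 (rejected)
    print(hNvalidation("Who ruled the island?"))      # -> 1 (accepted)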