Commit df8ceba1 authored by ThushanSandeepa

Merge remote-tracking branch 'origin/IT19114736' into IT19408316

parents 07324af0 da86d1ac
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import summerise.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
app = Flask(__name__)
CORS(app)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
file = request.files['file']
ratio = float(request.form['ratio'])
filename = werkzeug.utils.secure_filename(file.filename)
print("\nReceived image File name : " + file.filename)
file.save('upload/' + filename)
f, file_extension = os.path.splitext('upload/' + filename)
print(file_extension)
if file_extension == '.docx':
text = textract.process('upload/' + filename)
arr = str(text).replace("\\n", "")
arr = arr.replace("\\t", "")
arr = arr.replace("\\", "")
prs = Presentation()
lyt = prs.slide_layouts[0] # choosing a slide layout
for x in range(0, 3):
if x == 2:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
subtitle.text = arr
else:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
title.text = "ignore" # title
subtitle.text = "ignore" # subtitle
prs.save("upload/slide3.pptx") # saving file
print('file saved')
res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
else:
res = summarizeed.create_sumall('upload/' + filename, ratio)
rr = []
for r in res[0]:
rr.append(r[0].replace('"', ''))
return_str = '{ "result" : ['
for i in range(len(rr)):
if i == len(rr) - 1:
return_str += '"' + rr[i] + '"'
else:
return_str += '"' + rr[i] + '"' + ','
return_str += ']}'
print(return_str)
return json.loads(return_str)
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5005, debug=True)
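A minimal client sketch for the /summerize route above, assuming the service is reachable on localhost:5005; the file name and ratio value are illustrative placeholders.
import requests

# 'sample.pptx' and the ratio are placeholders; the route expects a 'file' part
# and a 'ratio' form field.
with open('sample.pptx', 'rb') as fh:
    resp = requests.post(
        'http://localhost:5005/summerize',
        files={'file': fh},
        data={'ratio': '1.2'},
    )
print(resp.json()['result'])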
import glob
from pptx import Presentation
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import textract
import os.path
nltk.download('stopwords')
nltk.download('punkt')
def create_sumall(abc, ratio):
if abc:
filename = abc
stop_word = ['is', 'a', 'and', 'the']
# Function to create Text summarization
def create_summ(text):
stopWords = set(stopwords.words("english"))
words = word_tokenize(text)
freqTable = dict()
for word in words:
word = word.lower()
if word in stopWords:
continue
if word in freqTable:
freqTable[word] += 1
else:
freqTable[word] = 1
sentences = sent_tokenize(text)
sentenceValue = dict()
for sentence in sentences:
for word, freq in freqTable.items():
if word in sentence.lower():
if sentence in sentenceValue:
sentenceValue[sentence] += freq
else:
sentenceValue[sentence] = freq
sumValues = 0
for sentence in sentenceValue:
sumValues += sentenceValue[sentence]
lensenvalu = len(sentenceValue)
if lensenvalu == 0:
lensenvalu = 1
average = int(sumValues / lensenvalu)
else:
average = int(sumValues / lensenvalu)
summary = ''
for sentence in sentences:
if (sentence in sentenceValue) and (sentenceValue[sentence] > (
ratio * average)):
summary += " " + sentence
return summary
def read_full_pptxe(filename):
sentences = []
b = []
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
s = create_summ(shape.text.replace("\n", " "))
s = str(s)
if (len(s)) >= 1:
f = ["Slide " + str(a) + "-" + s]
sentences.append(f)
return sentences
def read_full_docx(filename):
sentences = []
text = textract.process(filename)
temp = text.decode('utf-8', errors='ignore').split(".")
for t in temp:
sentences.append(t.replace("\n", " "))
return sentences
extension = os.path.splitext(filename)[1]
if extension == '.docx':
read_full_docx(filename)
else:
read_full_pptxe(filename)
def Convert(string):
li = list(string.split(" "))
return li
def Convert2(string):
li = list(string.split("\n"))
return li
def read_slide3(filename):
a = 1
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if a == 4 and shape.shape_id == 3:
s3 = str(shape.text)
return s3
def read_full_pptx(filename, sss):
numberslide = []
numberslide.append(sss)
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if shape.shape_id != 2:
s = shape.text.replace("\n", " ")
s = str(s)
if (len(s)) >= 20 and a != 3 and a != 1 and a != 2:
lo_1 = [a for a in new_l1 if a in s.lower()]
f_lo_l = round((len(lo_1) / len_of_l1) * 100)
if f_lo_l >= 50:
f = "Slide " + str(a)
numberslide.append(f)
return numberslide
loooo = Convert2(read_slide3(filename))
abc = []
for i in loooo:
l1 = Convert(i.lower())
new_l1 = [w for w in l1 if w not in stop_word]
len_of_l1 = len(new_l1)
read_full_pptx(filename, i)
abc.append(read_full_pptx(filename, i))
return (read_full_pptxe(filename), abc)
else:
print('error')
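An illustrative call to create_sumall, assuming a presentation exists at the placeholder path; the function returns the per-slide summaries together with the matching-slide groups.
# 'upload/lecture.pptx' is a placeholder path; ratio 1.2 is an arbitrary example value.
summaries, matched_slides = create_sumall('upload/lecture.pptx', 1.2)
for entry in summaries:
    print(entry[0])  # each entry is a one-element list like ["Slide 4-<summarised text>"]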
# import libraries
import json
import os
import audio_gen as topic_gen
import bert as bert
import werkzeug
from flask import Flask, request, send_file
from flask_cors import CORS
from nltk.corpus import stopwords
s = set(stopwords.words('english'))
app = Flask(__name__)
CORS(app)
download_file = ''
# Topics API
@app.route('/topic', methods=['GET', 'POST'])
def topic():
videofile = request.files['video']
filename = werkzeug.utils.secure_filename(videofile.filename)
print("\nReceived video file name : " + videofile.filename)
videofile.save('upload/' + filename)
global download_file
download_file = 'upload/' + str(filename).replace('.mp4', '.txt')
text_list_from_video, all_text = topic_gen.split_video_file('upload/' + filename)
# Writing to a file
file1 = open(download_file, 'w')
file1.writelines(all_text)
file1.close()
topic_list = []
for index in text_list_from_video:
temp_topic = bert.get_topics_new(index[1])
filtered_topics = [elem for elem in temp_topic if elem not in s]
topic_list.append(filtered_topics[0])
return_json = '[ '
for i, topic in enumerate(topic_list):
if i == len(topic_list) - 1:
return_json += '{ "index" : "' + str(i) + '", "topic" : "' + str(topic) + '", "time_frame" : "' + str(
i * 240) + ' to end" } ]'
else:
return_json += '{ "index" : "' + str(i) + '", "topic" : "' + str(topic) + '", "time_frame" : "' + str(
i * 240) + ' to ' + str((i + 1) * 240) + ' seconds"} ,'
print(return_json)
return json.loads(return_json)
# Transcript API
@app.route('/transcript', methods=['GET', 'POST'])
def transcript():
global download_file
doc = download_file
return send_file(doc, as_attachment=True)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=1100, debug=True)
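A minimal client sketch for the two routes above, assuming the service runs on localhost:1100; 'lecture.mp4' is an illustrative file name.
import requests

# The /topic route expects a multipart 'video' part and returns a list of
# {index, topic, time_frame} objects.
with open('lecture.mp4', 'rb') as fh:
    resp = requests.post('http://localhost:1100/topic', files={'video': fh})
for item in resp.json():
    print(item['index'], item['topic'], item['time_frame'])

# The transcript written during /topic can then be downloaded from /transcript.
with open('lecture_transcript.txt', 'wb') as out:
    out.write(requests.get('http://localhost:1100/transcript').content)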
import moviepy.editor as mp
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
import os
import text_gen
def convert_video_to_audio(filename):
clip = mp.VideoFileClip(r"" + filename)
audio_file_name = str(filename).split('/')[-1].replace('.mp4', '.wav')
clip.audio.write_audiofile(r"audio_input/" + audio_file_name)
return text_gen.convert_audio_to_text("audio_input/" + audio_file_name)
def split_video_file(filename):
return_list = []
all_text = ''
required_video_file = filename
files = os.listdir('video_input')
for filename in files:
os.remove('video_input/' + filename)
total_length = VideoFileClip(required_video_file).duration
# print(total_length)
no_of_slices = int(total_length / 240) + 1
time_grid = []
for i in range(0, no_of_slices):
time_grid.append(i * 240)
for i in range(no_of_slices):
if i == len(time_grid) - 1:
# ffmpeg_extract_subclip(required_video_file, time_grid[i], total_length - time_grid[i],
# targetname='videos/' + str(i) + ".mp4")
pass
else:
ffmpeg_extract_subclip(required_video_file, time_grid[i], time_grid[i + 1],
targetname='video_input/' + str(i) + ".mp4")
text = convert_video_to_audio('video_input/' + str(i) + ".mp4")
all_text += text + ' '
return_list.append([i, text])
return return_list, all_text
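An illustrative driver for split_video_file, assuming an mp4 exists at the placeholder path and that the video_input and audio_input folders exist in the working directory.
# 'upload/lecture.mp4' is a placeholder; each segment pairs a chunk index with its transcript.
segments, full_text = split_video_file('upload/lecture.mp4')
for index, text in segments:
    print('segment', index, ':', text[:80])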
import nltk
import question_generator as q_gen
from bertopic import BERTopic
from nltk.corpus import words
model = BERTopic(verbose=True)
def get_topics(file):
topics_outputs = []
docs = []
with open(file) as file:
for line in file:
docs.append(line.rstrip())
topics, probabilities = model.fit_transform(docs)
print(model.get_topic_freq())
print('done')
print(model.get_topics())
for i in model.get_topic(0):
if i[0] in words.words():
pass
else:
print(i[0])
topics_outputs.append(i[0])
return topics_outputs
def get_topics_new(text):
topics = q_gen.get_keywords(text, q_gen.summarizer(text))
return topics
from textwrap3 import wrap
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
import numpy as np
import nltk
# nltk.download('punkt')
# nltk.download('brown')
# nltk.download('wordnet')
# nltk.download('stopwords')
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
import pke
import traceback
from flashtext import KeywordProcessor
summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model = summary_model.to(device)
def set_seed(seed: int):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def postprocesstext(content):
final = ""
for sent in sent_tokenize(content):
sent = sent.capitalize()
final = final + " " + sent
return final
# text summarizing
def summarizer(text, model=summary_model, tokenizer=summary_tokenizer):
text = text.strip().replace("\n", " ")
text = "summarize: " + text
max_len = 512
encoding = tokenizer.encode_plus(text, max_length=max_len, pad_to_max_length=False, truncation=True,
return_tensors="pt").to(device)
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
attention_mask=attention_mask,
early_stopping=True,
num_beams=3,
num_return_sequences=1,
no_repeat_ngram_size=2,
min_length=75,
max_length=300)
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
summary = dec[0]
summary = postprocesstext(summary)
summary = summary.strip()
return summary
def get_nouns_multipartite(content):
out = []
try:
extractor = pke.unsupervised.MultipartiteRank()
extractor.load_document(input=content)
# not contain punctuation marks or stopwords as candidates.
pos = {'PROPN', 'NOUN'}
stoplist = list(string.punctuation)
stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
stoplist += stopwords.words('english')
extractor.candidate_selection(pos=pos, stoplist=stoplist)
# 4. build the Multipartite graph and rank candidates using random walk,
# alpha controls the weight adjustment mechanism, see TopicRank for
# threshold/method parameters.
extractor.candidate_weighting(alpha=1.1,
threshold=0.75,
method='average')
keyphrases = extractor.get_n_best(n=15)
for val in keyphrases:
out.append(val[0])
except:
out = []
traceback.print_exc()
return out
def get_keywords(originaltext, summarytext):
keywords = get_nouns_multipartite(originaltext)
# print("keywords unsummarized: ", keywords)
keyword_processor = KeywordProcessor()
for keyword in keywords:
keyword_processor.add_keyword(keyword)
keywords_found = keyword_processor.extract_keywords(summarytext)
keywords_found = list(set(keywords_found))
# print("keywords_found in summarized: ", keywords_found)
important_keywords = []
for keyword in keywords:
if keyword in keywords_found:
important_keywords.append(keyword)
return important_keywords[:4]
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_model = question_model.to(device)
def get_question(context, answer, model, tokenizer):
text = "context: {} answer: {}".format(context, answer)
encoding = tokenizer.encode_plus(text, max_length=384, pad_to_max_length=False, truncation=True,
return_tensors="pt").to(device)
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
attention_mask=attention_mask,
early_stopping=True,
num_beams=5,
num_return_sequences=1,
no_repeat_ngram_size=2,
max_length=72)
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
Question = dec[0].replace("question:", "")
Question = Question.strip()
return Question
def generate_questions_and_answers(text):
set_seed(42)
summarized_text = summarizer(text, summary_model, summary_tokenizer)
imp_keywords = get_keywords(text, summarized_text)
question_and_answer_list = []
for answer in imp_keywords:
ques = get_question(summarized_text, answer, question_model, question_tokenizer)
question_and_answer_list.append([ques, answer.capitalize()])
return question_and_answer_list
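An illustrative call to generate_questions_and_answers; the sample paragraph is a placeholder standing in for real lecture text.
# Placeholder input text; real lecture transcripts would be passed in instead.
sample_text = (
    "Supervised learning trains a model on labelled examples so it can map "
    "new inputs to known outputs, while unsupervised learning groups "
    "unlabelled data by similarity, for example through clustering."
)
for question, answer in generate_questions_and_answers(sample_text):
    print(question, '->', answer)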
pip install --ignore-installed nltk pywsd scikit-learn flask Flask-Cors PyPDF2 textwrap3 transformers pke-tool flashtext sentence_transformers spacy pydot bertopic pandas rake-nltk protobuf==3.20.0 moviepy SpeechRecognition
python -m spacy download en_core_web_sm
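The scripts also fetch several NLTK corpora, either at import time or in commented-out download calls; if preferred, the same data can be pulled once up front (the corpus list below simply mirrors the nltk.download calls that appear in the code).
python -m nltk.downloader punkt stopwords wordnet brown words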
import glob
import math
import os.path
import nltk
import textract
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from pptx import Presentation
def create_sumall(abc, ratio):
if abc:
filename = abc
stop_word = ['is', 'a', 'and', 'the']
# Function to create Text summarization
def create_summ(text):
stopWords = set(stopwords.words("english"))
words = word_tokenize(text)
freqTable = dict()
for word in words:
word = word.lower()
if word in stopWords:
continue
if word in freqTable:
freqTable[word] += 1
else:
freqTable[word] = 1
sentences = sent_tokenize(text)
sentenceValue = dict()
for sentence in sentences:
for word, freq in freqTable.items():
if word in sentence.lower():
if sentence in sentenceValue:
sentenceValue[sentence] += freq
else:
sentenceValue[sentence] = freq
sumValues = 0
for sentence in sentenceValue:
sumValues += sentenceValue[sentence]
lensenvalu = len(sentenceValue)
if lensenvalu == 0:
lensenvalu = 1
average = int(sumValues / lensenvalu)
else:
average = int(sumValues / lensenvalu)
summary = ''
for sentence in sentences:
if (sentence in sentenceValue) and (sentenceValue[sentence] > (
ratio * average)):
summary += " " + sentence
return summary
def read_full_pptxe(filename):
sentences = []
b = []
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
s = create_summ(shape.text.replace("\n", " "))
s = str(s)
if (len(s)) >= 1:
f = ["Slide " + str(a) + "-" + s]
sentences.append(f)
return sentences
def read_full_docx(filename):
sentences = []
text = textract.process(filename)
temp = text.decode('utf-8', errors='ignore').split(".")
for t in temp:
sentences.append(t.replace("\n", " "))
return sentences
extension = os.path.splitext(filename)[1]
if extension == '.docx':
read_full_docx(filename)
else:
read_full_pptxe(filename)
def Convert(string):
li = list(string.split(" "))
return li
def Convert2(string):
li = list(string.split("\n"))
return li
def read_slide3(filename):
a = 1
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if a == 4 and shape.shape_id == 3:
s3 = str(shape.text)
return s3
def read_full_pptx(filename, sss):
numberslide = []
numberslide.append(sss)
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if shape.shape_id != 2:
s = shape.text.replace("\n", " ")
s = str(s)
if (len(s)) >= 20 and a != 3 and a != 1 and a != 2:
lo_1 = [a for a in new_l1 if a in s.lower()]
f_lo_l = round((len(lo_1) / len_of_l1) * 100)
if f_lo_l >= 50:
f = "Slide " + str(a)
numberslide.append(f)
return numberslide
loooo = Convert2(read_slide3(filename))
abc = []
for i in loooo:
l1 = Convert(i.lower())
new_l1 = [w for w in l1 if w not in stop_word]
len_of_l1 = len(new_l1)
read_full_pptx(filename, i)
abc.append(read_full_pptx(filename, i))
return (read_full_pptxe(filename), abc)
else:
print('error')
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import topics_find.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
app = Flask(__name__)
CORS(app)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
file = request.files['file']
ratio = float(request.form['ratio'])
filename = werkzeug.utils.secure_filename(file.filename)
print("\nReceived image File name : " + file.filename)
file.save('upload/' + filename)
f, file_extension = os.path.splitext('upload/' + filename)
print(file_extension)
if file_extension == '.docx':
text = textract.process('upload/' + filename)
arr = str(text).replace("\\n", "")
arr = arr.replace("\\t", "")
arr = arr.replace("\\", "")
prs = Presentation()
lyt = prs.slide_layouts[0] # choosing a slide layout
for x in range(0, 3):
if x == 2:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
subtitle.text = arr
else:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
title.text = "ignore" # title
subtitle.text = "ignore" # subtitle
prs.save("upload/slide3.pptx") # saving file
print('file saved')
res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
else:
res = summarizeed.create_sumall('upload/' + filename, ratio)
rr = []
for r in res[0]:
rr.append(r[0].replace('"', ''))
return_str = '{ "result" : ['
for i in range(len(rr)):
if i == len(rr) - 1:
return_str += '"' + rr[i] + '"'
else:
return_str += '"' + rr[i] + '"' + ','
return_str += ']}'
print(return_str)
return json.loads(return_str)
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5005, debug=True)
import os
import subprocess
import sys
import speech_recognition as sr
PYTHONIOENCODING = "UTF-8"
FOLDER_AUDIO = "audio_input"
FOLDER_TEXT = "text_output"
LANGUAGE = "en-US"
def convert_audio_to_text(filename):
r = sr.Recognizer()
with sr.AudioFile(filename) as source:
audio = r.record(source)
try:
command = r.recognize_google(audio, language='en-IN', show_all=True)
print(command["alternative"][0]["transcript"])
return command["alternative"][0]["transcript"]
except:
return 'did not convert'
# convert_audio_to_text('audio_input/3.wav')
{'alternative': [{'transcript': 'type of diffusion is an IR module that making waves right now open source machine many more than the Legend images from text free ridiculously well with the engine is the ability to fusion took 256 180 hours and hours to train at market price that 612 and pigments', 'confidence': 0.88088202}, {'transcript': 'type of diffusion is an IR module that making waves right now open source machine many more than the Legend images from text free ridiculously well with the engine is the ability to fusion took 256 150 hours and hours to train at market price that 612 and pigments'}, {'transcript': 'type of diffusion is an IR module that making waves right now open source machine many more than the Legend images from text free ridiculously well with the engine is the ability to fusion took 256 180 days and hours to train at market price that 612 and pigments'}, {'transcript': 'type of diffusion is an IR module that making waves right now kitchen open source machine many more than the Legend images from text free ridiculously well with the engine is the ability to fusion took 256 150 hours and hours to train at market price that 612 and pigments'}, {'transcript': 'type of diffusion is an IR module that making waves right now open source machine many more than the Legend images from text free ridiculously well with the engine is the ability to fusion Tuk 256 150 hours and hours to train at market price that 612 and pigments'}], 'final': True}
{'alternative': [{'transcript': 'two baby chicks headphone very own eggs when they would ask returned home from gathering food for 24 x 7 Pro Kabaddi final anywhere natural on what you discover the Lion treks around her to for it tracks therefore be determined the final babies', 'confidence': 0.87500781}, {'transcript': 'two baby chicks headphone very own eggs when they would ask that returned home from gathering food for 24 x 7 Pro Kabaddi final anywhere natural on what you discover the Lion treks around her to for it tracks therefore be determined to find her babies'}, {'transcript': 'water to baby chicks headphone very own eggs when they would ask that returned home from gathering food for 24 x 7 Pro Kabaddi final anywhere natural on what you discover the Lion treks around her to for it tracks therefore be determined the final babies'}, {'transcript': 'two baby chicks headphone very own eggs when they would ask returned home from gathering food for 24 x 7 Pro Kabaddi final anywhere natural on what you discover the Lion treks around her to for it tracks therefore be determined to find her babies'}, {'transcript': 'water to baby chicks headphone very own eggs when they would ask returned home from gathering food for 24 x 7 Pro Kabaddi final anywhere natural on what you discover the Lion treks around her to for it tracks therefore be determined the final babies'}], 'final': True}
we will start with supervised learning and unsupervised learning is the machine talking about using the data which is well labelled let's marry right then what is the telling ok low-income and other companies of this well it's a low-income marriage male and living in Android with some number of children no like you know what is this you are giving some attributes and the corresponding outcome with positive and that you're giving it will look at the patterns and identify ok if I get which type of combination will be the day is it right one thank you to another example is so it pays in the email spam filter problem we have the date of receipt of email with over text within every evening we also know which of these moon so when you are having condition with its like that only thing is you are giving just the data and the outcome to find out is it a problem with each and every moving into an excellent so what is the answer 100 emails in other two machine is just looking at it and see what is this what is this is it is very simple process of partition in set of data object in the subject you are breaking how to cancel the items in a manner that you are maximized in Singh similarity between within one and you heard maximize the gift of fighter in separate that and it has Max we have to maximum put it into something that you don't know how many subject you will get to you will get then you can licence ok
security that Security at your home or anything else you can think who are the people involved who would you who would you attributes security in your home who is the person who is responsible for security usually at home in your pin it can be different from house to house parents ok very good so parents are you should I am in your mother most of the time asking you know that did you lock the door before going to sleep so that I want to be protected from teams very good if you don't think so it's just that we want to learn what is security in information system security in information security at home means to protect our property and personal and we have property things that we need protection from so these new words in other subjects have a very clear idea about what is computer security is already told me some of this was not trying to protect information systems that that we keep hold of three things this is the important what are the 3 things integrity and availability and it's called confidential guys no matter which part of the world you leaving wherever you go security is always always about these three things what are the three things easy to remember confidentiality integrity and availability ok so I will go through this time one by one and explain the meaning of every time ok but for you to remember very easily remember these three letters very familiar acronym of America right Central Intelligence Agency no no that but confidentiality integrity availability you should remember get so you have this we want this we want we want for what one of the student send me a message saying we want our valuable things and you know personal items protect just that information system resources should be protected what are the resources we want the hardware to be protected we want this software to be protected we want to be protected and the data that is stored in the hardware and not only that don't forget you have you know service computers and everything but none of these things are useful if we don't connect internet and connectivity is very so we want to protect this one also which is called telecommunication and we want to make sure that this is also protected not only this not only this not only the software inside we also want to make sure whatever the data that is going here and coming in is also protect that is clear so now all we need to learn about these three things what do we mean by confidentiality integrity and that's what we want to learn today is not happen after that your time to talk not my type what does the thief usually do after please talk to me guys otherwise you know I'm already losing interest I am looking at my phone good still valuable things stealing is not there to have a cup of tea electricity line so that
supervised learning is a learning English speak paint machine ok talking about I told you machine learning using the data which is well labelled what do you mean by that mean something is old
from flask import Flask, request, render_template, send_file
from flask_cors import CORS
import werkzeug
import cv2
import note_generator.note as note_gen
import topics_find.audio_gen as topic_gen
import note_generator.write_word as writer
import topics_find.bert as bert
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
app = Flask(__name__)
CORS(app)
video_file_name = ''
pptx_file_name = ''
@app.route('/')
def index():
return render_template('index.html')
@app.route('/upload_action', methods=['GET', 'POST'])
def upload_action():
lecture_video = request.files['lecture_video']
filename_v = werkzeug.utils.secure_filename(lecture_video.filename)
print("\nReceived image File name : " + lecture_video.filename)
lecture_video.save('upload/' + filename_v)
lecture_ppt = request.files['lecture_ppt']
filename = werkzeug.utils.secure_filename(lecture_ppt.filename)
print("\nReceived image File name : " + lecture_ppt.filename)
lecture_ppt.save('upload/' + filename)
global video_file_name
global pptx_file_name
video_file_name = filename_v
pptx_file_name = filename
return render_template('upload.html', video_file_name=video_file_name, pptx_file_name=pptx_file_name)
@app.route('/generate_topics', methods=['GET', 'POST'])
def generate_topics():
global video_file_name
global pptx_file_name
text_list_from_video, all_text = topic_gen.split_video_file('upload/' + video_file_name)
topic_list = []
for index in text_list_from_video:
topic_list.append([index[0], index[1], bert.get_topics_new(index[1])])
return render_template('topics.html', topic_list=topic_list)
@app.route('/generate_short_note', methods=['GET', 'POST'])
def generate_short_note():
global video_file_name
global pptx_file_name
text_from_pptx = note_gen.generate_note('upload/' + pptx_file_name)
text_list_from_video, all_text = topic_gen.split_video_file('upload/' + video_file_name)
writer.create_doc()
writer.write_note('short note from lecture video :- ')
for i in text_list_from_video:
writer.write_note(i[1])
writer.write_note('short note from lecture slide (pptx) :- ')
writer.write_note(text_from_pptx)
writer.save_note('upload/' + pptx_file_name.split('.')[0] + '.docx')
doc_filename = pptx_file_name.split('.')[0] + '.docx'
print(doc_filename)
return render_template('short_notes.html', filename=doc_filename)
@app.route('/short_note/<name>')
def short_note(name):
doc = 'upload/' + name
print('request', doc)
return send_file(doc, as_attachment=True)
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5200, debug=True)
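A minimal client sketch for the upload step above, assuming the app runs on localhost:5200; both file names are placeholders.
import requests

# 'lecture.mp4' and 'slides.pptx' are placeholders; the route expects
# 'lecture_video' and 'lecture_ppt' multipart parts.
with open('lecture.mp4', 'rb') as vid, open('slides.pptx', 'rb') as ppt:
    resp = requests.post(
        'http://localhost:5200/upload_action',
        files={'lecture_video': vid, 'lecture_ppt': ppt},
    )
print(resp.status_code)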
.html {
height: 100%;
}
* {box-sizing: border-box;}
.body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url( ../images/bg.jpg )
}
.body_login {
height:50%;
width: 50%;
padding: 10px;
margin: 60px auto;
font-family: Arial, Helvetica, sans-serif;
background-image: url( ../images/bg.jpg )
}
.header {
overflow: hidden;
background-color: #e28743;
padding: 5px 10px;
}
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
}
.header a.logo {
font-size: 25px;
font-weight: bold;
}
.header a:hover {
background-color: #76b5c5;
color: black;
}
.header a.active {
background-color: #76b5c5;
color: white;
}
.header-right {
float: right;
}
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
}
.header-right {
float: none;
}
}
.global-container{
height:100%;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
}
.login-form {
opacity: 0.9;
width: 340px;
margin: 50px auto;
font-size: 15px;
}
.login-form form {
margin-bottom: 15px;
background: #f7f7f7;
box-shadow: 0px 2px 2px rgba(0, 0, 0, 0.3);
padding: 30px;
}
.login-form h2 {
margin: 0 0 15px;
}
.form-control, .btn {
min-height: 38px;
border-radius: 2px;
}
.btn {
font-size: 15px;
font-weight: bold;
}
form{
padding-top: 10px;
font-size: 14px;
margin-top: 30px;
}
.card-title{ font-weight:300; }
.card{opacity: 0.9;}
.effect7{
position:relative;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
}
.btn{
font-size: 14px;
margin-top:20px;
}
.login-form{
width:700px;
margin:20px;
}
.sign-up{
text-align:center;
padding:20px 0 0;
}
.alert{
margin-bottom:-30px;
font-size: 13px;
margin-top:20px;
}
.modal-dialog {
max-width: 800px;
margin: 30px auto;
}
.modal-body {
position:relative;
padding:0px;
}
.close {
position:absolute;
right:-30px;
top:0;
z-index:999;
font-size:2rem;
font-weight: normal;
color:#fff;
opacity:1;
}
.custom-file-uploader {
position: relative;
}
.custom-file-uploader input[type='file'] {
display: block;
position: absolute;
top: 0;
right: 0;
bottom: 0;
left: 0;
z-index: 5;
width: 100%;
height: 100%;
opacity: 0;
cursor: default;
}
/* Always set the map height explicitly to define the size of the div
* element that contains the map. */
#map {
height: 100%;
}
/* Optional: Makes the sample page fill the window. */
html,
body {
height: 100%;
margin: 0;
padding: 0;
}
const citymap = {
chicago: {
center: { lat: 6.9061, lng: 79.9696 },
population: 100,
}
};
function initMap() {
// Create the map.
const map = new google.maps.Map(document.getElementById("map"), {
zoom: 15,
center: { lat: 6.9061, lng: 79.9696 },
mapTypeId: "terrain",
});
// Construct the circle for each value in citymap.
// Note: We scale the area of the circle based on the population.
for (const city in citymap) {
// Add the circle for this city to the map.
const cityCircle = new google.maps.Circle({
strokeColor: "#FF0000",
strokeOpacity: 0.8,
strokeWeight: 2,
fillColor: "#FF0000",
fillOpacity: 0.35,
map,
center: citymap[city].center,
radius: Math.sqrt(citymap[city].population) * 100,
});
}
}
document.querySelectorAll(".drop-zone__input").forEach((inputElement) => {
const dropZoneElement = inputElement.closest(".drop-zone");
dropZoneElement.addEventListener("click", (e) => {
inputElement.click();
});
inputElement.addEventListener("change", (e) => {
if (inputElement.files.length) {
updateThumbnail(dropZoneElement, inputElement.files[0]);
}
});
dropZoneElement.addEventListener("dragover", (e) => {
e.preventDefault();
dropZoneElement.classList.add("drop-zone--over");
});
["dragleave", "dragend"].forEach((type) => {
dropZoneElement.addEventListener(type, (e) => {
dropZoneElement.classList.remove("drop-zone--over");
});
});
dropZoneElement.addEventListener("drop", (e) => {
e.preventDefault();
if (e.dataTransfer.files.length) {
inputElement.files = e.dataTransfer.files;
updateThumbnail(dropZoneElement, e.dataTransfer.files[0]);
}
dropZoneElement.classList.remove("drop-zone--over");
});
});
/**
* Updates the thumbnail on a drop zone element.
*
* @param {HTMLElement} dropZoneElement
* @param {File} file
*/
function updateThumbnail(dropZoneElement, file) {
let thumbnailElement = dropZoneElement.querySelector(".drop-zone__thumb");
// First time - remove the prompt
if (dropZoneElement.querySelector(".drop-zone__prompt")) {
dropZoneElement.querySelector(".drop-zone__prompt").remove();
}
// First time - there is no thumbnail element, so lets create it
if (!thumbnailElement) {
thumbnailElement = document.createElement("div");
thumbnailElement.classList.add("drop-zone__thumb");
dropZoneElement.appendChild(thumbnailElement);
}
thumbnailElement.dataset.label = file.name;
// Show thumbnail for image files
if (file.type.startsWith("image/")) {
const reader = new FileReader();
reader.readAsDataURL(file);
reader.onload = () => {
thumbnailElement.style.backgroundImage = `url('${reader.result}')`;
};
} else {
thumbnailElement.style.backgroundImage = null;
}
}
<!DOCTYPE html>
<html lang="en">
<head>
<title>Index</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
</head>
<style>
html {
height: 100%;
}
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
}
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
}
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
}
.header a.logo {
font-size: 25px;
font-weight: bold;
}
.header a:hover {
background-color: #8a8a8a;
color: black;
}
.header a.active {
background-color: #0b0b0b;
color: white;
}
.header-right {
float: right;
}
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
}
.header-right {
float: none;
}
}
.global-container{
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
}
form{
padding-top: 10px;
font-size: 14px;
margin-top: 30px;
margin-left: 50px;
margin-right: 50px;
}
.card-title{ font-weight:300; }
.card{opacity: 0.95;}
.effect7{
position:relative;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
}
.login-form{
width:1175px;
margin:20px;
}
.drop-zone {
max-width: 300px;
height: 300px;
padding: 25px;
display: flex;
align-items: center;
justify-content: center;
text-align: center;
font-family: "Quicksand", sans-serif;
font-weight: 500;
font-size: 20px;
cursor: pointer;
color: #cccccc;
border: 4px dashed #345BDE;
border-radius: 10px;
}
.drop-zone--over {
border-style: solid;
}
.drop-zone__input {
display: none;
}
.drop-zone__thumb {
width: 100%;
height: 100%;
border-radius: 10px;
overflow: hidden;
background-color: #cccccc;
background-size: cover;
position: relative;
}
.drop-zone__thumb::after {
content: attr(data-label);
position: absolute;
bottom: 0;
left: 0;
width: 100%;
padding: 5px 0;
color: #ffffff;
background: rgba(0, 0, 0, 0.75);
font-size: 14px;
text-align: center;
}
</style>
<body>
<div class="global-container">
<br>
<br>
<br>
<div class="card login-form effect7">
<div class="card-body">
<center>
<h3>Please Upload Lecture and Slides</h3>
</center>
<form action="upload_action" method="post" enctype="multipart/form-data">
<div class="form-group">
<h5>Please Select Lecture</h5>
<input type="file" name="lecture_video" class="form" accept="video/mp4,video/x-m4v,video/*"
required>
</div>
<div class="form-group">
<h5>Please Select Slides</h5>
<input type="file" name="lecture_ppt" class="form" accept=".ppt, .pptx" required>
</div>
<div class="form-group">
<button type="submit" class="btn btn-primary btn-block">upload</button>
</div>
</form>
</div>
</div>
</div>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
<title>Short Note</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
</head>
<style>
html {
height: 100%;
}
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
}
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
}
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
}
.header a.logo {
font-size: 25px;
font-weight: bold;
}
.header a:hover {
background-color: #8a8a8a;
color: black;
}
.header a.active {
background-color: #0b0b0b;
color: white;
}
.header-right {
float: right;
}
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
}
.header-right {
float: none;
}
}
.global-container{
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
}
form{
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
}
.card-title{ font-weight:300; }
.card{
opacity: 0.95;
}
.card-body{
margin-top: 100px;
}
.effect7{
position:relative;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
}
.login-form{
width:1175px;
margin:20px;
}
</style>
<body>
<div class="global-container">
<br>
<br>
<br>
<div class="card login-form effect7">
<div class="card-body">
<center>
<h2>Short Note Ready</h2>
<a href="/short_note/{{filename}}" style="color:red;">Download as doc file</a>
</center>
</div>
</div>
</div>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
<title>Topics</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
</head>
<style>
html {
height: 100%;
}
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
}
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
}
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
}
.header a.logo {
font-size: 25px;
font-weight: bold;
}
.header a:hover {
background-color: #8a8a8a;
color: black;
}
.header a.active {
background-color: #0b0b0b;
color: white;
}
.header-right {
float: right;
}
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
}
.header-right {
float: none;
}
}
.global-container{
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
}
form{
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
}
.card-title{ font-weight:300; }
.card{
opacity: 0.95;
}
.card-body{
margin-top: 100px;
}
.effect7{
position:relative;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
}
.login-form{
width:1175px;
margin:20px;
}
</style>
<body>
<div class="global-container">
<br>
<br>
<br>
<div class="card login-form effect7">
<div class="card-body">
<center>
<h2>Topics Ready</h2>
</center>
<table class="table" id="table">
<thead>
<tr>
<th>Index</th>
<th>Key Points ( Topics )</th>
</tr>
</thead>
<tbody>
{% for row in topic_list %}
<tr>
<td>{{row[0]}}</td>
<td>{{row[2]}}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
<title>Upload</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/css/bootstrap.min.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.1/js/bootstrap.min.js"></script>
</head>
<style>
html {
height: 100%;
}
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
}
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
}
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
}
.header a.logo {
font-size: 25px;
font-weight: bold;
}
.header a:hover {
background-color: #8a8a8a;
color: black;
}
.header a.active {
background-color: #0b0b0b;
color: white;
}
.header-right {
float: right;
}
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
}
.header-right {
float: none;
}
}
.global-container{
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
}
form{
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
}
.card-title{ font-weight:300; }
.card{
opacity: 0.95;
}
.card-body{
margin-top: 100px;
}
.effect7{
position:relative;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
}
.login-form{
width:1175px;
margin:20px;
}
</style>
<body>
<div class="global-container">
<br>
<br>
<br>
<div class="card login-form effect7">
<div class="card-body">
<center>
<h2>File uploaded successfully</h2>
<h4 style="color:red;">Note generation and topic analysing will take some time</h4>
</center>
<form>
<h4>Lecture Video File : {{video_file_name}}</h4>
<h4>Lecture Slide File (pptx) : {{pptx_file_name}}</h4>
<div class="form-group">
<a href="/generate_short_note" class="btn btn-primary btn-block">Generate Short Note</a>
</div>
<div class="form-group">
<a href="/generate_topics" class="btn btn-primary btn-block">Analyse Topics</a>
</div>
</form>
</div>
</div>
</div>
</body>
</html>
import moviepy.editor as mp
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
import os
from topics_find import text_gen
def convert_video_to_audio(filename):
clip = mp.VideoFileClip(r"" + filename)
audio_file_name = str(filename).split('/')[-1].replace('.mp4', '.wav')
clip.audio.write_audiofile(r"topics_find/audio_input/" + audio_file_name)
return text_gen.convert_audio_to_text("topics_find/audio_input/" + audio_file_name)
def split_video_file(filename):
return_list = []
all_text = ''
required_video_file = filename
files = os.listdir('topics_find/video_input')
for filename in files:
os.remove('topics_find/video_input/' + filename)
total_length = VideoFileClip(required_video_file).duration
print(total_length)
no_of_slices = int(total_length / 50) + 1
time_grid = []
for i in range(0, no_of_slices):
time_grid.append(i * 50)
for i in range(no_of_slices):
if i == len(time_grid) - 1:
# ffmpeg_extract_subclip(required_video_file, time_grid[i], total_length - time_grid[i],
# targetname='videos/' + str(i) + ".mp4")
pass
else:
ffmpeg_extract_subclip(required_video_file, time_grid[i], time_grid[i + 1],
targetname='topics_find/video_input/' + str(i) + ".mp4")
text = convert_video_to_audio('topics_find/video_input/' + str(i) + ".mp4")
all_text += text + ' '
return_list.append([i, text])
return return_list, all_text
import nltk
import topics_find.question_generator as q_gen
# nltk.download('words')
# from bertopic import BERTopic
from nltk.corpus import words
# model = BERTopic(verbose=True)
def get_topics(file):
topics_outputs = []
docs = []
with open(file) as file:
for line in file:
docs.append(line.rstrip())
topics, probabilities = model.fit_transform(docs)
#
print(model.get_topic_freq())
#
print('done')
#
# model.get_topic_freq().head(11)
print(model.get_topics())
for i in model.get_topic(0):
if i[0] in words.words():
pass
else:
print(i[0])
topics_outputs.append(i[0])
return topics_outputs
def get_topics_new(text):
topics = q_gen.get_keywords(text, q_gen.summarizer(text))
print(topics)
return topics
from textwrap3 import wrap
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
import numpy as np
import nltk
# nltk.download('punkt')
# nltk.download('brown')
# nltk.download('wordnet')
# nltk.download('stopwords')
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
import pke
import traceback
from flashtext import KeywordProcessor
summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model = summary_model.to(device)
def set_seed(seed: int):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
def postprocesstext(content):
final = ""
for sent in sent_tokenize(content):
sent = sent.capitalize()
final = final + " " + sent
return final
def summarizer(text, model=summary_model, tokenizer=summary_tokenizer):
text = text.strip().replace("\n", " ")
text = "summarize: " + text
# print (text)
max_len = 512
encoding = tokenizer.encode_plus(text, max_length=max_len, pad_to_max_length=False, truncation=True,
return_tensors="pt").to(device)
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
attention_mask=attention_mask,
early_stopping=True,
num_beams=3,
num_return_sequences=1,
no_repeat_ngram_size=2,
min_length=75,
max_length=300)
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
summary = dec[0]
summary = postprocesstext(summary)
summary = summary.strip()
return summary
def get_nouns_multipartite(content):
out = []
try:
extractor = pke.unsupervised.MultipartiteRank()
extractor.load_document(input=content)
# not contain punctuation marks or stopwords as candidates.
pos = {'PROPN', 'NOUN'}
# pos = {'PROPN','NOUN'}
stoplist = list(string.punctuation)
stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
stoplist += stopwords.words('english')
extractor.candidate_selection(pos=pos, stoplist=stoplist)
# 4. build the Multipartite graph and rank candidates using random walk,
# alpha controls the weight adjustment mechanism, see TopicRank for
# threshold/method parameters.
extractor.candidate_weighting(alpha=1.1,
threshold=0.75,
method='average')
keyphrases = extractor.get_n_best(n=15)
for val in keyphrases:
out.append(val[0])
except:
out = []
traceback.print_exc()
return out
def get_keywords(originaltext, summarytext):
keywords = get_nouns_multipartite(originaltext)
print("keywords unsummarized: ", keywords)
keyword_processor = KeywordProcessor()
for keyword in keywords:
keyword_processor.add_keyword(keyword)
keywords_found = keyword_processor.extract_keywords(summarytext)
keywords_found = list(set(keywords_found))
print("keywords_found in summarized: ", keywords_found)
important_keywords = []
for keyword in keywords:
if keyword in keywords_found:
important_keywords.append(keyword)
return important_keywords[:1]
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_model = question_model.to(device)
def get_question(context, answer, model, tokenizer):
text = "context: {} answer: {}".format(context, answer)
encoding = tokenizer.encode_plus(text, max_length=384, pad_to_max_length=False, truncation=True,
return_tensors="pt").to(device)
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
attention_mask=attention_mask,
early_stopping=True,
num_beams=5,
num_return_sequences=1,
no_repeat_ngram_size=2,
max_length=72)
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
Question = dec[0].replace("question:", "")
Question = Question.strip()
return Question
def generate_questions_and_answers(text):
set_seed(42)
summarized_text = summarizer(text, summary_model, summary_tokenizer)
imp_keywords = get_keywords(text, summarized_text)
question_and_answer_list = []
for answer in imp_keywords:
ques = get_question(summarized_text, answer, question_model, question_tokenizer)
question_and_answer_list.append([ques, answer.capitalize()])
return question_and_answer_list
# xxx = """Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company
# Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve
# system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin
# rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin. In a recent tweet,
# Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and
# transaction, and hence was suspending vehicle purchases using the cryptocurrency. A day later he again tweeted saying, “To be clear, I strongly
# believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”. It triggered a downward spiral for Bitcoin value but
# the cryptocurrency has stabilised since. A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising
# that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency."""
# print(generate_questions_and_answers(xxx))
#
# x = generate_questions_and_answers(xxx)
#
# for i in x:
# print(i[0])
# print(i[1])
import glob
from pptx import Presentation
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import textract
import os.path
def create_sumall(abc, ratio):
if abc:
filename = abc
stop_word = ['is', 'a', 'and', 'the']
# Function to create Text summarization
def create_summ(text):
stopWords = set(stopwords.words("english"))
words = word_tokenize(text)
freqTable = dict()
for word in words:
word = word.lower()
if word in stopWords:
continue
if word in freqTable:
freqTable[word] += 1
else:
freqTable[word] = 1
sentences = sent_tokenize(text)
sentenceValue = dict()
for sentence in sentences:
for word, freq in freqTable.items():
if word in sentence.lower():
if sentence in sentenceValue:
sentenceValue[sentence] += freq
else:
sentenceValue[sentence] = freq
sumValues = 0
for sentence in sentenceValue:
sumValues += sentenceValue[sentence]
lensenvalu = len(sentenceValue)
if lensenvalu == 0:
lensenvalu = 1
average = int(sumValues / lensenvalu)
else:
average = int(sumValues / lensenvalu)
summary = ''
for sentence in sentences:
if (sentence in sentenceValue) and (sentenceValue[sentence] > (
ratio * average)):
summary += " " + sentence
return summary
def read_full_pptxe(filename):
sentences = []
b = []
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
s = create_summ(shape.text.replace("\n", " "))
s = str(s)
if (len(s)) >= 1:
f = ["Slide " + str(a) + "-" + s]
sentences.append(f)
return sentences
def read_full_docx(filename):
sentences = []
text = textract.process(filename)
temp = text.decode('utf-8', errors='ignore').split(".")
for t in temp:
sentences.append(t.replace("\n", " "))
return sentences
extension = os.path.splitext(filename)[1]
if extension == '.docx':
read_full_docx(filename)
else:
read_full_pptxe(filename)
def Convert(string):
li = list(string.split(" "))
return li
def Convert2(string):
li = list(string.split("\n"))
return li
def read_slide3(filename):
a = 1
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if a == 4 and shape.shape_id == 3:
s3 = str(shape.text)
return s3
def read_full_pptx(filename, sss):
numberslide = []
numberslide.append(sss)
a = 0
for eachfile in glob.glob(filename):
prs = Presentation(eachfile)
for slide in prs.slides:
a = a + 1
for shape in slide.shapes:
if hasattr(shape, "text"):
if shape.shape_id != 2:
s = shape.text.replace("\n", " ")
s = str(s)
if (len(s)) >= 20 and a != 3 and a != 1 and a != 2:
lo_1 = [a for a in new_l1 if a in s.lower()]
f_lo_l = round((len(lo_1) / len_of_l1) * 100)
if f_lo_l >= 50:
f = "Slide " + str(a)
numberslide.append(f)
return numberslide
loooo = Convert2(read_slide3(filename))
abc = []
for i in loooo:
l1 = Convert(i.lower())
new_l1 = [w for w in l1 if w not in stop_word]
len_of_l1 = len(new_l1)
read_full_pptx(filename, i)
abc.append(read_full_pptx(filename, i))
return (read_full_pptxe(filename), abc)
else:
print('error')
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import topics_find.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
app = Flask(__name__)
CORS(app)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
file = request.files['file']
ratio = float(request.form['ratio'])
filename = werkzeug.utils.secure_filename(file.filename)
print("\nReceived image File name : " + file.filename)
file.save('upload/' + filename)
f, file_extension = os.path.splitext('upload/' + filename)
print(file_extension)
if file_extension == '.docx':
text = textract.process('upload/' + filename)
arr = str(text).replace("\\n", "")
arr = arr.replace("\\t", "")
arr = arr.replace("\\", "")
prs = Presentation()
lyt = prs.slide_layouts[0] # choosing a slide layout
for x in range(0, 3):
if x == 2:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
subtitle.text = arr
else:
slide = prs.slides.add_slide(lyt) # adding a slide
title = slide.shapes.title # assigning a title
subtitle = slide.placeholders[1] # placeholder for subtitle
title.text = "ignore" # title
subtitle.text = "ignore" # subtitle
prs.save("upload/slide3.pptx") # saving file
print('file saved')
res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
else:
res = summarizeed.create_sumall('upload/' + filename, ratio)
rr = []
for r in res[0]:
rr.append(r[0].replace('"', ''))
return_str = '{ "result" : ['
for i in range(len(rr)):
if i == len(rr) - 1:
return_str += '"' + rr[i] + '"'
else:
return_str += '"' + rr[i] + '"' + ','
return_str += ']}'
print(return_str)
return json.loads(return_str)
if __name__ == '__main__':
app.run(host="0.0.0.0", port=5005, debug=True)
import speech_recognition as sr
import subprocess
import os
import sys
PYTHONIOENCODING = "UTF-8"
FOLDER_AUDIO = "audio_input"
FOLDER_TEXT = "text_output"
LANGUAGE = "en-US"
# print("starting...")
#
# if not os.path.isdir(FOLDER_AUDIO):
# os.mkdir(FOLDER_AUDIO)
#
# if not os.path.isdir(FOLDER_TEXT):
# os.mkdir(FOLDER_TEXT)
#
# paths = [os.path.join(FOLDER_AUDIO, nome) for nome in os.listdir(FOLDER_AUDIO)]
# files = [arq for arq in paths if os.path.isfile(arq)]
# wav_files = [arq for arq in files if arq.lower().endswith(".wav")]
#
# for filename in wav_files:
# r = sr.Recognizer()
# with sr.AudioFile(filename) as source:
# audio = r.record(source)
#
# command = r.recognize_google(audio, language='en-IN', show_all=True)
# print(command)
#
# print("running file {}".format(filename))
#
# filefinal = filename.split("audio_input/")[1].split(".wav")[0]
# filefinal = '{}/{}.txt'.format(FOLDER_TEXT, filefinal)
# with open(filefinal, 'w') as arq:
# arq.write(str(command))
#
# print("create a new file {}".format(filefinal))
#
# print("finish")
def convert_audio_to_text(filename):
r = sr.Recognizer()
with sr.AudioFile(filename) as source:
audio = r.record(source)
try:
command = r.recognize_google(audio, language='en-IN', show_all=True)
print(command["alternative"][0]["transcript"])
return command["alternative"][0]["transcript"]
except:
return 'did not convert'
# convert_audio_to_text('audio_input/3.wav')