from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import summerise.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
app = Flask(__name__)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
    """Summarise an uploaded .docx or .pptx file and return the result.

    Reads the uploaded ``file`` and the ``ratio`` form field, stores the
    upload under ``upload/`` and passes it to ``summarizeed.create_sumall``.
    A ``.docx`` upload is first turned into a three-slide deck
    (``upload/slide3.pptx``) whose third slide carries the document text.

    Returns:
        dict: ``{"result": [...]}`` with one summary string per entry and
        all double quotes removed from the strings.
    """
    file = request.files['file']
    ratio = float(request.form['ratio'])
    filename = werkzeug.utils.secure_filename(file.filename)
    print("\nReceived image File name : " + file.filename)
    upload_path = 'upload/' + filename
    file.save(upload_path)

    file_extension = os.path.splitext(upload_path)[1]
    if file_extension == '.docx':
        text = textract.process(upload_path)
        # str() of the extracted bytes leaves literal "\n" / "\t" escape
        # sequences in the text; strip them and any remaining backslashes.
        arr = str(text).replace("\\n", "")
        arr = arr.replace("\\t", "")
        arr = arr.replace("\\", "")

        prs = Presentation()
        lyt = prs.slide_layouts[0]  # choosing a slide layout
        for x in range(0, 3):
            slide = prs.slides.add_slide(lyt)  # adding a slide
            subtitle = slide.placeholders[1]  # placeholder for subtitle
            if x == 2:
                # the third slide holds the document text
                subtitle.text = arr
            else:
                slide.shapes.title.text = "ignore"  # title
                subtitle.text = "ignore"  # subtitle
        prs.save("upload/slide3.pptx")  # saving file
        print('file saved')
        res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
    else:
        res = summarizeed.create_sumall(upload_path, ratio)

    # Build the payload directly instead of concatenating a JSON string and
    # parsing it back, which failed on control characters and backslashes.
    return {"result": [r[0].replace('"', '') for r in res[0]]}
if __name__ == '__main__':"", port=5005, debug=True)
import glob
from pptx import Presentation
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import textract
import os.path
import nltk'stopwords')'punkt')
def create_sumall(abc, ratio):
if abc:
filename = abc
stop_word = ['is', 'a', 'and', 'the']
# Function to create Text summarization
def create_summ(text):
    """Return an extractive summary of ``text``.

    Every token that is not an English stop word is counted. Each sentence
    is scored with the summed counts of the counted words that occur in its
    lower-cased form (substring match). Sentences scoring above ``ratio``
    times the integer average score are kept in their original order, each
    preceded by a single space.

    ``ratio`` is read from the enclosing ``create_sumall`` scope.
    """
    stopWords = set(stopwords.words("english"))

    freqTable = dict()
    for word in word_tokenize(text):
        word = word.lower()
        if word in stopWords:
            # stop words must not contribute to sentence scores
            continue
        freqTable[word] = freqTable.get(word, 0) + 1

    sentences = sent_tokenize(text)
    sentenceValue = dict()
    for sentence in sentences:
        lowered = sentence.lower()
        for word, freq in freqTable.items():
            if word in lowered:
                sentenceValue[sentence] = sentenceValue.get(sentence, 0) + freq

    sumValues = sum(sentenceValue.values())
    # avoid dividing by zero when no sentence received a score
    average = int(sumValues / max(len(sentenceValue), 1))

    threshold = ratio * average
    summary = ''
    for sentence in sentences:
        if sentence in sentenceValue and sentenceValue[sentence] > threshold:
            summary += " " + sentence
    return summary
def read_full_pptxe(filename):
    """Summarise the text of every text-bearing shape in the matched decks.

    ``filename`` is used as a glob pattern. The slide counter keeps running
    across all matched files.

    Returns:
        list: one-element lists of the form ``["Slide <n>-<summary>"]``;
        shapes whose summary is empty are skipped.
    """
    sentences = []
    a = 0
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            for shape in slide.shapes:
                if not hasattr(shape, "text"):
                    continue
                s = str(create_summ(shape.text.replace("\n", " ")))
                if len(s) >= 1:
                    # store the entry; it was previously built and dropped
                    sentences.append(["Slide " + str(a) + "-" + s])
    return sentences
def read_full_docx(filename):
    """Return the text of ``filename`` split on ``"."``.

    Newlines inside each piece are replaced by spaces.
    """
    text = textract.process(filename)
    # textract returns bytes; decode first so the str separators below work
    if isinstance(text, bytes):
        text = text.decode("utf-8", errors="replace")
    return [t.replace("\n", " ") for t in text.split(".")]
extension = os.path.splitext(filename)[1]
if extension == 'docx':
def Convert(string):
    """Split ``string`` on single spaces and return the pieces as a list."""
    return string.split(" ")
def Convert2(string):
    """Split ``string`` on newline characters and return the lines as a list."""
    return string.split("\n")
def read_slide3(filename):
    """Return the text of the shape with ``shape_id == 3`` on the third slide.

    ``filename`` is used as a glob pattern. The counter starts at 1 and is
    incremented before each slide is examined, so the value 4 corresponds to
    the third slide seen. When several shapes match, the last one wins.

    Returns:
        str: the matching shape text, or ``""`` when nothing matches
        (previously this raised ``UnboundLocalError``).
    """
    s3 = ""
    a = 1
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            if a != 4:
                continue
            for shape in slide.shapes:
                # NOTE(review): shape id 3 is presumably the subtitle
                # placeholder of the title layout - confirm.
                if hasattr(shape, "text") and shape.shape_id == 3:
                    s3 = str(shape.text)
    return s3
def read_full_pptx(filename, sss):
    """Return labels of slides whose text matches the current word list.

    ``filename`` is used as a glob pattern. Only slides after the third are
    considered, and only shapes with ``shape_id != 2`` whose text is at
    least 20 characters long. A shape matches when at least 50 percent of
    the words in ``new_l1`` (taken from the enclosing scope) occur in its
    lower-cased text as substrings.

    ``sss`` is accepted for compatibility with existing callers and is not
    used.

    Returns:
        list: one ``"Slide <n>"`` string per matching shape.
    """
    numberslide = []
    total_words = len(new_l1)
    if total_words == 0:
        # nothing to match against; avoids a division by zero below
        return numberslide

    a = 0
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            if a in (1, 2, 3):
                continue
            for shape in slide.shapes:
                if not hasattr(shape, "text") or shape.shape_id == 2:
                    continue
                s = str(shape.text.replace("\n", " "))
                if len(s) < 20:
                    continue
                lowered = s.lower()
                lo_1 = [w for w in new_l1 if w in lowered]
                f_lo_l = round((len(lo_1) / total_words) * 100)
                if f_lo_l >= 50:
                    # store the label; it was previously built and dropped
                    numberslide.append("Slide " + str(a))
    return numberslide
loooo = Convert2(read_slide3(filename))
abc = []
for i in loooo:
l1 = Convert(i.lower())
new_l1 = [w for w in l1 if w not in stop_word]
len_of_l1 = len(new_l1)
read_full_pptx(filename, i)
abc.append(read_full_pptx(filename, i))
return (read_full_pptxe(filename), abc)
# import libraries
import json
import os
import audio_gen as topic_gen
import bert as bert
import werkzeug
from flask import Flask, request, send_file
from flask_cors import CORS
from nltk.corpus import stopwords
s = set(stopwords.words('english'))
app = Flask(__name__)
download_file = ''
# Topics API
@app.route('/topic', methods=['GET', 'POST'])
def topic():
    """Extract topics for each slice of an uploaded lecture video.

    Saves the uploaded ``video`` under ``upload/``, transcribes it slice by
    slice via ``topic_gen.split_video_file`` and asks
    ``bert.get_topics_new`` for the topics of every slice, dropping English
    stop words. The transcript path is remembered in the module-level
    ``download_file``.

    Returns:
        list: one dict per slice with the string fields ``index``,
        ``topic`` and ``time_frame``.
    """
    global download_file
    imagefile = request.files['video']
    filename = werkzeug.utils.secure_filename(imagefile.filename)
    print("\nReceived image File name : " + imagefile.filename)
    imagefile.save('upload/' + filename)
    download_file = 'upload/' + str(filename).replace('.mp4', '.txt')
    text_list_from_video, all_text = topic_gen.split_video_file('upload/' + filename)

    # Writing to a file
    # NOTE(review): the visible code opened this file without writing to or
    # closing it; storing the full transcript here is assumed - confirm.
    with open(download_file, 'w') as file1:
        file1.write(all_text)

    topic_list = []
    for index in text_list_from_video:
        temp_topic = bert.get_topics_new(index[1])
        topic_list.append([elem for elem in temp_topic if elem not in s])

    # Build the response objects directly instead of concatenating a JSON
    # string, which failed for an empty list and for topics holding quotes.
    last = len(topic_list) - 1
    result = []
    for i, topic_words in enumerate(topic_list):
        if i == last:
            time_frame = str(i * 240) + ' to end'
        else:
            time_frame = str(i * 240) + ' to ' + str((i + 1) * 240) + ' seconds'
        result.append({
            "index": str(i),
            "topic": str(topic_words),
            "time_frame": time_frame,
        })
    return result
# Transcript API
@app.route('/transcript', methods=['GET', 'POST'])
def transcript():
    """Send the transcript file recorded by the most recent ``/topic`` call.

    Returns a 404 response when no transcript path has been recorded yet,
    instead of passing an empty path to ``send_file``.
    """
    if not download_file:
        return 'No transcript has been generated yet', 404
    return send_file(download_file, as_attachment=True)
if __name__ == "__main__":"", port=1100, debug=True)
import moviepy.editor as mp
from import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
import os
import text_gen
def convert_video_to_audio(filename):
    """Export the audio track of ``filename`` and return its transcript.

    The audio is written to ``audio_input/`` under the video's base name
    with ``.mp4`` replaced by ``.wav``, then handed to
    ``text_gen.convert_audio_to_text``.
    """
    clip = mp.VideoFileClip(r"" + filename)
    audio_file_name = str(filename).split('/')[-1].replace('.mp4', '.wav')
    audio_path = "audio_input/" + audio_file_name
    try:
        clip.audio.write_audiofile(audio_path)
    finally:
        # release the reader processes held by the clip
        clip.close()
    return text_gen.convert_audio_to_text(audio_path)
def split_video_file(filename, slice_seconds=240):
    """Cut a video into fixed-length slices and transcribe each slice.

    Existing files in ``video_input/`` are removed first. Each slice is
    written to ``video_input/<i>.mp4`` and transcribed with
    ``convert_video_to_audio``.

    Args:
        filename: path of the video to process.
        slice_seconds: length of each slice in seconds (default 240).

    Returns:
        tuple: ``(return_list, all_text)`` where ``return_list`` holds
        ``[slice_index, text]`` pairs and ``all_text`` is every transcript
        followed by a space.
    """
    return_list = []
    all_text = ''
    required_video_file = filename

    # clear slices left over from an earlier run
    for stale in os.listdir('video_input'):
        os.remove('video_input/' + stale)

    source_clip = VideoFileClip(required_video_file)
    try:
        total_length = source_clip.duration
    finally:
        source_clip.close()

    no_of_slices = int(total_length / slice_seconds) + 1
    time_grid = [i * slice_seconds for i in range(no_of_slices)]

    for i, start in enumerate(time_grid):
        if start >= total_length:
            # duration is an exact multiple of the slice length: no tail left
            break
        # the last slice runs to the end of the video; indexing
        # time_grid[i + 1] there raised IndexError
        end = total_length if i == len(time_grid) - 1 else time_grid[i + 1]
        target = 'video_input/' + str(i) + ".mp4"
        ffmpeg_extract_subclip(required_video_file, start, end, targetname=target)
        text = convert_video_to_audio(target)
        all_text += text + ' '
        return_list.append([i, text])
    return return_list, all_text
import nltk
import question_generator as q_gen
from bertopic import BERTopic
from nltk.corpus import words
model = BERTopic(verbose=True)
def get_topics(file):
topics_outputs = []
docs = []
with open(file) as file:
for line in file:
topics, probabilities = model.fit_transform(docs)
for i in model.get_topic(0):
if i[0] in words.words():
return topics_outputs
def get_topics_new(text):
    """Return the keywords ``q_gen`` finds for ``text`` and its summary."""
    summary = q_gen.summarizer(text)
    return q_gen.get_keywords(text, summary)
from textwrap3 import wrap
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
import numpy as np
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
import pke
import traceback
from flashtext import KeywordProcessor
summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summary_model =
def set_seed(seed: int):
def postprocesstext(content):
    """Capitalise every sentence of ``content`` and join them.

    Each sentence is preceded by one space, so a non-empty result starts
    with a space.
    """
    return "".join(" " + sent.capitalize() for sent in sent_tokenize(content))
# text summarizing
def summarizer(text, model=summary_model, tokenizer=summary_tokenizer):
text = text.strip().replace("\n", " ")
text = "summarize: " + text
max_len = 512
encoding = tokenizer.encode_plus(text, max_length=max_len, pad_to_max_length=False, truncation=True,
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
summary = dec[0]
summary = postprocesstext(summary)
summary = summary.strip()
return summary
def get_nouns_multipartite(content):
out = []
extractor = pke.unsupervised.MultipartiteRank()
# not contain punctuation marks or stopwords as candidates.
pos = {'PROPN', 'NOUN'}
stoplist = list(string.punctuation)
stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
stoplist += stopwords.words('english')
extractor.candidate_selection(pos=pos, stoplist=stoplist)
# 4. build the Multipartite graph and rank candidates using random walk,
# alpha controls the weight adjustment mechanism, see TopicRank for
# threshold/method parameters.
keyphrases = extractor.get_n_best(n=15)
for val in keyphrases:
out = []
return out
def get_keywords(originaltext, summarytext):
    """Return up to four keywords of ``originaltext`` found in ``summarytext``.

    Candidates come from ``get_nouns_multipartite(originaltext)``. They are
    registered with a flashtext ``KeywordProcessor``, which is then run over
    the summary. Candidates keep their original order.
    """
    keywords = get_nouns_multipartite(originaltext)

    keyword_processor = KeywordProcessor()
    for keyword in keywords:
        # without registration the processor can never report a match
        keyword_processor.add_keyword(keyword)

    keywords_found = set(keyword_processor.extract_keywords(summarytext))

    important_keywords = [
        keyword for keyword in keywords if keyword in keywords_found
    ]
    return important_keywords[:4]
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_model =
def get_question(context, answer, model, tokenizer):
text = "context: {} answer: {}".format(context, answer)
encoding = tokenizer.encode_plus(text, max_length=384, pad_to_max_length=False, truncation=True,
input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
outs = model.generate(input_ids=input_ids,
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
Question = dec[0].replace("question:", "")
Question = Question.strip()
return Question
def generate_questions_and_answers(text):
    """Return ``[question, answer]`` pairs generated from ``text``.

    The text is summarised, keywords shared by text and summary become the
    answers, and one question per answer is generated from the summary.
    Answers are returned capitalised.
    """
    summarized_text = summarizer(text, summary_model, summary_tokenizer)
    pairs = []
    for keyword in get_keywords(text, summarized_text):
        question = get_question(
            summarized_text, keyword, question_model, question_tokenizer
        )
        pairs.append([question, keyword.capitalize()])
    return pairs
pip install --ignore-installed nltk pywsd scikit-learn flask Flask-Cors PyPDF2 textwrap3 transformers pke-tool flashtext sentence_transformers spacy pydot bertopic pandas rake-nltk protobuf==3.20.0 moviepy SpeechRecognition
python -m spacy download en_core_web_sm
import glob
import math
import os.path
import nltk
import textract
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from pptx import Presentation
def create_sumall(abc, ratio):
if abc:
filename = abc
stop_word = ['is', 'a', 'and', 'the']
# Function to create Text summarization
def create_summ(text):
    """Return an extractive summary of ``text``.

    Every token that is not an English stop word is counted. Each sentence
    is scored with the summed counts of the counted words that occur in its
    lower-cased form (substring match). Sentences scoring above ``ratio``
    times the integer average score are kept in their original order, each
    preceded by a single space.

    ``ratio`` is read from the enclosing ``create_sumall`` scope.
    """
    stopWords = set(stopwords.words("english"))

    freqTable = dict()
    for word in word_tokenize(text):
        word = word.lower()
        if word in stopWords:
            # stop words must not contribute to sentence scores
            continue
        freqTable[word] = freqTable.get(word, 0) + 1

    sentences = sent_tokenize(text)
    sentenceValue = dict()
    for sentence in sentences:
        lowered = sentence.lower()
        for word, freq in freqTable.items():
            if word in lowered:
                sentenceValue[sentence] = sentenceValue.get(sentence, 0) + freq

    sumValues = sum(sentenceValue.values())
    # avoid dividing by zero when no sentence received a score
    average = int(sumValues / max(len(sentenceValue), 1))

    threshold = ratio * average
    summary = ''
    for sentence in sentences:
        if sentence in sentenceValue and sentenceValue[sentence] > threshold:
            summary += " " + sentence
    return summary
def read_full_pptxe(filename):
    """Summarise the text of every text-bearing shape in the matched decks.

    ``filename`` is used as a glob pattern. The slide counter keeps running
    across all matched files.

    Returns:
        list: one-element lists of the form ``["Slide <n>-<summary>"]``;
        shapes whose summary is empty are skipped.
    """
    sentences = []
    a = 0
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            for shape in slide.shapes:
                if not hasattr(shape, "text"):
                    continue
                s = str(create_summ(shape.text.replace("\n", " ")))
                if len(s) >= 1:
                    # store the entry; it was previously built and dropped
                    sentences.append(["Slide " + str(a) + "-" + s])
    return sentences
def read_full_docx(filename):
    """Return the text of ``filename`` split on ``"."``.

    Newlines inside each piece are replaced by spaces.
    """
    text = textract.process(filename)
    # textract returns bytes; decode first so the str separators below work
    if isinstance(text, bytes):
        text = text.decode("utf-8", errors="replace")
    return [t.replace("\n", " ") for t in text.split(".")]
extension = os.path.splitext(filename)[1]
if extension == 'docx':
def Convert(string):
    """Split ``string`` on single spaces and return the pieces as a list."""
    return string.split(" ")
def Convert2(string):
    """Split ``string`` on newline characters and return the lines as a list."""
    return string.split("\n")
def read_slide3(filename):
    """Return the text of the shape with ``shape_id == 3`` on the third slide.

    ``filename`` is used as a glob pattern. The counter starts at 1 and is
    incremented before each slide is examined, so the value 4 corresponds to
    the third slide seen. When several shapes match, the last one wins.

    Returns:
        str: the matching shape text, or ``""`` when nothing matches
        (previously this raised ``UnboundLocalError``).
    """
    s3 = ""
    a = 1
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            if a != 4:
                continue
            for shape in slide.shapes:
                # NOTE(review): shape id 3 is presumably the subtitle
                # placeholder of the title layout - confirm.
                if hasattr(shape, "text") and shape.shape_id == 3:
                    s3 = str(shape.text)
    return s3
def read_full_pptx(filename, sss):
    """Return labels of slides whose text matches the current word list.

    ``filename`` is used as a glob pattern. Only slides after the third are
    considered, and only shapes with ``shape_id != 2`` whose text is at
    least 20 characters long. A shape matches when at least 50 percent of
    the words in ``new_l1`` (taken from the enclosing scope) occur in its
    lower-cased text as substrings.

    ``sss`` is accepted for compatibility with existing callers and is not
    used.

    Returns:
        list: one ``"Slide <n>"`` string per matching shape.
    """
    numberslide = []
    total_words = len(new_l1)
    if total_words == 0:
        # nothing to match against; avoids a division by zero below
        return numberslide

    a = 0
    for eachfile in glob.glob(filename):
        prs = Presentation(eachfile)
        for slide in prs.slides:
            a = a + 1
            if a in (1, 2, 3):
                continue
            for shape in slide.shapes:
                if not hasattr(shape, "text") or shape.shape_id == 2:
                    continue
                s = str(shape.text.replace("\n", " "))
                if len(s) < 20:
                    continue
                lowered = s.lower()
                lo_1 = [w for w in new_l1 if w in lowered]
                f_lo_l = round((len(lo_1) / total_words) * 100)
                if f_lo_l >= 50:
                    # store the label; it was previously built and dropped
                    numberslide.append("Slide " + str(a))
    return numberslide
loooo = Convert2(read_slide3(filename))
abc = []
for i in loooo:
l1 = Convert(i.lower())
new_l1 = [w for w in l1 if w not in stop_word]
len_of_l1 = len(new_l1)
read_full_pptx(filename, i)
abc.append(read_full_pptx(filename, i))
return (read_full_pptxe(filename), abc)
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import topics_find.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
app = Flask(__name__)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
    """Summarise an uploaded .docx or .pptx file and return the result.

    Reads the uploaded ``file`` and the ``ratio`` form field, stores the
    upload under ``upload/`` and passes it to ``summarizeed.create_sumall``.
    A ``.docx`` upload is first turned into a three-slide deck
    (``upload/slide3.pptx``) whose third slide carries the document text.

    Returns:
        dict: ``{"result": [...]}`` with one summary string per entry and
        all double quotes removed from the strings.
    """
    file = request.files['file']
    ratio = float(request.form['ratio'])
    filename = werkzeug.utils.secure_filename(file.filename)
    print("\nReceived image File name : " + file.filename)
    upload_path = 'upload/' + filename
    file.save(upload_path)

    file_extension = os.path.splitext(upload_path)[1]
    if file_extension == '.docx':
        text = textract.process(upload_path)
        # str() of the extracted bytes leaves literal "\n" / "\t" escape
        # sequences in the text; strip them and any remaining backslashes.
        arr = str(text).replace("\\n", "")
        arr = arr.replace("\\t", "")
        arr = arr.replace("\\", "")

        prs = Presentation()
        lyt = prs.slide_layouts[0]  # choosing a slide layout
        for x in range(0, 3):
            slide = prs.slides.add_slide(lyt)  # adding a slide
            subtitle = slide.placeholders[1]  # placeholder for subtitle
            if x == 2:
                # the third slide holds the document text
                subtitle.text = arr
            else:
                slide.shapes.title.text = "ignore"  # title
                subtitle.text = "ignore"  # subtitle
        prs.save("upload/slide3.pptx")  # saving file
        print('file saved')
        res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
    else:
        res = summarizeed.create_sumall(upload_path, ratio)

    # Build the payload directly instead of concatenating a JSON string and
    # parsing it back, which failed on control characters and backslashes.
    return {"result": [r[0].replace('"', '') for r in res[0]]}
if __name__ == '__main__':"", port=5005, debug=True)
import os
import subprocess
import sys
import speech_recognition as sr
FOLDER_AUDIO = "audio_input"
FOLDER_TEXT = "text_output"
def convert_audio_to_text(filename):
    """Transcribe the audio file ``filename`` with Google speech recognition.

    Returns:
        str: the transcript of the first alternative, or the literal
        ``'did not convert'`` when recognition fails or yields no result.
    """
    r = sr.Recognizer()
    with sr.AudioFile(filename) as source:
        audio = r.record(source)
    try:
        command = r.recognize_google(audio, language='en-IN', show_all=True)
        # with show_all=True an unrecognised clip yields an empty list, so
        # the lookups below can raise TypeError / KeyError / IndexError
        return command["alternative"][0]["transcript"]
    except (sr.UnknownValueError, sr.RequestError,
            KeyError, IndexError, TypeError):
        return 'did not convert'
# convert_audio_to_text('audio_input/3.wav')
from flask import Flask, request, render_template, send_file
from flask_cors import CORS
import werkzeug
import cv2
import note_generator.note as note_gen
import topics_find.audio_gen as topic_gen
import note_generator.write_word as writer
import topics_find.bert as bert
import os
app = Flask(__name__)
video_file_name = ''
pptx_file_name = ''
# NOTE(review): no route decorator was visible for this view although every
# other view in this module is routed; '/' is assumed - confirm.
@app.route('/')
def index():
    """Render the ``index.html`` template."""
    return render_template('index.html')
@app.route('/upload_action', methods=['GET', 'POST'])
def upload_action():
    """Store the uploaded lecture video and slide deck under ``upload/``.

    The sanitised file names are remembered in the module-level
    ``video_file_name`` and ``pptx_file_name`` for the later generate
    requests, and ``upload.html`` is rendered with both names.
    """
    global video_file_name
    global pptx_file_name

    lecture_video = request.files['lecture_video']
    filename_v = werkzeug.utils.secure_filename(lecture_video.filename)
    print("\nReceived image File name : " + lecture_video.filename)
    lecture_video.save('upload/' + filename_v)

    lecture_ppt = request.files['lecture_ppt']
    filename = werkzeug.utils.secure_filename(lecture_ppt.filename)
    print("\nReceived image File name : " + lecture_ppt.filename)
    lecture_ppt.save('upload/' + filename)

    video_file_name = filename_v
    pptx_file_name = filename
    return render_template('upload.html', video_file_name=video_file_name, pptx_file_name=pptx_file_name)
@app.route('/generate_topics', methods=['GET', 'POST'])
def generate_topics():
    """Render ``topics.html`` with the topics of each video slice.

    The previously uploaded video (``video_file_name``) is split and
    transcribed. Every row passed to the template is
    ``[slice_index, text, topics]``.
    """
    global video_file_name
    global pptx_file_name
    segments, all_text = topic_gen.split_video_file('upload/' + video_file_name)
    topic_list = [
        [segment[0], segment[1], bert.get_topics_new(segment[1])]
        for segment in segments
    ]
    return render_template('topics.html', topic_list=topic_list)
@app.route('/generate_short_note', methods=['GET', 'POST'])
def generate_short_note():
global video_file_name
global pptx_file_name
text_from_pptx = note_gen.generate_note('upload/' + pptx_file_name)
text_list_from_video, all_text = topic_gen.split_video_file('upload/' + video_file_name)
writer.write_note('short note from lecture video :- ')
for i in text_list_from_video:
writer.write_note('short note from lecture slide (pptx) :- ')
writer.save_note('upload/' + pptx_file_name.split('.')[0] + '.docx')
doc_filename = pptx_file_name.split('.')[0] + '.docx'
return render_template('short_notes.html', filename=doc_filename)
# Route matches the "/short_note/{{filename}}" download link that the
# templates render; the view was previously unreachable.
@app.route('/short_note/<name>')
def short_note(name):
    """Send the generated note ``upload/<name>`` as an attachment."""
    # name comes from the URL, so reduce it to a bare file name before
    # building the path
    doc = 'upload/' + werkzeug.utils.secure_filename(name)
    print('request', doc)
    return send_file(doc, as_attachment=True)
if __name__ == '__main__':"", port=5200, debug=True)
.html {
height: 100%;
* {box-sizing: border-box;}
.body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url( ../images/bg.jpg )
.body_login {
width: 50%;
padding: 10px;
margin: 60px auto;
font-family: Arial, Helvetica, sans-serif;
background-image: url( ../images/bg.jpg )
.header {
overflow: hidden;
background-color: #e28743;
padding: 5px 10px;
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
.header a.logo {
font-size: 25px;
font-weight: bold;
.header a:hover {
background-color: #76b5c5;
color: black;
.header {
background-color: #76b5c5;
color: white;
.header-right {
float: right;
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
.header-right {
float: none;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
.login-form {
opacity: 0.9;
width: 340px;
margin: 50px auto;
font-size: 15px;
.login-form form {
margin-bottom: 15px;
background: #f7f7f7;
box-shadow: 0px 2px 2px rgba(0, 0, 0, 0.3);
padding: 30px;
.login-form h2 {
margin: 0 0 15px;
.form-control, .btn {
min-height: 38px;
border-radius: 2px;
.btn {
font-size: 15px;
font-weight: bold;
padding-top: 10px;
font-size: 14px;
margin-top: 30px;
.card-title{ font-weight:300; }
.card{opacity: 0.9;}
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
font-size: 14px;
padding:20px 0 0;
font-size: 13px;
.modal-dialog {
max-width: 800px;
margin: 30px auto;
.modal-body {
.close {
font-weight: normal;
.custom-file-uploader {
position: relative;
input[type='file'] {
display: block;
position: absolute;
top: 0;
right: 0;
bottom: 0;
left: 0;
z-index: 5;
width: 100%;
height: 100%;
opacity: 0;
cursor: default;
\ No newline at end of file
/* Always set the map height explicitly to define the size of the div
* element that contains the map. */
#map {
height: 100%;
/* Optional: Makes the sample page fill the window. */
body {
height: 100%;
margin: 0;
padding: 0;
\ No newline at end of file
const citymap = {
chicago: {
center: { lat: 6.9061, lng: 79.9696 },
population: 100,
function initMap() {
// Create the map.
const map = new google.maps.Map(document.getElementById("map"), {
zoom: 15,
center: { lat: 6.9061, lng: 79.9696 },
mapTypeId: "terrain",
// Construct the circle for each value in citymap.
// Note: We scale the area of the circle based on the population.
for (const city in citymap) {
// Add the circle for this city to the map.
const cityCircle = new google.maps.Circle({
strokeColor: "#FF0000",
strokeOpacity: 0.8,
strokeWeight: 2,
fillColor: "#FF0000",
fillOpacity: 0.35,
center: citymap[city].center,
radius: Math.sqrt(citymap[city].population) * 100,
\ No newline at end of file
document.querySelectorAll(".drop-zone__input").forEach((inputElement) => {
const dropZoneElement = inputElement.closest(".drop-zone");
dropZoneElement.addEventListener("click", (e) => {;
inputElement.addEventListener("change", (e) => {
if (inputElement.files.length) {
updateThumbnail(dropZoneElement, inputElement.files[0]);
dropZoneElement.addEventListener("dragover", (e) => {
["dragleave", "dragend"].forEach((type) => {
dropZoneElement.addEventListener(type, (e) => {
dropZoneElement.addEventListener("drop", (e) => {
if (e.dataTransfer.files.length) {
inputElement.files = e.dataTransfer.files;
updateThumbnail(dropZoneElement, e.dataTransfer.files[0]);
* Updates the thumbnail on a drop zone element.
* @param {HTMLElement} dropZoneElement
* @param {File} file
function updateThumbnail(dropZoneElement, file) {
let thumbnailElement = dropZoneElement.querySelector(".drop-zone__thumb");
// First time - remove the prompt
if (dropZoneElement.querySelector(".drop-zone__prompt")) {
// First time - there is no thumbnail element, so lets create it
if (!thumbnailElement) {
thumbnailElement = document.createElement("div");
thumbnailElement.dataset.label =;
// Show thumbnail for image files
if (file.type.startsWith("image/")) {
const reader = new FileReader();
reader.onload = () => { = `url('${reader.result}')`;
} else { = null;
\ No newline at end of file
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="">
<script src=""></script>
<script src=""></script>
html {
height: 100%;
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
.header a.logo {
font-size: 25px;
font-weight: bold;
.header a:hover {
background-color: #8a8a8a;
color: black;
.header {
background-color: #0b0b0b;
color: white;
.header-right {
float: right;
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
.header-right {
float: none;
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
padding-top: 10px;
font-size: 14px;
margin-top: 30px;
margin-left: 50px;
margin-right: 50px;
.card-title{ font-weight:300; }
.card{opacity: 0.95;}
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
.drop-zone {
max-width: 300px;
height: 300px;
padding: 25px;
display: flex;
align-items: center;
justify-content: center;
text-align: center;
font-family: "Quicksand", sans-serif;
font-weight: 500;
font-size: 20px;
cursor: pointer;
color: #cccccc;
border: 4px dashed #345BDE;
border-radius: 10px;
.drop-zone--over {
border-style: solid;
.drop-zone__input {
display: none;
.drop-zone__thumb {
width: 100%;
height: 100%;
border-radius: 10px;
overflow: hidden;
background-color: #cccccc;
background-size: cover;
position: relative;
.drop-zone__thumb::after {
content: attr(data-label);
position: absolute;
bottom: 0;
left: 0;
width: 100%;
padding: 5px 0;
color: #ffffff;
background: rgba(0, 0, 0, 0.75);
font-size: 14px;
text-align: center;
<div class="global-container">
<div class="card login-form effect7">
<div class="card-body">
<h3>Please Upload Lecture and Slides</h3>
<form action="upload_action" method="post" enctype="multipart/form-data">
<div class="form-group">
<h5>Please Select Lecture</h5>
<input type="file" name="lecture_video" class="form" accept="video/mp4,video/x-m4v,video/*"
<div class="form-group">
<h5>Please Select Slides</h5>
<input type="file" name="lecture_ppt" class="form" accept=".ppt, .pptx" required>
<div class="form-group">
<button type="submit" class="btn btn-primary btn-block">upload</button>
<!DOCTYPE html>
<html lang="en">
<title>Short Note</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="">
<script src=""></script>
<script src=""></script>
html {
height: 100%;
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
.header a.logo {
font-size: 25px;
font-weight: bold;
.header a:hover {
background-color: #8a8a8a;
color: black;
.header {
background-color: #0b0b0b;
color: white;
.header-right {
float: right;
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
.header-right {
float: none;
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
.card-title{ font-weight:300; }
opacity: 0.95;
margin-top: 100px;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
<div class="global-container">
<div class="card login-form effect7">
<div class="card-body">
<h2>Short Note Ready</h2>
<a href="/short_note/{{filename}}" style="color:red;">Download as doc file</a>
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="">
<script src=""></script>
<script src=""></script>
html {
height: 100%;
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
.header a.logo {
font-size: 25px;
font-weight: bold;
.header a:hover {
background-color: #8a8a8a;
color: black;
.header {
background-color: #0b0b0b;
color: white;
.header-right {
float: right;
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
.header-right {
float: none;
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
.card-title{ font-weight:300; }
opacity: 0.95;
margin-top: 100px;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
<div class="global-container">
<div class="card login-form effect7">
<div class="card-body">
<h2>Topics Ready</h2>
<table class="table" id="table">
<th>Key Points ( Topics )</th>
{% for row in topic_list %}
{% endfor %}
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="">
<script src=""></script>
<script src=""></script>
html {
height: 100%;
* {box-sizing: border-box;}
body {
margin: 0;
height: 100%;
font-family: Arial, Helvetica, sans-serif;
background-image: url({{ url_for('static', filename='images/bg.jpg') }})
.header {
overflow: hidden;
background-color: #970103;
padding: 5px 10px;
.header a {
float: left;
color: White;
text-align: center;
padding: 12px;
text-decoration: none;
font-size: 18px;
line-height: 25px;
border-radius: 4px;
.header a.logo {
font-size: 25px;
font-weight: bold;
.header a:hover {
background-color: #8a8a8a;
color: black;
.header {
background-color: #0b0b0b;
color: white;
.header-right {
float: right;
@media screen and (max-width: 500px) {
.header a {
float: none;
display: block;
text-align: left;
.header-right {
float: none;
margin-top: 20px;
display: flex;
align-items: center;
justify-content: center;
float: left;
width: 100%;
padding-top: 10px;
font-size: 14px;
margin-top: 50px;
margin-left: 50px;
margin-right: 50px;
.card-title{ font-weight:300; }
opacity: 0.95;
margin-top: 100px;
-webkit-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
-moz-box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
box-shadow:0 1px 20px rgba(0, 0, 0, 0.3), 0 0 40px rgba(0, 0, 0, 0.1) inset;
<div class="global-container">
<div class="card login-form effect7">
<div class="card-body">
<h2>File uploaded successfully</h2>
<h4 style="color:red;">Note generation and topic analysing will take some time</h4>
<h4>Lecture Video File : {{video_file_name}}</h4>
<h4>Lecture Slide File (pptx) : {{pptx_file_name}}</h4>
<div class="form-group">
<a href="/generate_short_note" class="btn btn-primary btn-block">Generate Short Note</a>
<div class="form-group">
<a href="/generate_topics" class="btn btn-primary btn-block">Analyse Topics</a>
import moviepy.editor as mp
from import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
import os
from topics_find import text_gen
def convert_video_to_audio(filename):
    """Export the audio track of ``filename`` and return its transcript.

    The audio is written to ``topics_find/audio_input/`` under the video's
    base name with ``.mp4`` replaced by ``.wav``, then handed to
    ``text_gen.convert_audio_to_text``.
    """
    clip = mp.VideoFileClip(r"" + filename)
    audio_file_name = str(filename).split('/')[-1].replace('.mp4', '.wav')
    audio_path = "topics_find/audio_input/" + audio_file_name
    try:
        clip.audio.write_audiofile(audio_path)
    finally:
        # release the reader processes held by the clip
        clip.close()
    return text_gen.convert_audio_to_text(audio_path)
def split_video_file(filename, slice_seconds=50):
    """Cut a video into fixed-length clips and transcribe each clip.

    Clips are written to ``topics_find/video_input/<index>.mp4``; files left
    in that directory by a previous run are removed first. Each clip is passed
    to ``convert_video_to_audio`` to obtain its transcript.

    Args:
        filename: Path of the video to split.
        slice_seconds: Length of each clip in seconds (default 50, the value
            that used to be hard-coded).

    Returns:
        A 2-tuple ``(return_list, all_text)`` where ``return_list`` is a list
        of ``[index, transcript]`` pairs in clip order and ``all_text`` is the
        transcripts concatenated, each followed by a single space.

    Raises:
        ValueError: If ``slice_seconds`` is not positive.
    """
    if slice_seconds <= 0:
        raise ValueError("slice_seconds must be positive")

    clip_dir = 'topics_find/video_input'

    # Start from an empty clip directory.
    for stale in os.listdir(clip_dir):
        os.remove(clip_dir + '/' + stale)

    video = VideoFileClip(filename)
    try:
        total_length = video.duration
    finally:
        video.close()

    return_list = []
    all_text = ''
    index = 0
    start = 0
    while start < total_length:
        # The last clip is clamped to the end of the video so the trailing
        # partial slice is transcribed too (it used to be skipped).
        end = min(start + slice_seconds, total_length)
        # '/' is used explicitly: convert_video_to_audio splits the path on
        # '/', and the clip must land inside the directory cleaned above.
        clip_path = clip_dir + '/' + str(index) + ".mp4"
        ffmpeg_extract_subclip(filename, start, end, targetname=clip_path)
        text = convert_video_to_audio(clip_path)
        all_text += text + ' '
        return_list.append([index, text])
        index += 1
        start += slice_seconds

    return return_list, all_text
import nltk
import topics_find.question_generator as q_gen
# from bertopic import BERTopic
from nltk.corpus import words
# model = BERTopic(verbose=True)
def get_topics(file):
    """Return the terms of topic 0 that are English dictionary words.

    Every line of the text file is treated as one document, the documents are
    fitted with the module-level topic ``model``, and the terms of topic 0
    are filtered against the NLTK ``words`` corpus.

    NOTE(review): ``model`` is commented out at module level, so this function
    raises ``NameError`` until the BERTopic model is re-enabled.

    Args:
        file: Path of a text file with one document per line.

    Returns:
        List of topic terms (strings) in the order the model reports them.
    """
    with open(file) as handle:
        docs = list(handle)

    topics, probabilities = model.fit_transform(docs)

    # Build the vocabulary once; words.words() returns a very large list and
    # a set makes each membership test O(1).
    vocabulary = set(words.words())
    return [term[0] for term in model.get_topic(0) if term[0] in vocabulary]
def get_topics_new(text):
    """Return the keywords of *text* that also occur in its summary.

    Delegates to ``q_gen.summarizer`` for the summary and to
    ``q_gen.get_keywords`` for the keyword selection.
    """
    condensed = q_gen.summarizer(text)
    return q_gen.get_keywords(text, condensed)
from textwrap3 import wrap
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import random
import numpy as np
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
import string
import pke
import traceback
from flashtext import KeywordProcessor
# Load the T5 summarisation model and tokenizer once at import time and move
# the model to the GPU when one is available.
summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The right-hand side of this assignment was truncated in the source.
summary_model = summary_model.to(device)
def set_seed(seed: int) -> None:
    """Seed every random number generator this module imports.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    so that repeated runs are reproducible.

    NOTE(review): the original body was missing from the source; this is the
    conventional implementation for the imported libraries.

    Args:
        seed: Seed value applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without CUDA; the call is ignored when no GPU is present.
    torch.cuda.manual_seed_all(seed)
def postprocesstext(content):
    """Capitalize every sentence of *content* and glue them back together.

    Each sentence found by ``sent_tokenize`` is passed through
    ``str.capitalize`` and prefixed with a single space, so a non-empty
    result starts with a space.
    """
    return "".join(" " + piece.capitalize() for piece in sent_tokenize(content))
def summarizer(text, model=summary_model, tokenizer=summary_tokenizer):
    """Return an abstractive summary of *text* generated by a T5 model.

    The text is stripped, newlines are replaced by spaces, and the T5 task
    prefix ``"summarize: "`` is prepended. The input is truncated to 512
    tokens. The decoded output is sentence-capitalized with
    ``postprocesstext`` and stripped.

    Args:
        text: Text to summarise.
        model: Seq2seq model used for generation (defaults to the module's
            ``summary_model``).
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The summary as a string.
    """
    text = "summarize: " + text.strip().replace("\n", " ")
    max_len = 512

    # The tail of this call was truncated in the source. Tensors are requested
    # as PyTorch tensors and moved to the device the module selected.
    # ``padding=False`` replaces the deprecated ``pad_to_max_length=False``.
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        padding=False,
        truncation=True,
        return_tensors="pt",
    ).to(device)
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]

    # NOTE(review): every generate() argument after ``input_ids`` was lost in
    # the source; the beam-search settings below are reviewer-supplied and
    # must be confirmed against the original project.
    outs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        early_stopping=True,
        num_beams=3,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        min_length=75,
        max_length=300,
    )

    dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
    return postprocesstext(dec[0]).strip()
def get_nouns_multipartite(content):
    """Return up to 15 noun keyphrases of *content* ranked by MultipartiteRank.

    Candidates are restricted to nouns and proper nouns; punctuation, bracket
    placeholders and English stop words are excluded. Any failure inside the
    extraction is reported with a traceback and yields an empty list.

    Args:
        content: Text to extract keyphrases from.

    Returns:
        List of keyphrase strings, best first; ``[]`` on error.
    """
    out = []
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        # NOTE(review): the document-loading call was lost in the source;
        # restored with pke's documented entry point.
        extractor.load_document(input=content)

        # Candidates must be nouns or proper nouns and must not contain
        # punctuation marks or stop words.
        pos = {'PROPN', 'NOUN'}
        stoplist = list(string.punctuation)
        stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
        stoplist += stopwords.words('english')
        extractor.candidate_selection(pos=pos, stoplist=stoplist)

        # Build the multipartite graph and rank candidates by random walk;
        # alpha controls the weight adjustment mechanism.
        # NOTE(review): this call was lost in the source; the argument values
        # are reviewer-supplied and must be confirmed.
        extractor.candidate_weighting(alpha=1.1, threshold=0.75, method='average')

        keyphrases = extractor.get_n_best(n=15)
        out = [val[0] for val in keyphrases]
    except Exception:
        # Best effort: keyword extraction must not take the caller down.
        out = []
        traceback.print_exc()
    return out
def get_keywords(originaltext, summarytext):
    """Return the top keyphrase of the original text that survives in the summary.

    Keyphrases are extracted from ``originaltext`` with
    ``get_nouns_multipartite`` and looked up in ``summarytext`` with a
    flashtext ``KeywordProcessor``. Those found are kept in ranking order.

    Args:
        originaltext: Full text the keyphrases are extracted from.
        summarytext: Summary in which the keyphrases are searched.

    Returns:
        A list holding at most one keyword (the best-ranked one found).
    """
    keywords = get_nouns_multipartite(originaltext)
    print("keywords unsummarized: ", keywords)

    keyword_processor = KeywordProcessor()
    for keyword in keywords:
        # This registration was lost in the source; without it nothing
        # could ever be found in the summary.
        keyword_processor.add_keyword(keyword)

    keywords_found = list(set(keyword_processor.extract_keywords(summarytext)))
    print("keywords_found in summarized: ", keywords_found)

    important_keywords = [kw for kw in keywords if kw in keywords_found]
    return important_keywords[:1]
# Load the question-generation model and tokenizer once at import time and
# move the model to the device selected for the summarisation model.
question_model = T5ForConditionalGeneration.from_pretrained('ramsrigouthamg/t5_squad_v1')
question_tokenizer = T5Tokenizer.from_pretrained('ramsrigouthamg/t5_squad_v1')
# The right-hand side of this assignment was truncated in the source.
question_model = question_model.to(device)
def get_question(context, answer, model, tokenizer):
    """Generate a question about *context* whose answer is *answer*.

    The prompt has the form ``"context: <context> answer: <answer>"`` and is
    truncated to 384 tokens. The ``"question:"`` marker is removed from the
    decoded output and the result is stripped.

    Args:
        context: Passage the question should be about.
        answer: Expected answer to the generated question.
        model: Seq2seq model used for generation.
        tokenizer: Tokenizer matching ``model``.

    Returns:
        The generated question as a string.
    """
    text = "context: {} answer: {}".format(context, answer)

    # The tail of this call was truncated in the source. Tensors are requested
    # as PyTorch tensors and moved to the module-level device.
    # ``padding=False`` replaces the deprecated ``pad_to_max_length=False``.
    encoding = tokenizer.encode_plus(
        text,
        max_length=384,
        padding=False,
        truncation=True,
        return_tensors="pt",
    ).to(device)
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]

    # NOTE(review): every generate() argument after ``input_ids`` was lost in
    # the source; the beam-search settings below are reviewer-supplied and
    # must be confirmed against the original project.
    outs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        early_stopping=True,
        num_beams=5,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_length=72,
    )

    dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
    return dec[0].replace("question:", "").strip()
def generate_questions_and_answers(text):
    """Build ``[question, answer]`` pairs for the key terms of *text*.

    The text is summarised, the keywords shared by text and summary are
    selected, and one question is generated per keyword from the summary.
    Each answer is returned capitalized.
    """
    digest = summarizer(text, summary_model, summary_tokenizer)
    return [
        [get_question(digest, term, question_model, question_tokenizer), term.capitalize()]
        for term in get_keywords(text, digest)
    ]
# xxx = """Elon Musk has shown again he can influence the digital currency market with just his tweets. After saying that his electric vehicle-making company
# Tesla will not accept payments in Bitcoin because of environmental concerns, he tweeted that he was working with developers of Dogecoin to improve
# system transaction efficiency. Following the two distinct statements from him, the world's largest cryptocurrency hit a two-month low, while Dogecoin
# rallied by about 20 percent. The SpaceX CEO has in recent months often tweeted in support of Dogecoin, but rarely for Bitcoin. In a recent tweet,
# Musk put out a statement from Tesla that it was “concerned” about the rapidly increasing use of fossil fuels for Bitcoin (price in India) mining and
# transaction, and hence was suspending vehicle purchases using the cryptocurrency. A day later he again tweeted saying, “To be clear, I strongly
# believe in crypto, but it can't drive a massive increase in fossil fuel use, especially coal”. It triggered a downward spiral for Bitcoin value but
# the cryptocurrency has stabilised since. A number of Twitter users welcomed Musk's statement. One of them said it's time people started realising
# that Dogecoin “is here to stay” and another referred to Musk's previous assertion that crypto could become the world's future currency."""
# print(generate_questions_and_answers(xxx))
# x = generate_questions_and_answers(xxx)
# for i in x:
# print(i[0])
# print(i[1])
import glob
from pptx import Presentation
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import textract
import os.path
# Words ignored when matching an outline line against slide text.
_SUMALL_STOP_WORDS = ('is', 'a', 'and', 'the')


def _iter_text_shapes(decks):
    """Yield ``(slide_number, shape)`` for every shape that exposes text.

    Slide numbers are 1-based and keep counting across decks.
    """
    slide_number = 0
    for deck in decks:
        for slide in deck.slides:
            slide_number += 1
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    yield slide_number, shape


def _summarise_text(text, ratio, english_stop_words):
    """Return the sentences of *text* scoring above ``ratio`` x the average.

    A sentence's score is the summed frequency of every non-stop-word token
    of the text that occurs (as a substring) in the lower-cased sentence.
    Kept sentences are each prefixed with one space.
    """
    frequencies = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token in english_stop_words:
            continue
        frequencies[token] = frequencies.get(token, 0) + 1

    sentences = sent_tokenize(text)
    scores = {}
    for sentence in sentences:
        lowered = sentence.lower()
        score = sum(count for token, count in frequencies.items() if token in lowered)
        if score:
            # Repeated identical sentences accumulate under one key.
            scores[sentence] = scores.get(sentence, 0) + score

    # max(..., 1) avoids dividing by zero when no sentence scored.
    average = int(sum(scores.values()) / max(len(scores), 1))
    threshold = ratio * average
    return "".join(
        " " + sentence
        for sentence in sentences
        if sentence in scores and scores[sentence] > threshold
    )


def create_sumall(abc, ratio):
    """Summarise a PPTX deck and map the lines of its third slide to slides.

    Every text shape is summarised by frequency-based sentence scoring. The
    text of the shape with id 3 on the third slide is treated as an outline:
    each of its lines is matched against the text of the slides after the
    third one.

    DOCX inputs are not handled here. The original ``extension == 'docx'``
    check could never match, because ``os.path.splitext`` returns the
    extension with its leading dot, and its body was not recoverable. The
    Flask route converts DOCX uploads to PPTX before calling this function.

    Args:
        abc: Path (or glob pattern) of the ``.pptx`` file(s) to process.
        ratio: Multiplier applied to the average sentence score; a sentence
            is kept only when its score exceeds ``ratio * average``.

    Returns:
        A 2-tuple ``(summaries, matches)``:

        * ``summaries`` is a list of one-element lists
          ``["Slide N-<summary>"]``, one per text shape with a non-empty
          summary.
        * ``matches`` holds, for every outline line, the list of
          ``"Slide N"`` labels (slides after the third) where at least 50 %
          of the line's keywords occur. It is empty when no outline shape
          exists.

    Raises:
        ValueError: If ``abc`` is empty.
    """
    if not abc:
        # Previously this fell through to an unbound ``filename``.
        raise ValueError("create_sumall() needs the path of a .pptx file")

    # Parse each deck once; the original re-parsed the file for every
    # outline line (and called the matcher twice per line).
    decks = [Presentation(path) for path in glob.glob(abc)]
    english_stop_words = set(stopwords.words("english"))

    summaries = []
    outline = None
    candidates = []  # (slide_number, lower-cased text) of matchable shapes
    for slide_number, shape in _iter_text_shapes(decks):
        raw_text = str(shape.text)
        flat_text = raw_text.replace("\n", " ")

        summary = _summarise_text(flat_text, ratio, english_stop_words)
        if summary:
            summaries.append(["Slide " + str(slide_number) + "-" + summary])

        if slide_number == 3 and shape.shape_id == 3:
            outline = raw_text

        # Slides 1-3 are skipped; shapes with id 2 and texts shorter than
        # 20 characters are not matched.
        if slide_number > 3 and shape.shape_id != 2 and len(flat_text) >= 20:
            candidates.append((slide_number, flat_text.lower()))

    matches = []
    outline_lines = outline.split("\n") if outline is not None else []
    for line in outline_lines:
        # Empty tokens (from repeated spaces or blank lines) are dropped:
        # '' is a substring of everything and used to match every slide.
        keywords = [
            word for word in line.lower().split(" ")
            if word and word not in _SUMALL_STOP_WORDS
        ]
        if not keywords:
            # Previously a ZeroDivisionError for stop-word-only lines.
            matches.append([])
            continue

        labels = []
        for slide_number, lowered in candidates:
            hits = sum(1 for word in keywords if word in lowered)
            if round((hits / len(keywords)) * 100) >= 50:
                labels.append("Slide " + str(slide_number))
        matches.append(labels)

    return (summaries, matches)
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import topics_find.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
# WSGI application object; the route decorator below registers its handler on it.
app = Flask(__name__)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
    """Summarise an uploaded PPTX or DOCX file and return the result as JSON.

    Expects a multipart form with a ``file`` upload and a ``ratio`` field
    (float). The upload is stored under ``upload/``. A ``.docx`` upload is
    converted to text and wrapped in a three-slide deck saved as
    ``upload/slide3.pptx``; the first two slides are placeholders and the
    third carries the document text. Any other upload is passed to the
    summariser as-is.

    Returns:
        A dict ``{"result": [<summary>, ...]}`` which Flask serialises to
        JSON, or an error dict with status 400 when the upload has no usable
        file name.
    """
    file = request.files['file']
    ratio = float(request.form['ratio'])
    filename = werkzeug.utils.secure_filename(file.filename)
    print("\nReceived image File name : " + file.filename)
    if not filename:
        # secure_filename() returns '' for names made only of unsafe
        # characters; saving to 'upload/' would fail.
        return {"error": "invalid file name"}, 400

    upload_path = 'upload/' + filename
    # The start of this call was truncated in the source.
    file.save(upload_path)

    file_extension = os.path.splitext(upload_path)[1]
    if file_extension == '.docx':
        raw = textract.process(upload_path)
        # textract returns bytes. Decode them instead of cleaning up the
        # repr() of the bytes object (which kept the b'...' wrapper and the
        # escape debris), then collapse all whitespace to single spaces so
        # words are not glued together.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', errors='replace')
        arr = " ".join(raw.split())

        prs = Presentation()
        lyt = prs.slide_layouts[0]  # title + subtitle layout
        for x in range(0, 3):
            slide = prs.slides.add_slide(lyt)
            title = slide.shapes.title
            subtitle = slide.placeholders[1]
            if x == 2:
                # The third slide carries the document text.
                subtitle.text = arr
            else:
                # The first two slides are placeholders.
                title.text = "ignore"
                subtitle.text = "ignore"
        # The start of this call was truncated in the source.
        prs.save("upload/slide3.pptx")
        print('file saved')
        res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
    else:
        res = summarizeed.create_sumall(upload_path, ratio)

    # Double quotes are still stripped so the payload matches what existing
    # clients received. The dict is returned directly instead of being
    # assembled as a JSON string by hand, which broke on backslashes and
    # control characters.
    rr = [r[0].replace('"', '') for r in res[0]]
    return {"result": rr}
if __name__ == '__main__':
    # The start of this call was truncated in the source; the keyword name
    # for the first argument is restored as ``host``.
    # NOTE(review): debug=True enables the interactive debugger and must not
    # be used on a publicly reachable interface.
    app.run(host="", port=5005, debug=True)
import speech_recognition as sr
import subprocess
import os
import sys
# Directory names for WAV inputs and transcript outputs; only the
# commented-out batch script below refers to them.
FOLDER_AUDIO = "audio_input"
FOLDER_TEXT = "text_output"
# print("starting...")
# if not os.path.isdir(FOLDER_AUDIO):
# os.mkdir(FOLDER_AUDIO)
# if not os.path.isdir(FOLDER_TEXT):
# os.mkdir(FOLDER_TEXT)
# paths = [os.path.join(FOLDER_AUDIO, nome) for nome in os.listdir(FOLDER_AUDIO)]
# files = [arq for arq in paths if os.path.isfile(arq)]
# wav_files = [arq for arq in files if arq.lower().endswith(".wav")]
# for filename in wav_files:
# r = sr.Recognizer()
# with sr.AudioFile(filename) as source:
# audio = r.record(source)
# command = r.recognize_google(audio, language='en-IN', show_all=True)
# print(command)
# print("running file {}".format(filename))
# filefinal = filename.split("audio_input/")[1].split(".wav")[0]
# filefinal = '{}/{}.txt'.format(FOLDER_TEXT, filefinal)
# with open(filefinal, 'w') as arq:
# arq.write(str(command))
# print("create a new file {}".format(filefinal))
# print("finish")
def convert_audio_to_text(filename):
    """Transcribe a WAV file with the Google Web Speech API.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The transcript of the first alternative, or the string
        ``'did not convert'`` when the file cannot be read, the request fails
        or nothing was recognised.
    """
    try:
        r = sr.Recognizer()
        with sr.AudioFile(filename) as source:
            audio = r.record(source)
        command = r.recognize_google(audio, language='en-IN', show_all=True)
        # With show_all=True the raw API response is returned; it has no
        # "alternative" entry when nothing was recognised.
        return command["alternative"][0]["transcript"]
    except Exception as exc:
        # Best effort, as in the original fallback, but no longer silent.
        print("convert_audio_to_text failed for {}: {!r}".format(filename, exc),
              file=sys.stderr)
        return 'did not convert'
# convert_audio_to_text('audio_input/3.wav')
