Commit b7c7acf4 authored by De Silva K.C.C.C

added model

parent 131e4893
from flask import Flask, request, url_for, redirect, render_template
from flask_cors import CORS
import werkzeug
import summerise.summary as summarizeed
import json
import textract
from pptx import Presentation
import os
# Flask application object; the '/summerize' route is registered on it.
app = Flask(__name__)
# Wrap the app with flask_cors' CORS using its default settings.
CORS(app)
@app.route('/summerize', methods=['GET', 'POST'])
def summerize():
    """Summarise an uploaded presentation or Word document.

    Expects a multipart form with:
      * ``file``  - the uploaded document, and
      * ``ratio`` - a number forwarded to ``summarizeed.create_sumall``.

    The upload is stored under ``upload/``.  A ``.docx`` upload is first
    converted into a three-slide presentation whose third slide carries the
    document text, and that presentation is summarised instead.

    Returns:
        ``{"result": [...]}`` with one string per summary entry (double
        quotes stripped, as before), or ``({"error": ...}, 400)`` when the
        ratio is not a number or the file name is unusable.
    """
    upload = request.files['file']
    try:
        ratio = float(request.form['ratio'])
    except ValueError:
        return {"error": "ratio must be a number"}, 400

    filename = werkzeug.utils.secure_filename(upload.filename or '')
    print("\nReceived image File name : " + str(upload.filename))
    if not filename:
        # secure_filename() returns '' for names made only of unsafe
        # characters; saving to 'upload/' itself would fail.
        return {"error": "invalid or missing file name"}, 400

    upload_path = 'upload/' + filename
    upload.save(upload_path)
    file_extension = os.path.splitext(upload_path)[1]
    print(file_extension)

    if file_extension.lower() == '.docx':
        raw = textract.process(upload_path)
        # textract hands back encoded bytes; decode them rather than taking
        # str() of the bytes object, which yields the "b'...'" repr and
        # corrupts every non-ASCII character.
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8', errors='replace')
        # Collapse newlines/tabs to single spaces so words on adjacent lines
        # are not glued together.
        flat_text = " ".join(raw.split())

        prs = Presentation()
        layout = prs.slide_layouts[0]
        # The summariser reads the document text from the subtitle of the
        # THIRD slide, so two filler slides are added in front of it.
        for _ in range(2):
            filler = prs.slides.add_slide(layout)
            filler.shapes.title.text = "ignore"
            filler.placeholders[1].text = "ignore"
        content = prs.slides.add_slide(layout)
        content.placeholders[1].text = flat_text

        # NOTE(review): every .docx request writes this same path, so
        # concurrent uploads can overwrite each other - confirm acceptable.
        prs.save("upload/slide3.pptx")
        print('file saved')
        res = summarizeed.create_sumall('upload/slide3.pptx', ratio)
    else:
        res = summarizeed.create_sumall(upload_path, ratio)

    # Each entry of res[0] is a one-element list holding the summary string.
    summaries = [entry[0].replace('"', '') for entry in res[0]]
    # Build the payload as a dict instead of concatenating JSON by hand, so
    # backslashes or control characters in the text cannot break it.
    result = {"result": summaries}
    print(json.dumps(result))
    return result
if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0).  Werkzeug's interactive
    # debugger allows arbitrary code execution, so it is no longer enabled
    # unconditionally; opt in for local development with FLASK_DEBUG=1.
    app.run(host="0.0.0.0", port=5005,
            debug=os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true"))
import glob
from pptx import Presentation
import math
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import textract
import os.path
import nltk
# Fetch the NLTK data this module relies on (the stop-word corpus and the
# 'punkt' tokenizer data) whenever the module is imported.
for _nltk_resource in ('stopwords', 'punkt'):
    nltk.download(_nltk_resource)
def create_sumall(abc, ratio):
    """Summarise a presentation and match the lines of its third slide to later slides.

    Args:
        abc: Path (or glob pattern) of the ``.pptx`` file(s) to read.  Slide
            numbers keep counting across files when the pattern matches
            several.
        ratio: Multiplier applied to the average sentence score; a sentence
            is kept in a summary only when its score is strictly greater
            than ``ratio * average``.

    Returns:
        ``None`` (after printing ``'error'``) when ``abc`` is falsy.
        Otherwise a tuple ``(summaries, matches)``:

        * ``summaries`` - one single-element list ``["Slide <n>-<summary>"]``
          per text shape whose summary is non-empty.
        * ``matches`` - for each line of the text found on slide 3 in the
          shape with id 3, a list ``[line, "Slide <k>", ...]`` naming the
          slides after the third whose text contains at least half of the
          line's keywords.  Empty when no such slide/shape exists.

    Changes from the previous implementation: the presentation is parsed
    once instead of once per helper call; the never-matching ``'docx'``
    extension branch and its discarded calls are gone; a line consisting
    only of filler words no longer raises ``ZeroDivisionError``; a missing
    third slide no longer raises.
    """
    if not abc:
        # Legacy contract kept as-is: report on stdout and return None.
        print('error')
        return None

    shape_texts = list(_iter_shape_texts(abc))
    if not shape_texts:
        # Nothing matched the pattern or no shape carries text.
        return ([], [])

    filler_words = ('is', 'a', 'and', 'the')
    # Loop-invariant: build the stop-word set once, not once per shape.
    english_stop_words = set(stopwords.words("english"))

    summaries = []
    slide3_text = None
    for slide_no, shape_id, text in shape_texts:
        summary = _summarise_text(text.replace("\n", " "), ratio,
                                  english_stop_words)
        if summary:
            summaries.append(["Slide " + str(slide_no) + "-" + summary])
        if slide_no == 3 and shape_id == 3:
            slide3_text = str(text)

    matches = []
    if slide3_text is not None:
        for line in slide3_text.split("\n"):
            keywords = [word for word in line.lower().split(" ")
                        if word not in filler_words]
            matches.append(_slides_matching(shape_texts, line, keywords))
    return (summaries, matches)


def _iter_shape_texts(pattern):
    """Yield ``(slide_number, shape_id, text)`` for every shape that has text."""
    slide_no = 0
    for path in glob.glob(pattern):
        for slide in Presentation(path).slides:
            slide_no += 1
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    yield slide_no, shape.shape_id, shape.text


def _summarise_text(text, ratio, stop_words):
    """Return the sentences of ``text`` scoring above ``ratio`` times the average."""
    # Frequency of every lower-cased token that is not a stop word.
    word_counts = {}
    for token in word_tokenize(text):
        token = token.lower()
        if token not in stop_words:
            word_counts[token] = word_counts.get(token, 0) + 1

    # A sentence's score is the summed frequency of every counted word that
    # occurs in it as a substring; sentences with no hit get no entry.
    sentences = sent_tokenize(text)
    scores = {}
    for sentence in sentences:
        lowered = sentence.lower()
        score = sum(count for word, count in word_counts.items()
                    if word in lowered)
        if score:
            scores[sentence] = scores.get(sentence, 0) + score

    # max(..., 1) avoids dividing by zero when no sentence scored.
    average = int(sum(scores.values()) / max(len(scores), 1))
    threshold = ratio * average
    return "".join(" " + sentence for sentence in sentences
                   if sentence in scores and scores[sentence] > threshold)


def _slides_matching(shape_texts, line, keywords):
    """Return ``[line, "Slide <k>", ...]`` for slides after the third matching ``keywords``."""
    found = [line]
    if not keywords:
        # Only filler words on this line: nothing to match against, and the
        # percentage below would divide by zero.
        return found
    for slide_no, shape_id, text in shape_texts:
        # The first three slides and the shape with id 2 are never matched.
        if slide_no <= 3 or shape_id == 2:
            continue
        flat = text.replace("\n", " ")
        if len(flat) < 20:
            continue
        lowered = flat.lower()
        hits = sum(1 for keyword in keywords if keyword in lowered)
        if round((hits / len(keywords)) * 100) >= 50:
            found.append("Slide " + str(slide_no))
    return found
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment