Commit bf761743 authored by Weerasinghe D.N.H

BACKEND: class relativity functions added with class_relationship_relativity_service

parent 740eed15
@@ -10,6 +10,8 @@ class Relationship(db.Model):
     y_min = db.Column(db.String(50), nullable=False)
     x_max = db.Column(db.String(50), nullable=False)
     y_max = db.Column(db.String(50), nullable=False)
+    comp_1 = db.Column(db.Integer, nullable=False)
+    comp_2 = db.Column(db.Integer, nullable=False)
 
     def __repr__(self) -> str:
         return f'class_relationship>>> {self.content}'
@@ -4,7 +4,6 @@ import numpy as np
 import pytesseract as ts
 from PIL import Image
 from models.attribute_model import Attribute
-from paddleocr import PaddleOCR, draw_ocr  # main OCR dependencies
 from object_detection.utils import label_map_util
 import matplotlib.pyplot as plt
 import app
@@ -16,6 +15,7 @@ from models.class_component_model import Component
 from models.class_relationship_model import Relationship
 from models.class_relationship_muplicity import Multiplicity
 from models.method_model import Method
+from services.class_relationship_relativity_service import detect_class_relationship
 
 ts.pytesseract.tesseract_cmd = r'C:\Users\DELL\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'
@@ -38,20 +38,21 @@ def component_separation(filename, class_comp_id):
     elif len(accurate_indexes) == 1:
         category = category_index[class_id]['name']
-        print(category)
+        # print(category)
 
     # select the component type and provide method to detect further details
     if category == 'class':
-        print(filename, 'class')
+        # print(filename, 'class')
         class_details_detection(image_nparray, boxes, index, class_comp_id, category)
     elif category == 'interface':
-        print(filename, 'interface')
+        # print(filename, 'interface')
         class_details_detection(image_nparray, boxes, index, class_comp_id, category)
     else:
-        print(filename, 'relationship')
-        relationship_details_detection(image_nparray, boxes, index, class_comp_id, category)
+        # print(filename, 'relationship')
+        detect_class_relationship(image_nparray, boxes, index, class_comp_id, category)
+        # relationship_details_detection(image_nparray, boxes, index, class_comp_id, category)
 
 
 def class_object_detection(model_path, label_path, image_nparray):
@@ -82,7 +83,7 @@ def class_details_detection(image_nparray, boxes, index, class_comp_id, class_type):
     methods_attributes = []
     _image, cl_ymin, cl_xmin, cl_ymax, cl_xmax = crop_image_(image_nparray, boxes, index)
-    cv2.imwrite('image_1.jpg', _image)
+    # cv2.imwrite('image_1.jpg', _image)
 
     mdl2_path = app.CLASS_COMP_SAVED_MODEL_PATH
     lbl2_path = app.CLASS_COMP_SAVED_LABEL_PATH
@@ -95,30 +96,30 @@
         else:
             category = category_index[class_id]['name']
-            print(category)
+            # print(category)
 
         if category == 'class_attributes':
-            print(category, 'line 96 - inside attributes')
+            # print(category, 'line 96 - inside attributes')
             class_attributes, y_min, x_min, y_max, x_max = crop_image_(_image, boxes_class, j)
             class_attributes = cv2.resize(class_attributes, None, fx=2, fy=2)
-            cv2.imwrite('image.jpg', class_attributes)
+            # cv2.imwrite('image.jpg', class_attributes)
             text = text_extraction(class_attributes)
             attr = save_attributes_methods(text, 'attribute')
             methods_attributes.append(attr)
         elif category == 'class_methods':
-            print(category, 'line 103 - inside methods')
+            # print(category, 'line 103 - inside methods')
             class_methods, y_min, x_min, y_max, x_max = crop_image_(_image, boxes_class, j)
             class_methods = cv2.resize(class_methods, None, fx=2, fy=2)
             text = text_extraction(class_methods)
             methods = save_attributes_methods(text, 'method')
             methods_attributes.append(methods)
-            print(text, '111 line')
+            # print(text, '111 line')
 
     comp_name = class_name_detection(_image, boxes_class, category_index, accurate_indexes, class_id)
-    print(comp_name, 'comp_name line 118')
+    # print(comp_name, 'comp_name line 118')
     comp = save_class_interface(class_type, comp_name, cl_ymin, cl_xmin, cl_ymax, cl_xmax, class_comp_id)
-    print(comp, 'component_id line 120')
+    # print(comp, 'component_id line 120')
     alter_attributes_methods(methods_attributes, comp.id)
@@ -154,7 +155,7 @@ def save_attributes_methods(text, typ):
     saved_data = []
     nlp = spacy.load('en_core_web_sm')
     for element in text:
-        print(element, 'line 145')
+        # print(element, 'line 145')
         # removable = str.maketrans('', '', '()')
         nlp_ner = spacy.load('ner_models/model-best')
         nlp_output = nlp_ner(element)
@@ -184,13 +185,13 @@ def save_attributes_methods(text, typ):
                 method.return_type = token.text
 
         if typ == 'attribute':
-            print(attr, 'line 175 - attr')
+            # print(attr, 'line 175 - attr')
             db.session.add(attr)
             db.session.commit()
             saved_data.append(attr)
         else:
-            print(method, 'line 181 method')
+            # print(method, 'line 181 method')
             db.session.add(method)
             db.session.commit()
             saved_data.append(method)
@@ -202,8 +203,8 @@ def save_attributes_methods(text, typ):
 def alter_attributes_methods(element_list, component_id):
     for j in element_list:
         for element in j:
-            print(component_id)
-            print(element_list)
+            # print(component_id)
+            # print(element_list)
             element.class_id = component_id
             db.session.commit()
@@ -227,35 +228,35 @@ def covert_to_access_specifier(access):
 def class_name_detection(image, boxes, category_index, accurate_indexes, class_id):
-    print(category_index, 'category_index')
+    # print(category_index, 'category_index')
-    print(class_id, 'class_id')
+    # print(class_id, 'class_id')
     height, width, c = image.shape
     for i in range(0, len(accurate_indexes)):
         if len(accurate_indexes) > 1:
             category = category_index[class_id[i]]['name']
-            print(category, '225 line')
+            # print(category, '225 line')
         else:
             category = category_index[class_id]['name']
-            print(category, '225 line')
+            # print(category, '225 line')
 
-        if category is not 'interface_name' or category is not 'class_name':
+        if category != 'interface_name' or category != 'class_name':
             ymin = boxes[i][0] * height
             xmin = boxes[i][1] * width
             ymax = boxes[i][2] * height
             xmax = boxes[i][3] * width
             cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 255, 255), -1)
-            cv2.imwrite('image_2.jpg', image)
+            # cv2.imwrite('image_2.jpg', image)
 
     class_name = text_extraction(image)
-    print(class_name, 'line 249 class name')
+    # print(class_name, 'line 249 class name')
     if ''.join(class_name) is not None:
-        print(class_name, 'line 251 class name')
-        if "terface" in ''.join(class_name):
+        # print(class_name, 'line 251 class name')
+        if "interface" in ''.join(class_name):
             name = ''.join(class_name).replace("<<interface>>", "")
         else:
             name = ''.join(class_name)
@@ -269,59 +270,5 @@ def save_class_interface(class_type, comp_name, cl_ymin, cl_xmin, cl_ymax, cl_xmax, class_comp_id):
                      y_max=cl_ymax)
     db.session.add(comp)
     db.session.commit()
-    print(comp, 'line 261 comp')
+    # print(comp, 'line 261 comp')
     return comp
-
-
-def relationship_details_detection(image_nparray, boxes, index, class_comp_id, category):
-    _image, y_min, x_min, y_max, x_max = crop_image_(image_nparray, boxes, index)
-    _image = cv2.resize(_image, None, fx=3, fy=5)
-    ocr_model = PaddleOCR(lang='en', use_gpu=False)
-    result = ocr_model.ocr(_image)
-    relationship = Relationship(class_answer=class_comp_id, type=category, x_min=x_min, y_min=y_min,
-                                x_max=x_max,
-                                y_max=y_max)
-    db.session.add(relationship)
-    db.session.commit()
-    if result is not None:
-        relationship_text(_image, result, relationship)
-    print(relationship, 'relationship')
-
-
-def relationship_text(_image, result, relationship):
-    # boxes = [res[0] for res in result]
-    # texts = [res[1][0] for res in result]
-    # scores = [res[1][1] for res in result]
-    for element in result:
-        text = element[1][0]
-        box = element[0]
-        nlp_ner = spacy.load('ner_models/model-best')
-        nlp_output = nlp_ner(text)
-        print(text, 'line 290')
-        # box = np.array(box,dtype=float)
-        box = np.array(box).astype(np.int32)
-        xmin = min(box[:, 0])
-        ymin = min(box[:, 1])
-        xmax = max(box[:, 0])
-        ymax = max(box[:, 1])
-        for token in nlp_output.ents:
-            print(token, 'line 301')
-            print(token.label_, 'line 302')
-            if token.label_ == 'MULTIPLICITY' or contains_number(text):
-                multiplicity = Multiplicity(value=token.text, relationship_id=relationship.id, x_min=xmin,
-                                            y_min=ymin, x_max=xmax, y_max=ymax)
-                db.session.add(multiplicity)
-                db.session.commit()
-        if not contains_number(text):
-            relationship.name = text
-            db.session.commit()
-
-
-# check if string contains any numbers
-def contains_number(string):
-    return any([char.isdigit() for char in string])
services/class_relationship_relativity_service.py (new file)
+import math
+from decimal import Decimal
+
+import cv2
+import keras_ocr
+import numpy as np
+import spacy
+from paddleocr import PaddleOCR, draw_ocr  # main OCR dependencies
+
+from config.database import db
+import app
+from models.class_component_model import Component
+from models.class_relationship_model import Relationship
+from models.class_relationship_muplicity import Multiplicity
+
+
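+# Entry point called from the component separation service: isolate the detected
+# relationship arrow, locate its two end points, and match each end point to the
+# closest class/interface component before persisting the relationship.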
+def detect_class_relationship(image_nparray, boxes, index, class_comp_id, category):
+    # image = cv2.imread(app.SUBMISSION_PATH + '/' + filename)
+    height, width, c = image_nparray.shape
+    class_objects = Component.query.filter_by(class_answer=class_comp_id).all()
+    # for i in range(0, len(accurate_indexes))
+    # if category_index[class_id[i]]['name'] != 'class' and category_index[class_id[i]]['name'] != 'interface':
+    # category_name = category_index[class_id[i]]['name']
+    ymin = boxes[index][0] * height
+    xmin = boxes[index][1] * width
+    ymax = boxes[index][2] * height
+    xmax = boxes[index][3] * width
+    crop_img = image_nparray[int(ymin):int(ymax), int(xmin):int(xmax)]
+    img = remove_text(crop_img)
+    if category == 'realization':
+        img = line_recovery(img)
+    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    _, thresh_image = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY_INV)
+    arrow_image = get_filter_arrow_image(thresh_image)
+    if arrow_image is not None:
+        arrow_info_image, point1, point2 = get_arrow_info(arrow_image)
+        point1_x = int(xmin) + point1[0]
+        point1_y = int(ymin) + point1[1]
+        point2_x = int(xmin) + point2[0]
+        point2_y = int(ymin) + point2[1]
+        line_point1 = (point1_x, point1_y)
+        line_point2 = (point2_x, point2_y)
+        class_object1 = find_closest_components_length(line_point1, class_objects)
+        class_object2 = find_closest_components_length(line_point2, class_objects)
+        relationship_details_detection(image_nparray, boxes, index, class_comp_id, category, class_object1,
+                                       class_object2)
+
+
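+# Midpoint of the segment (x1, y1)-(x2, y2), used when drawing the text mask.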
+def midpoint(x1, y1, x2, y2):
+    x_mid = int((x1 + x2) / 2)
+    y_mid = int((y1 + y2) / 2)
+    return (x_mid, y_mid)
+
+
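+# Remove label text from the cropped arrow image: keras_ocr finds the word boxes,
+# each box is painted onto a mask, and cv2.inpaint reconstructs the pixels
+# underneath so that only the arrow strokes remain.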
+def remove_text(img_path):
+    # read image
+    pipeline = keras_ocr.pipeline.Pipeline()
+    img = keras_ocr.tools.read(img_path)
+    # generate (word, box) tuples
+    prediction_groups = pipeline.recognize([img])
+    mask = np.zeros(img.shape[:2], dtype="uint8")
+    for box in prediction_groups[0]:
+        x0, y0 = box[1][0]
+        x1, y1 = box[1][1]
+        x2, y2 = box[1][2]
+        x3, y3 = box[1][3]
+        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
+        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)
+        thickness = int(math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2))
+        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255,
+                 thickness)
+    img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)
+    return img
+
+
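+# Reconnect the dashed line of a realization arrow: threshold, erode/dilate to
+# merge the dashes, mask the binary image with the merged region, then redraw the
+# detected Hough line segments onto the original image as solid strokes.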
+def line_recovery(img):
+    kernel1 = np.ones((3, 5), np.uint8)
+    kernel2 = np.ones((9, 9), np.uint8)
+    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    imgBW = cv2.threshold(imgGray, 230, 255, cv2.THRESH_BINARY_INV)[1]
+    img1 = cv2.erode(imgBW, kernel1, iterations=1)
+    img2 = cv2.dilate(img1, kernel2, iterations=3)
+    img3 = cv2.bitwise_and(imgBW, img2)
+    img3 = cv2.bitwise_not(img3)
+    img4 = cv2.bitwise_and(imgBW, imgBW, mask=img3)
+    imgLines = cv2.HoughLinesP(img4, 1, np.pi / 180, 20, minLineLength=0, maxLineGap=10)
+    for i in range(len(imgLines)):
+        for x1, y1, x2, y2 in imgLines[i]:
+            cv2.line(img, (x1, y1), (x2, y2), (0, 0, 0), 2)
+    return img
+
+
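+# Keep only arrow-like contours: dilate the thresholded image, then use convexity
+# defects to pick out contours with a pronounced notch (the arrow head) and draw
+# them filled on a blank image; returns None when no contour hierarchy is found.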
+def get_filter_arrow_image(threshold_image):
+    blank_image = np.zeros_like(threshold_image)
+    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+    threshold_image = cv2.dilate(threshold_image, kernel_dilate, iterations=1)
+    contours, hierarchy = cv2.findContours(threshold_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    if hierarchy is not None:
+        threshold_distance = 100
+        for cnt in contours:
+            hull = cv2.convexHull(cnt, returnPoints=False)
+            defects = cv2.convexityDefects(cnt, hull)
+            if defects is not None:
+                for i in range(defects.shape[0]):
+                    start_index, end_index, farthest_index, distance = defects[i, 0]
+                    if distance > threshold_distance:
+                        cv2.drawContours(blank_image, [cnt], -1, 225, -1)
+        return blank_image
+    else:
+        return None
+
+
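+# Euclidean distance between two (x, y) points.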
+def get_length(p1, p2):
+    line_length = ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5
+    return line_length
+
+
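+# Longest point-to-point span over all contours; used to pick the main arrow contour.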
+def find_max_length(contours):
+    max_length = 0
+    for cnt in contours:
+        p1, p2 = get_max_distance_point(cnt)
+        line_length = ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5
+        if line_length > max_length:
+            max_length = line_length
+    return max_length
+
+
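+# Farthest pair of points on a contour (brute-force O(n^2) scan over the contour
+# points); the two extremes approximate the arrow's tail and head.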
+def get_max_distance_point(cnt):
+    max_distance = 0
+    max_points = None
+    for [[x1, y1]] in cnt:
+        for [[x2, y2]] in cnt:
+            distance = get_length((x1, y1), (x2, y2))
+            if distance > max_distance:
+                max_distance = distance
+                max_points = [(x1, y1), (x2, y2)]
+    return max_points
+
+
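+# Angle of the line a-b measured from the vertical axis (slope is dx / dy); note
+# this raises ZeroDivisionError for a horizontal line where a[1] == b[1].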
+def angle_between_points(a, b):
+    arrow_slope = (a[0] - b[0]) / (a[1] - b[1])
+    arrow_angle = math.degrees(math.atan(arrow_slope))
+    return arrow_angle
+
+
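+# Pick the contour whose extreme points span the maximum length (the arrow itself),
+# mark both end points on a debug image, and return the image with the two points.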
+def get_arrow_info(arrow_image):
+    arrow_info_image = cv2.cvtColor(arrow_image.copy(), cv2.COLOR_GRAY2BGR)
+    contours, hierarchy = cv2.findContours(arrow_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if hierarchy is not None:
+        max_length = find_max_length(contours)
+        for cnt in contours:
+            blank_image = np.zeros_like(arrow_image)
+            cv2.drawContours(blank_image, [cnt], -1, 255, -1)
+            point1, point2 = get_max_distance_point(cnt)
+            length = get_length(point1, point2)
+            if length == max_length:
+                cv2.circle(arrow_info_image, point1, 2, (255, 0, 0), 3)
+                cv2.circle(arrow_info_image, point2, 2, (255, 0, 0), 3)
+                cv2.putText(arrow_info_image, "point 1 : %s" % (str(point1)), point2, cv2.FONT_HERSHEY_PLAIN, 0.8,
+                            (0, 0, 255), 1)
+                cv2.putText(arrow_info_image, "point 2 : %s" % (str(point2)), (point2[0], point2[1] + 20),
+                            cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 255), 1)
+        return arrow_info_image, point1, point2
+    else:
+        return None, None, None
+
+
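+# Match an arrow end point to the nearest stored class/interface component by
+# comparing the distance to each component's bounding-box centre.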
+def find_closest_components_length(point, class_objects):
+    u_object = 0
+    min_length = 1000000000000
+    for obj in class_objects:
+        ymin = Decimal(obj.y_min)
+        xmin = Decimal(obj.x_min)
+        ymax = Decimal(obj.y_max)
+        xmax = Decimal(obj.x_max)
+        usecase_x = xmin + (xmax - xmin) / 2
+        usecase_y = ymin + (ymax - ymin) / 2
+        usecase_point = (int(usecase_x), int(usecase_y))
+        l_length = ((point[0] - usecase_point[0]) ** 2 + (point[1] - usecase_point[1]) ** 2) ** 0.5
+        if min_length > l_length:
+            min_length = l_length
+            u_object = obj
+    return u_object
+
+
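+# OCR the relationship region with PaddleOCR, persist the Relationship row with the
+# ids of the two linked components, then hand the OCR result to relationship_text
+# for multiplicity and name extraction.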
+def relationship_details_detection(image_nparray, boxes, index, class_comp_id, category, class_object1, class_object2):
+    _image, y_min, x_min, y_max, x_max = crop_image_(image_nparray, boxes, index)
+    _image = cv2.resize(_image, None, fx=4, fy=5)
+    ocr_model = PaddleOCR(lang='en', use_gpu=False)
+    result = ocr_model.ocr(_image)
+    relationship = Relationship(class_answer=class_comp_id, type=category, x_min=x_min, y_min=y_min,
+                                x_max=x_max,
+                                y_max=y_max, comp_1=class_object1.id, comp_2=class_object2.id)
+    db.session.add(relationship)
+    db.session.commit()
+    if result is not None:
+        relationship_text(_image, result, relationship)
+    # print(relationship, 'relationship')
+
+
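+# Classify each OCR fragment with the custom spaCy NER model: tokens labelled
+# MULTIPLICITY (or fragments containing digits) are saved as Multiplicity rows with
+# their bounding boxes; fragments without digits become the relationship name.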
+def relationship_text(_image, result, relationship):
+    # boxes = [res[0] for res in result]
+    # texts = [res[1][0] for res in result]
+    # scores = [res[1][1] for res in result]
+    for element in result:
+        text = element[1][0]
+        box = element[0]
+        nlp_ner = spacy.load('ner_models/model-best')
+        nlp_output = nlp_ner(text)
+        # print(text, 'line 290')
+        # box = np.array(box,dtype=float)
+        box = np.array(box).astype(np.int32)
+        xmin = min(box[:, 0])
+        ymin = min(box[:, 1])
+        xmax = max(box[:, 0])
+        ymax = max(box[:, 1])
+        for token in nlp_output.ents:
+            # print(token.text, 'line 301')
+            # print(token.label_, 'line 302')
+            if token.label_ == 'MULTIPLICITY' or contains_number(text):
+                multiplicity = Multiplicity(value=token.text, relationship_id=relationship.id, x_min=xmin,
+                                            y_min=ymin, x_max=xmax, y_max=ymax)
+                db.session.add(multiplicity)
+                db.session.commit()
+        if not contains_number(text):
+            relationship.name = text
+            db.session.commit()
+
+
+# check if string contains any numbers
+def contains_number(string):
+    return any([char.isdigit() for char in string])
+
+
+# crop image using boxes & index
+def crop_image_(image, boxes, index):
+    height, width, c = image.shape
+    # crop box format: xmin, ymin, xmax, ymax
+    ymin = boxes[index][0] * height
+    xmin = boxes[index][1] * width
+    ymax = boxes[index][2] * height
+    xmax = boxes[index][3] * width
+    cropped_image = image[int(ymin):int(ymax), int(xmin):int(xmax)]
+    # image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+    # image = cv2.resize(image, (800, 500))
+    # returns cropped image , ymin,xmin,ymax & xmax
+    return cropped_image, ymin, xmin, ymax, xmax
@@ -14,7 +14,7 @@ import tensorflow as tf
 from config.database import db
 from models.actor_and_use_case import ActorANDUseCase
 
-pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
+# pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
 from services.extend_include_relationship_detection_service import detect_extend_include_relationship
 from services.generalization_relationship_detection_service import detect_generalization_relationship