Commit bf761743 authored by Weerasinghe D.N.H

BACKEND: class relativity functions added with class_relationship_relativity_service

parent 740eed15
@@ -10,6 +10,8 @@ class Relationship(db.Model):
     y_min = db.Column(db.String(50), nullable=False)
     x_max = db.Column(db.String(50), nullable=False)
     y_max = db.Column(db.String(50), nullable=False)
+    comp_1 = db.Column(db.Integer, nullable=False)
+    comp_2 = db.Column(db.Integer, nullable=False)
 
     def __repr__(self) -> str:
         return f'class_relationship>>> {self.content}'
@@ -4,7 +4,6 @@ import numpy as np
 import pytesseract as ts
 from PIL import Image
 from models.attribute_model import Attribute
-from paddleocr import PaddleOCR, draw_ocr  # main OCR dependencies
 from object_detection.utils import label_map_util
 import matplotlib.pyplot as plt
 import app
@@ -16,6 +15,7 @@ from models.class_component_model import Component
 from models.class_relationship_model import Relationship
 from models.class_relationship_muplicity import Multiplicity
 from models.method_model import Method
+from services.class_relationship_relativity_service import detect_class_relationship
 
 ts.pytesseract.tesseract_cmd = r'C:\Users\DELL\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'
@@ -38,20 +38,21 @@ def component_separation(filename, class_comp_id):
     elif len(accurate_indexes) == 1:
         category = category_index[class_id]['name']
-        print(category)
+        # print(category)
 
     # select the component type and provide method to detect further details
     if category == 'class':
-        print(filename, 'class')
+        # print(filename, 'class')
         class_details_detection(image_nparray, boxes, index, class_comp_id, category)
     elif category == 'interface':
-        print(filename, 'interface')
+        # print(filename, 'interface')
         class_details_detection(image_nparray, boxes, index, class_comp_id, category)
     else:
-        print(filename, 'relationship')
-        relationship_details_detection(image_nparray, boxes, index, class_comp_id, category)
+        # print(filename, 'relationship')
+        detect_class_relationship(image_nparray, boxes, index, class_comp_id, category)
+        # relationship_details_detection(image_nparray, boxes, index, class_comp_id, category)
 
 
 def class_object_detection(model_path, label_path, image_nparray):
@@ -82,7 +83,7 @@ def class_details_detection(image_nparray, boxes, index, class_comp_id, class_type):
     methods_attributes = []
     _image, cl_ymin, cl_xmin, cl_ymax, cl_xmax = crop_image_(image_nparray, boxes, index)
-    cv2.imwrite('image_1.jpg', _image)
+    # cv2.imwrite('image_1.jpg', _image)
 
     mdl2_path = app.CLASS_COMP_SAVED_MODEL_PATH
     lbl2_path = app.CLASS_COMP_SAVED_LABEL_PATH
@@ -95,30 +96,30 @@
         else:
             category = category_index[class_id]['name']
-            print(category)
+            # print(category)
 
         if category == 'class_attributes':
-            print(category, 'line 96 - inside attributes')
+            # print(category, 'line 96 - inside attributes')
             class_attributes, y_min, x_min, y_max, x_max = crop_image_(_image, boxes_class, j)
             class_attributes = cv2.resize(class_attributes, None, fx=2, fy=2)
-            cv2.imwrite('image.jpg', class_attributes)
+            # cv2.imwrite('image.jpg', class_attributes)
             text = text_extraction(class_attributes)
             attr = save_attributes_methods(text, 'attribute')
             methods_attributes.append(attr)
         elif category == 'class_methods':
-            print(category, 'line 103 - inside methods')
+            # print(category, 'line 103 - inside methods')
             class_methods, y_min, x_min, y_max, x_max = crop_image_(_image, boxes_class, j)
             class_methods = cv2.resize(class_methods, None, fx=2, fy=2)
             text = text_extraction(class_methods)
             methods = save_attributes_methods(text, 'method')
             methods_attributes.append(methods)
-            print(text, '111 line')
+            # print(text, '111 line')
 
     comp_name = class_name_detection(_image, boxes_class, category_index, accurate_indexes, class_id)
-    print(comp_name, 'comp_name line 118')
+    # print(comp_name, 'comp_name line 118')
     comp = save_class_interface(class_type, comp_name, cl_ymin, cl_xmin, cl_ymax, cl_xmax, class_comp_id)
-    print(comp, 'component_id line 120')
+    # print(comp, 'component_id line 120')
     alter_attributes_methods(methods_attributes, comp.id)
@@ -154,7 +155,7 @@ def save_attributes_methods(text, typ):
     saved_data = []
     nlp = spacy.load('en_core_web_sm')
     for element in text:
-        print(element, 'line 145')
+        # print(element, 'line 145')
         # removable = str.maketrans('', '', '()')
         nlp_ner = spacy.load('ner_models/model-best')
         nlp_output = nlp_ner(element)
@@ -184,13 +185,13 @@ def save_attributes_methods(text, typ):
                 method.return_type = token.text
 
         if typ == 'attribute':
-            print(attr, 'line 175 - attr')
+            # print(attr, 'line 175 - attr')
             db.session.add(attr)
             db.session.commit()
             saved_data.append(attr)
         else:
-            print(method, 'line 181 method')
+            # print(method, 'line 181 method')
             db.session.add(method)
             db.session.commit()
             saved_data.append(method)
@@ -202,8 +203,8 @@ def save_attributes_methods(text, typ):
 def alter_attributes_methods(element_list, component_id):
     for j in element_list:
         for element in j:
-            print(component_id)
-            print(element_list)
+            # print(component_id)
+            # print(element_list)
             element.class_id = component_id
             db.session.commit()
@@ -227,35 +228,35 @@ def covert_to_access_specifier(access):
 def class_name_detection(image, boxes, category_index, accurate_indexes, class_id):
-    print(category_index, 'category_index')
+    # print(category_index, 'category_index')
-    print(class_id, 'class_id')
+    # print(class_id, 'class_id')
     height, width, c = image.shape
     for i in range(0, len(accurate_indexes)):
         if len(accurate_indexes) > 1:
             category = category_index[class_id[i]]['name']
-            print(category, '225 line')
+            # print(category, '225 line')
         else:
             category = category_index[class_id]['name']
-            print(category, '225 line')
+            # print(category, '225 line')
 
-        if category is not 'interface_name' or category is not 'class_name':
+        if category != 'interface_name' or category != 'class_name':
             ymin = boxes[i][0] * height
             xmin = boxes[i][1] * width
             ymax = boxes[i][2] * height
             xmax = boxes[i][3] * width
             cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 255, 255), -1)
-            cv2.imwrite('image_2.jpg', image)
+            # cv2.imwrite('image_2.jpg', image)
 
     class_name = text_extraction(image)
-    print(class_name, 'line 249 class name')
+    # print(class_name, 'line 249 class name')
     if ''.join(class_name) is not None:
-        print(class_name, 'line 251 class name')
-        if "terface" in ''.join(class_name):
+        # print(class_name, 'line 251 class name')
+        if "interface" in ''.join(class_name):
             name = ''.join(class_name).replace("<<interface>>", "")
         else:
             name = ''.join(class_name)
@@ -269,59 +270,5 @@ def save_class_interface(class_type, comp_name, cl_ymin, cl_xmin, cl_ymax, cl_xmax, class_comp_id):
                      y_max=cl_ymax)
     db.session.add(comp)
     db.session.commit()
-    print(comp, 'line 261 comp')
+    # print(comp, 'line 261 comp')
     return comp
-
-
-def relationship_details_detection(image_nparray, boxes, index, class_comp_id, category):
-    _image, y_min, x_min, y_max, x_max = crop_image_(image_nparray, boxes, index)
-    _image = cv2.resize(_image, None, fx=3, fy=5)
-    ocr_model = PaddleOCR(lang='en', use_gpu=False)
-    result = ocr_model.ocr(_image)
-    relationship = Relationship(class_answer=class_comp_id, type=category, x_min=x_min, y_min=y_min,
-                                x_max=x_max,
-                                y_max=y_max)
-    db.session.add(relationship)
-    db.session.commit()
-    if result is not None:
-        relationship_text(_image, result, relationship)
-    print(relationship, 'relationship')
-
-
-def relationship_text(_image, result, relationship):
-    # boxes = [res[0] for res in result]
-    # texts = [res[1][0] for res in result]
-    # scores = [res[1][1] for res in result]
-    for element in result:
-        text = element[1][0]
-        box = element[0]
-        nlp_ner = spacy.load('ner_models/model-best')
-        nlp_output = nlp_ner(text)
-        print(text, 'line 290')
-        # box = np.array(box,dtype=float)
-        box = np.array(box).astype(np.int32)
-        xmin = min(box[:, 0])
-        ymin = min(box[:, 1])
-        xmax = max(box[:, 0])
-        ymax = max(box[:, 1])
-        for token in nlp_output.ents:
-            print(token, 'line 301')
-            print(token.label_, 'line 302')
-            if token.label_ == 'MULTIPLICITY' or contains_number(text):
-                multiplicity = Multiplicity(value=token.text, relationship_id=relationship.id, x_min=xmin,
-                                            y_min=ymin, x_max=xmax, y_max=ymax)
-                db.session.add(multiplicity)
-                db.session.commit()
-        if not contains_number(text):
-            relationship.name = text
-            db.session.commit()
-
-
-# check if string contains any numbers
-def contains_number(string):
-    return any([char.isdigit() for char in string])
services/class_relationship_relativity_service.py (new file)
+import math
+from decimal import Decimal
+
+import cv2
+import keras_ocr
+import numpy as np
+import spacy
+from paddleocr import PaddleOCR, draw_ocr  # main OCR dependencies
+
+from config.database import db
+import app
+from models.class_component_model import Component
+from models.class_relationship_model import Relationship
+from models.class_relationship_muplicity import Multiplicity
+
+
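+# Entry point called from the component separation service: isolate the detected
+# relationship arrow, locate its two end points, and match each end point to the
+# closest class/interface component before persisting the relationship.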
+def detect_class_relationship(image_nparray, boxes, index, class_comp_id, category):
+    # image = cv2.imread(app.SUBMISSION_PATH + '/' + filename)
+    height, width, c = image_nparray.shape
+    class_objects = Component.query.filter_by(class_answer=class_comp_id).all()
+    # for i in range(0, len(accurate_indexes))
+    # if category_index[class_id[i]]['name'] != 'class' and category_index[class_id[i]]['name'] != 'interface':
+    # category_name = category_index[class_id[i]]['name']
+    ymin = boxes[index][0] * height
+    xmin = boxes[index][1] * width
+    ymax = boxes[index][2] * height
+    xmax = boxes[index][3] * width
+    crop_img = image_nparray[int(ymin):int(ymax), int(xmin):int(xmax)]
+    img = remove_text(crop_img)
+    if category == 'realization':
+        img = line_recovery(img)
+    gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    _, thresh_image = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY_INV)
+    arrow_image = get_filter_arrow_image(thresh_image)
+    if arrow_image is not None:
+        arrow_info_image, point1, point2 = get_arrow_info(arrow_image)
+        point1_x = int(xmin) + point1[0]
+        point1_y = int(ymin) + point1[1]
+        point2_x = int(xmin) + point2[0]
+        point2_y = int(ymin) + point2[1]
+        line_point1 = (point1_x, point1_y)
+        line_point2 = (point2_x, point2_y)
+        class_object1 = find_closest_components_length(line_point1, class_objects)
+        class_object2 = find_closest_components_length(line_point2, class_objects)
+        relationship_details_detection(image_nparray, boxes, index, class_comp_id, category, class_object1,
+                                       class_object2)
+
+
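+# Midpoint of the segment (x1, y1)-(x2, y2), used when drawing the text mask.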
+def midpoint(x1, y1, x2, y2):
+    x_mid = int((x1 + x2) / 2)
+    y_mid = int((y1 + y2) / 2)
+    return (x_mid, y_mid)
+
+
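+# Remove label text from the cropped arrow image: keras_ocr finds the word boxes,
+# each box is painted onto a mask, and cv2.inpaint reconstructs the pixels
+# underneath so that only the arrow strokes remain.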
+def remove_text(img_path):
+    # read image
+    pipeline = keras_ocr.pipeline.Pipeline()
+    img = keras_ocr.tools.read(img_path)
+    # generate (word, box) tuples
+    prediction_groups = pipeline.recognize([img])
+    mask = np.zeros(img.shape[:2], dtype="uint8")
+    for box in prediction_groups[0]:
+        x0, y0 = box[1][0]
+        x1, y1 = box[1][1]
+        x2, y2 = box[1][2]
+        x3, y3 = box[1][3]
+        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
+        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)
+        thickness = int(math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2))
+        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255,
+                 thickness)
+    img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)
+    return img
+
+
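+# Reconnect the dashed line of a realization arrow: threshold, erode/dilate to
+# merge the dashes, mask the binary image with the merged region, then redraw the
+# detected Hough line segments onto the original image as solid strokes.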
+def line_recovery(img):
+    kernel1 = np.ones((3, 5), np.uint8)
+    kernel2 = np.ones((9, 9), np.uint8)
+    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    imgBW = cv2.threshold(imgGray, 230, 255, cv2.THRESH_BINARY_INV)[1]
+    img1 = cv2.erode(imgBW, kernel1, iterations=1)
+    img2 = cv2.dilate(img1, kernel2, iterations=3)
+    img3 = cv2.bitwise_and(imgBW, img2)
+    img3 = cv2.bitwise_not(img3)
+    img4 = cv2.bitwise_and(imgBW, imgBW, mask=img3)
+    imgLines = cv2.HoughLinesP(img4, 1, np.pi / 180, 20, minLineLength=0, maxLineGap=10)
+    for i in range(len(imgLines)):
+        for x1, y1, x2, y2 in imgLines[i]:
+            cv2.line(img, (x1, y1), (x2, y2), (0, 0, 0), 2)
+    return img
+
+
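+# Keep only arrow-like contours: dilate the thresholded image, then use convexity
+# defects to pick out contours with a pronounced notch (the arrow head) and draw
+# them filled on a blank image; returns None when no contour hierarchy is found.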
+def get_filter_arrow_image(threshold_image):
+    blank_image = np.zeros_like(threshold_image)
+    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
+    threshold_image = cv2.dilate(threshold_image, kernel_dilate, iterations=1)
+    contours, hierarchy = cv2.findContours(threshold_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    if hierarchy is not None:
+        threshold_distance = 100
+        for cnt in contours:
+            hull = cv2.convexHull(cnt, returnPoints=False)
+            defects = cv2.convexityDefects(cnt, hull)
+            if defects is not None:
+                for i in range(defects.shape[0]):
+                    start_index, end_index, farthest_index, distance = defects[i, 0]
+                    if distance > threshold_distance:
+                        cv2.drawContours(blank_image, [cnt], -1, 225, -1)
+        return blank_image
+    else:
+        return None
+
+
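+# Euclidean distance between two (x, y) points.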
+def get_length(p1, p2):
+    line_length = ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5
+    return line_length
+
+
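+# Longest point-to-point span over all contours; used to pick the main arrow contour.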
+def find_max_length(contours):
+    max_length = 0
+    for cnt in contours:
+        p1, p2 = get_max_distance_point(cnt)
+        line_length = ((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2) ** 0.5
+        if line_length > max_length:
+            max_length = line_length
+    return max_length
+
+
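+# Farthest pair of points on a contour (brute-force O(n^2) scan over the contour
+# points); the two extremes approximate the arrow's tail and head.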
+def get_max_distance_point(cnt):
+    max_distance = 0
+    max_points = None
+    for [[x1, y1]] in cnt:
+        for [[x2, y2]] in cnt:
+            distance = get_length((x1, y1), (x2, y2))
+            if distance > max_distance:
+                max_distance = distance
+                max_points = [(x1, y1), (x2, y2)]
+    return max_points
+
+
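+# Angle of the line a-b measured from the vertical axis (slope is dx / dy); note
+# this raises ZeroDivisionError for a horizontal line where a[1] == b[1].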
+def angle_between_points(a, b):
+    arrow_slope = (a[0] - b[0]) / (a[1] - b[1])
+    arrow_angle = math.degrees(math.atan(arrow_slope))
+    return arrow_angle
+
+
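+# Pick the contour whose extreme points span the maximum length (the arrow itself),
+# mark both end points on a debug image, and return the image with the two points.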
+def get_arrow_info(arrow_image):
+    arrow_info_image = cv2.cvtColor(arrow_image.copy(), cv2.COLOR_GRAY2BGR)
+    contours, hierarchy = cv2.findContours(arrow_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if hierarchy is not None:
+        max_length = find_max_length(contours)
+        for cnt in contours:
+            blank_image = np.zeros_like(arrow_image)
+            cv2.drawContours(blank_image, [cnt], -1, 255, -1)
+            point1, point2 = get_max_distance_point(cnt)
+            length = get_length(point1, point2)
+            if length == max_length:
+                cv2.circle(arrow_info_image, point1, 2, (255, 0, 0), 3)
+                cv2.circle(arrow_info_image, point2, 2, (255, 0, 0), 3)
+                cv2.putText(arrow_info_image, "point 1 : %s" % (str(point1)), point2, cv2.FONT_HERSHEY_PLAIN, 0.8,
+                            (0, 0, 255), 1)
+                cv2.putText(arrow_info_image, "point 2 : %s" % (str(point2)), (point2[0], point2[1] + 20),
+                            cv2.FONT_HERSHEY_PLAIN, 0.8, (0, 0, 255), 1)
+        return arrow_info_image, point1, point2
+    else:
+        return None, None, None
+
+
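+# Match an arrow end point to the nearest stored class/interface component by
+# comparing the distance to each component's bounding-box centre.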
+def find_closest_components_length(point, class_objects):
+    u_object = 0
+    min_length = 1000000000000
+    for obj in class_objects:
+        ymin = Decimal(obj.y_min)
+        xmin = Decimal(obj.x_min)
+        ymax = Decimal(obj.y_max)
+        xmax = Decimal(obj.x_max)
+        usecase_x = xmin + (xmax - xmin) / 2
+        usecase_y = ymin + (ymax - ymin) / 2
+        usecase_point = (int(usecase_x), int(usecase_y))
+        l_length = ((point[0] - usecase_point[0]) ** 2 + (point[1] - usecase_point[1]) ** 2) ** 0.5
+        if min_length > l_length:
+            min_length = l_length
+            u_object = obj
+    return u_object
+
+
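+# OCR the relationship region with PaddleOCR, persist the Relationship row with the
+# ids of the two linked components, then hand the OCR result to relationship_text
+# for multiplicity and name extraction.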
+def relationship_details_detection(image_nparray, boxes, index, class_comp_id, category, class_object1, class_object2):
+    _image, y_min, x_min, y_max, x_max = crop_image_(image_nparray, boxes, index)
+    _image = cv2.resize(_image, None, fx=4, fy=5)
+    ocr_model = PaddleOCR(lang='en', use_gpu=False)
+    result = ocr_model.ocr(_image)
+    relationship = Relationship(class_answer=class_comp_id, type=category, x_min=x_min, y_min=y_min,
+                                x_max=x_max,
+                                y_max=y_max, comp_1=class_object1.id, comp_2=class_object2.id)
+    db.session.add(relationship)
+    db.session.commit()
+    if result is not None:
+        relationship_text(_image, result, relationship)
+    # print(relationship, 'relationship')
+
+
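+# Classify each OCR fragment with the custom spaCy NER model: tokens labelled
+# MULTIPLICITY (or fragments containing digits) are saved as Multiplicity rows with
+# their bounding boxes; fragments without digits become the relationship name.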
+def relationship_text(_image, result, relationship):
+    # boxes = [res[0] for res in result]
+    # texts = [res[1][0] for res in result]
+    # scores = [res[1][1] for res in result]
+    for element in result:
+        text = element[1][0]
+        box = element[0]
+        nlp_ner = spacy.load('ner_models/model-best')
+        nlp_output = nlp_ner(text)
+        # print(text, 'line 290')
+        # box = np.array(box,dtype=float)
+        box = np.array(box).astype(np.int32)
+        xmin = min(box[:, 0])
+        ymin = min(box[:, 1])
+        xmax = max(box[:, 0])
+        ymax = max(box[:, 1])
+        for token in nlp_output.ents:
+            # print(token.text, 'line 301')
+            # print(token.label_, 'line 302')
+            if token.label_ == 'MULTIPLICITY' or contains_number(text):
+                multiplicity = Multiplicity(value=token.text, relationship_id=relationship.id, x_min=xmin,
+                                            y_min=ymin, x_max=xmax, y_max=ymax)
+                db.session.add(multiplicity)
+                db.session.commit()
+        if not contains_number(text):
+            relationship.name = text
+            db.session.commit()
+
+
+# check if string contains any numbers
+def contains_number(string):
+    return any([char.isdigit() for char in string])
+
+
+# crop image using boxes & index
+def crop_image_(image, boxes, index):
+    height, width, c = image.shape
+    # crop box format: xmin, ymin, xmax, ymax
+    ymin = boxes[index][0] * height
+    xmin = boxes[index][1] * width
+    ymax = boxes[index][2] * height
+    xmax = boxes[index][3] * width
+    cropped_image = image[int(ymin):int(ymax), int(xmin):int(xmax)]
+    # image = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
+    # image = cv2.resize(image, (800, 500))
+    # returns cropped image , ymin,xmin,ymax & xmax
+    return cropped_image, ymin, xmin, ymax, xmax
@@ -14,7 +14,7 @@ import tensorflow as tf
 from config.database import db
 from models.actor_and_use_case import ActorANDUseCase
 
-pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
+# pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
 from services.extend_include_relationship_detection_service import detect_extend_include_relationship
 from services.generalization_relationship_detection_service import detect_generalization_relationship