content filtration with azure services

823c9ce0 · ParamiLelkada · 12c06786 · 823c9ce0 · 823c9ce0 · 823c9ce0
Commit 823c9ce0 authored Feb 08, 2024 by ParamiLelkada
Showing with 43 additions and 1 deletion

App/Backend/content_filter_azure.py App/Backend/content_filter_azure.py +35 -0

App/Backend/requirements.txt App/Backend/requirements.txt +2 -1

App/Backend/word_generation.py App/Backend/word_generation.py +6 -0

No files found.
--- a/App/Backend/content_filter_azure.py
+++ b/App/Backend/content_filter_azure.py
+import requests
+import json
+
+def is_word_safe(word):
+    """Screen a word with the Azure Content Moderator text-screening API.
+
+    Args:
+        word: The text to screen (str, or already-encoded bytes).
+
+    Returns:
+        True when the response contains no flagged 'Terms', False when it
+        does, and None when the check could not be completed (network
+        error, non-200 status, or a response body that is not valid JSON).
+        Callers should treat None as "unknown", not as "safe".
+    """
+    import logging
+    import os
+
+    logger = logging.getLogger(__name__)
+
+    # SECURITY NOTE(review): a subscription key is committed in source.
+    # It should be rotated and supplied only through the environment; the
+    # literal fallbacks are kept solely so existing deployments keep working.
+    endpoint = os.environ.get(
+        "AZURE_CONTENT_MODERATOR_ENDPOINT",
+        "https://hearmeinstance.cognitiveservices.azure.com/",
+    )
+    subscription_key = os.environ.get(
+        "AZURE_CONTENT_MODERATOR_KEY",
+        "8d339b48de6d401f801459770e345532",
+    )
+
+    # Strip the trailing slash so the joined URL does not contain "//".
+    url = (
+        f"{endpoint.rstrip('/')}"
+        "/contentmoderator/moderate/v1.0/ProcessText/Screen"
+        "?language=eng&classify=True"
+    )
+
+    headers = {
+        'Content-Type': 'text/plain',
+        'Ocp-Apim-Subscription-Key': subscription_key,
+    }
+
+    # Encode explicitly: a str body is sent as Latin-1 by the HTTP layer and
+    # raises UnicodeEncodeError for characters outside that range.
+    payload = word.encode("utf-8") if isinstance(word, str) else word
+
+    try:
+        # A timeout keeps a stalled service from blocking the caller forever.
+        response = requests.post(url, headers=headers, data=payload, timeout=10)
+    except requests.RequestException as exc:
+        logger.warning("Content moderation request failed: %s", exc)
+        return None
+
+    if response.status_code != 200:
+        # Return a bare None rather than a (None, message) tuple: a non-empty
+        # tuple is truthy, so truthiness checks would treat a failed
+        # moderation call as "safe".
+        logger.warning("Content moderation error: %s", response.text)
+        return None
+
+    try:
+        result = response.json()
+    except ValueError:
+        logger.warning("Content moderation returned a non-JSON body")
+        return None
+
+    # The word is considered unsafe when the response has a non-empty 'Terms'.
+    return not result.get('Terms')
+
+
--- a/App/Backend/requirements.txt
+++ b/App/Backend/requirements.txt
@@ -2,4 +2,5 @@ Flask==2.2.3
 numpy==1.24.2
 transformers
 torch
-pymongo
\ No newline at end of file
+pymongo
+requests
\ No newline at end of file
--- a/App/Backend/word_generation.py
+++ b/App/Backend/word_generation.py
 import torch
 from transformers import RobertaTokenizer, RobertaForMaskedLM
 import pymongo
+from content_filter_azure import is_word_safe

 # Load the pretrained RoBERTa model and tokenizer
 tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
@@ -24,6 +25,11 @@ def get_similar_words(input_word, top_k=3):
    # Get the top k predicted words
    top_k_indices = torch.topk(predictions, top_k).indices.tolist()
    related_words = [tokenizer.decode(idx).strip() for idx in top_k_indices]
+    safe_related_words = []
+    for item in related_words:
+        if is_word_safe(item)==True:
+            safe_related_words.append(item)
+            
    
    # Create the result array
    result = []