Commit 823c9ce0 authored by ParamiLelkada

Content filtering with Azure services

parent 12c06786
import json
import logging
import os

import requests
def is_word_safe(word):
    """Screen a word with the Azure Content Moderator text API.

    The word is POSTed as plain text to the ``ProcessText/Screen``
    operation with ``language=eng`` and ``classify=True``.

    Args:
        word: Text to screen. A ``str`` is sent UTF-8 encoded; any other
            value (e.g. ``bytes``) is passed to ``requests`` unchanged.

    Returns:
        ``True`` when the response carries no ``Terms`` entry (or an empty
        one), ``False`` when ``Terms`` is non-empty, and ``None`` when the
        check could not be completed (network error, timeout, non-200
        status, or a body that is not valid JSON). ``None`` is falsy on
        purpose so that a failed check is never mistaken for "safe";
        callers that need to tell "unsafe" from "unknown" should test
        ``is None`` explicitly.
    """
    # NOTE(security): the literal defaults below are kept only so existing
    # deployments keep working. A subscription key committed to source
    # control should be rotated and supplied through the environment.
    endpoint = os.environ.get(
        "AZURE_CONTENT_MODERATOR_ENDPOINT",
        "https://hearmeinstance.cognitiveservices.azure.com/",
    )
    subscription_key = os.environ.get(
        "AZURE_CONTENT_MODERATOR_KEY",
        "8d339b48de6d401f801459770e345532",
    )

    # Strip the trailing slash so the joined URL does not contain "//".
    url = (
        f"{endpoint.rstrip('/')}"
        "/contentmoderator/moderate/v1.0/ProcessText/Screen"
    )
    params = {'language': 'eng', 'classify': 'True'}
    headers = {
        'Content-Type': 'text/plain',
        'Ocp-Apim-Subscription-Key': subscription_key,
    }

    # Encode explicitly so non-ASCII words are always sent as UTF-8 rather
    # than depending on the HTTP stack's default handling of str bodies.
    payload = word.encode('utf-8') if isinstance(word, str) else word

    logger = logging.getLogger(__name__)

    try:
        # A timeout keeps a stalled connection from blocking the caller.
        response = requests.post(
            url, params=params, headers=headers, data=payload, timeout=10
        )
    except requests.RequestException as exc:
        logger.warning("Content moderation request failed: %s", exc)
        return None

    if response.status_code != 200:
        logger.warning(
            "Content moderation returned HTTP %s: %s",
            response.status_code,
            response.text,
        )
        return None

    try:
        result = response.json()
    except ValueError:
        logger.warning("Content moderation returned a non-JSON body")
        return None

    # A non-empty 'Terms' entry means the service flagged something.
    return not result.get('Terms')
......@@ -2,4 +2,5 @@ Flask==2.2.3
numpy==1.24.2
transformers
torch
pymongo
\ No newline at end of file
pymongo
requests
\ No newline at end of file
import torch
from transformers import RobertaTokenizer, RobertaForMaskedLM
import pymongo
from content_filter_azure import is_word_safe
# Load the tokenizer for the pretrained 'roberta-base' checkpoint.
# NOTE(review): the matching model load presumably follows below; it is not
# visible in this hunk - confirm against the full file.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
......@@ -24,6 +25,11 @@ def get_similar_words(input_word, top_k=3):
# Get the top k predicted words
top_k_indices = torch.topk(predictions, top_k).indices.tolist()
related_words = [tokenizer.decode(idx).strip() for idx in top_k_indices]
safe_related_words = []
for item in related_words:
if is_word_safe(item)==True:
safe_related_words.append(item)
# Create the result array
result = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment