SPEECH INTRODUCTION PROCESSING

047fbb4e · chalikaM · e9876fc0 · 047fbb4e
Commit 047fbb4e authored Jul 03, 2021 by chalikaM
Hide whitespace changes
Inline Side-by-side

Showing with 40 additions and 0 deletions

introduction.py introduction.py +40 -0

No files found.
--- a/introduction.py
+++ b/introduction.py
+import sys
+from operator import index
+import spacy
+import textacy
+
+# Identify phrases that appear in the introduction (the first 15% of the text's characters)
+
+
+nlp = spacy.load("en_core_web_sm")
+
+s = " according to a study where does he go few moments ago he went once upon a time asdasd  dwref ADSADSADA Hello i'm Gabi :D  he asked me to do it i said good bye to him he replyed fot itgoodbye asd asl a he started running. once upon a time he went to the temple."
+
+
+totalCharacterCount = len(s)
+print(totalCharacterCount)
+
+introductionCharacterCount = (15/100)*totalCharacterCount
+print(introductionCharacterCount)
+
+introduction = (s[0:int(introductionCharacterCount)])
+print(introduction)
+
+final_words = []
+
+words = ["according to a study", "according to a research","according to a review","according to a survey","according to an analysis","according to one study","according to research","according to an investigation"]
+for wordPharse in words:
+    if wordPharse in introduction:
+        final_words.append(wordPharse)
+print(final_words)
+
+doc = nlp(introduction)
+
+patterns = [[{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "PRON"}], [{"POS": "ADV"}, {"POS": "AUX"}, {"POS": "NOUN"}]]
+
+verb_phrases = textacy.extract.token_matches(doc, patterns)
+
+# print(s[0:introductionCharacterCount])
+
+for verb_phrases in verb_phrases:
+    print(f"Identified questions : {verb_phrases}")
\ No newline at end of file