Commit 2567ae9b authored by dasunx

Python code added to automatically build the answer

parent 33705cd0
__pycache__
*.html
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import re
from lxml import etree

class Medium:
    def __init__(self, qtitle, keywords=None, description=""):
        self.qtitle = qtitle
        # Avoid a mutable default argument; each instance gets its own list
        self.keywords = keywords if keywords is not None else []
        self.description = description
        self.urls = []
        self.session = HTMLSession()

    def searchArticles(self):
        """
        Search for articles with a Google dork; the site:medium.com filter
        excludes results from other websites.
        """
        html_page = requests.get(
            f"https://google.com/search?q=site%3Amedium.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://medium.com" in link.get("href", ""):
                self.urls.append(self.extractMediumURLS(link["href"]))
        if self.urls:
            self.viewArticle(self.urls[0])

    def extractMediumURLS(self, uriString):
        """
        Strip the Google redirect wrapper from the URL string and return
        the target Medium URL.
        """
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        uriTrimmed = re.match(r"^.*?\&sa=", uriTrimmed).group(0)
        return uriTrimmed.replace("&sa=", "")

    def viewArticle(self, url):
        html_page = self.session.get(url)
        # Render with JavaScript, since Medium builds the page dynamically
        html_page.html.render(timeout=20)
        with open("medium.html", "wb") as med:
            med.write(html_page.content)
        with open("medium.html", encoding="utf8") as sf:
            soup = BeautifulSoup(sf, "html.parser")
        dom = etree.HTML(str(soup))
        # NOTE: these class names are Medium's generated CSS classes and may
        # change whenever Medium redeploys
        title = dom.xpath('//*[@class="ap aq ar as at ff av w"]/div/h1')[0].text
        article = dom.xpath('//*[@class="ap aq ar as at ff av w"]')[0]
        with open(f"article-{title.replace(' ', '')}.html", "wb") as artFile:
            artFile.write(etree.tostring(article))
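
# Minimal usage sketch (uses the hard-coded question title from the main
# script; rendering needs requests-html's bundled Chromium and network access,
# and writes medium.html plus an article-<title>.html file to the working
# directory):
if __name__ == "__main__":
    medium = Medium("python django or flask for web development")
    medium.searchArticles()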
# NOTE: hard-coded credentials; in practice these belong in environment
# variables or a secrets store, not in source control.
DATABASE_URL_PROD = "mongodb+srv://admin2:admin12345@cluster0.u4vl4.mongodb.net/production?retryWrites=true&w=majority"
DATABASE_URL_DEV = "mongodb+srv://admin:admin1234@cluster0.u4vl4.mongodb.net/test?retryWrites=true&w=majority"

# Change environment accordingly ("DEV" or "PROD")
ENV = "DEV"


def get_database():
    """
    Get a database instance from MongoDB.
    """
    from pymongo import MongoClient

    # MongoDB Atlas connection string used by pymongo to connect
    CONNECTION_STRING = DATABASE_URL_DEV if ENV == "DEV" else DATABASE_URL_PROD
    client = MongoClient(CONNECTION_STRING)
    # Return the database that matches the selected environment
    return client["test"] if ENV == "DEV" else client["production"]
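
# Minimal usage sketch (assumes the Atlas cluster above is reachable; the
# "automatedanswers" collection name matches the one used in the main script):
if __name__ == "__main__":
    db = get_database()
    print(db.list_collection_names())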
beautifulsoup4==4.9.3
dnspython==2.1.0
lxml==4.6.1
pymongo==3.11.4
regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
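# Install the pinned dependencies with: pip install -r requirements.txt
# NOTE: youtube.py also imports youtubesearchpython (PyPI package
# youtube-search-python), which is not pinned here.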
from youtube import Youtube
from Medium import Medium
from stof import STOF
import sys
from database import get_database

def saveAnswer(ans_id, stackoverflow, videos):
    db = get_database()
    try:
        from bson.objectid import ObjectId

        automatedanswers = db["automatedanswers"]
        automatedanswers.update_one(
            {"_id": ObjectId(ans_id)},
            {"$set": {"youtube": videos, "stackoverflow": stackoverflow}},
        )
    # Catch pymongo/bson errors as well, not just a missing import
    except Exception as err:
        print(err)


if __name__ == "__main__":
    # title = input("Enter question title: ")
    title = "python django or flask for web development"  # sys.argv[1]
    tags = ["react"]  # sys.argv[2]
    AUTO_ANS_ID = "60d746076689344694ad9e30"  # sys.argv[3]
    stack = STOF(title)
    ans = stack.searchQuestion()
    print(ans)
    # medium = Medium(title)
    # medium.searchArticles()
    youtube = Youtube(title, tags)
    videos = youtube.find_videos()
    saveAnswer(AUTO_ANS_ID, ans, videos)
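
# Note: inputs are hard-coded above for testing; the commented-out sys.argv
# references suggest the intended invocation was:
#     python <this_script>.py "<question title>" "<tags>" "<answer id>"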
import requests
from bs4 import BeautifulSoup
import re
from lxml import etree

class STOF:
    def __init__(self, qtitle, keywords=None, description=""):
        self.qtitle = qtitle
        # Avoid a mutable default argument; each instance gets its own list
        self.keywords = keywords if keywords is not None else []
        self.description = description
        self.urls = []

    def searchQuestion(self):
        """
        Search for the question with a Google dork; the site:stackoverflow.com
        filter excludes results from other websites.
        """
        html_page = requests.get(
            f"https://google.com/search?q=site%3Astackoverflow.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://stackoverflow.com" in link.get("href", ""):
                self.urls.append(self.extractSOFUrl(link["href"]))
        return self.viewStackUrls()

    def extractSOFUrl(self, uriString):
        """
        Strip the Google redirect wrapper from the URL string and return
        the target Stack Overflow URL.
        """
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        uriTrimmed = re.match(r"^.*?\&sa=", uriTrimmed).group(0)
        return uriTrimmed.replace("&sa=", "")

    def viewStackUrls(self):
        return self.viewStackOverFlowQuestion(self.urls[0])

    def viewStackOverFlowQuestion(self, url):
        html_page = requests.get(url)
        soup = BeautifulSoup(html_page.content, "html.parser")
        dom = etree.HTML(str(soup))
        answers_count = dom.xpath('//*[@id="answers-header"]/div/div[1]/h2')[
            0
        ].text.strip()
        answer = {"url": url}
        if answers_count != "":
            try:
                # Prefer the accepted answer when one exists
                verified_answer = dom.xpath(
                    '//*[@class="answer accepted-answer"]/div/div[2]/div[1]'
                )[0]
                answer["content"] = etree.tostring(verified_answer).decode("utf-8")
                answer["status"] = "Verified"
            except IndexError:
                print("no verified answer")
                try:
                    # Fall back to the first (most voted) answer
                    first_answer = dom.xpath(
                        '//*[@class="answer"]/div/div[2]/div[1]'
                    )[0]
                    answer["content"] = etree.tostring(first_answer).decode("utf-8")
                    answer["status"] = "Most Voted"
                except IndexError:
                    print("no answers")
                    answer["content"] = (
                        "Sorry, the ProbExpert bot could not find an answer"
                        " on Stack Overflow"
                    )
                    answer["status"] = "Null"
        return answer

    def calculateAccuracy(self):
        """
        Compare the user's question with the Stack Overflow question and
        calculate the accuracy.
        """
        # TODO: not yet implemented in this commit
        raise NotImplementedError
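
# Minimal usage sketch (uses the same hard-coded title as the main script;
# requires network access, and the XPath selectors above depend on
# Stack Overflow's current markup):
if __name__ == "__main__":
    stof = STOF("python django or flask for web development")
    print(stof.searchQuestion())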
from database import get_database


class Youtube:
    def __init__(self, title, keywords=None):
        self.collection = get_database()["AutomatedAnswer"]
        self.title = title
        # Avoid a mutable default argument; fall back to the default keywords
        self.keywords = keywords if keywords is not None else ["react", "node"]

    def find_videos(self):
        """
        Find YouTube videos for the question.
        Searches by the question title first, and falls back to the question
        keywords if the title search returns no results.
        """
        from youtubesearchpython import VideosSearch

        videosSearch = VideosSearch(self.title, limit=2)
        response = videosSearch.result()
        videos = []
        if len(response["result"]) <= 0:
            # No results for the title; retry with the keywords as the query
            videosSearch = VideosSearch(" ".join(self.keywords), limit=2)
            response = videosSearch.result()
        for i in response["result"]:
            videos.append(i["link"])
            print(i["link"])
        return videos
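
# Minimal usage sketch (hypothetical tags; requires the youtube-search-python
# package, network access, and a reachable MongoDB instance, since the
# constructor opens a database handle):
if __name__ == "__main__":
    yt = Youtube("python django or flask for web development", ["python", "django"])
    print(yt.find_videos())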