Commit 2567ae9b authored by dasunx

Python code added to automatically build the answer

parent 33705cd0
__pycache__
*.html
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import re
from lxml import etree

class Medium:
    def __init__(self, qtitle, keywords=None, description=""):
        self.qtitle = qtitle
        # Avoid a mutable default argument; each instance gets its own list
        self.keywords = keywords if keywords is not None else []
        self.description = description
        self.urls = []
        self.session = HTMLSession()

    def searchArticles(self):
        """
        Search for articles with a Google dork; the site:medium.com filter
        excludes results from other websites.
        """
        html_page = requests.get(
            f"https://google.com/search?q=site%3Amedium.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://medium.com" in link.get("href", ""):
                self.urls.append(self.extractMediumURLS(link["href"]))
        if self.urls:
            self.viewArticle(self.urls[0])

    def extractMediumURLS(self, uriString):
        """
        Strip the Google redirect wrapper from the URL string and return
        the target Medium URL.
        """
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        uriTrimmed = re.match(r"^.*?\&sa=", uriTrimmed).group(0)
        return uriTrimmed.replace("&sa=", "")

    def viewArticle(self, url):
        html_page = self.session.get(url)
        # Render with JavaScript, since Medium builds the page dynamically
        html_page.html.render(timeout=20)
        with open("medium.html", "wb") as med:
            med.write(html_page.content)
        with open("medium.html", encoding="utf8") as sf:
            soup = BeautifulSoup(sf, "html.parser")
        dom = etree.HTML(str(soup))
        # NOTE: these class names are Medium's generated CSS classes and may
        # change whenever Medium redeploys
        title = dom.xpath('//*[@class="ap aq ar as at ff av w"]/div/h1')[0].text
        article = dom.xpath('//*[@class="ap aq ar as at ff av w"]')[0]
        with open(f"article-{title.replace(' ', '')}.html", "wb") as artFile:
            artFile.write(etree.tostring(article))
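
# Minimal usage sketch (uses the hard-coded question title from the main
# script; rendering needs requests-html's bundled Chromium and network access,
# and writes medium.html plus an article-<title>.html file to the working
# directory):
if __name__ == "__main__":
    medium = Medium("python django or flask for web development")
    medium.searchArticles()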
# NOTE: hard-coded credentials; in practice these belong in environment
# variables or a secrets store, not in source control.
DATABASE_URL_PROD = "mongodb+srv://admin2:admin12345@cluster0.u4vl4.mongodb.net/production?retryWrites=true&w=majority"
DATABASE_URL_DEV = "mongodb+srv://admin:admin1234@cluster0.u4vl4.mongodb.net/test?retryWrites=true&w=majority"

# Change environment accordingly ("DEV" or "PROD")
ENV = "DEV"


def get_database():
    """
    Get a database instance from MongoDB.
    """
    from pymongo import MongoClient

    # MongoDB Atlas connection string used by pymongo to connect
    CONNECTION_STRING = DATABASE_URL_DEV if ENV == "DEV" else DATABASE_URL_PROD
    client = MongoClient(CONNECTION_STRING)
    # Return the database that matches the selected environment
    return client["test"] if ENV == "DEV" else client["production"]
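
# Minimal usage sketch (assumes the Atlas cluster above is reachable; the
# "automatedanswers" collection name matches the one used in the main script):
if __name__ == "__main__":
    db = get_database()
    print(db.list_collection_names())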
beautifulsoup4==4.9.3
dnspython==2.1.0
lxml==4.6.1
pymongo==3.11.4
regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
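# Install the pinned dependencies with: pip install -r requirements.txt
# NOTE: youtube.py also imports youtubesearchpython (PyPI package
# youtube-search-python), which is not pinned here.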
from youtube import Youtube
from Medium import Medium
from stof import STOF
import sys
from database import get_database

def saveAnswer(ans_id, stackoverflow, videos):
    db = get_database()
    try:
        from bson.objectid import ObjectId

        automatedanswers = db["automatedanswers"]
        automatedanswers.update_one(
            {"_id": ObjectId(ans_id)},
            {"$set": {"youtube": videos, "stackoverflow": stackoverflow}},
        )
    # Catch pymongo/bson errors as well, not just a missing import
    except Exception as err:
        print(err)


if __name__ == "__main__":
    # title = input("Enter question title: ")
    title = "python django or flask for web development"  # sys.argv[1]
    tags = ["react"]  # sys.argv[2]
    AUTO_ANS_ID = "60d746076689344694ad9e30"  # sys.argv[3]
    stack = STOF(title)
    ans = stack.searchQuestion()
    print(ans)
    # medium = Medium(title)
    # medium.searchArticles()
    youtube = Youtube(title, tags)
    videos = youtube.find_videos()
    saveAnswer(AUTO_ANS_ID, ans, videos)
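
# Note: inputs are hard-coded above for testing; the commented-out sys.argv
# references suggest the intended invocation was:
#     python <this_script>.py "<question title>" "<tags>" "<answer id>"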
import requests
from bs4 import BeautifulSoup
import re
from lxml import etree

class STOF:
    def __init__(self, qtitle, keywords=None, description=""):
        self.qtitle = qtitle
        # Avoid a mutable default argument; each instance gets its own list
        self.keywords = keywords if keywords is not None else []
        self.description = description
        self.urls = []

    def searchQuestion(self):
        """
        Search for the question with a Google dork; the site:stackoverflow.com
        filter excludes results from other websites.
        """
        html_page = requests.get(
            f"https://google.com/search?q=site%3Astackoverflow.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://stackoverflow.com" in link.get("href", ""):
                self.urls.append(self.extractSOFUrl(link["href"]))
        return self.viewStackUrls()

    def extractSOFUrl(self, uriString):
        """
        Strip the Google redirect wrapper from the URL string and return
        the target Stack Overflow URL.
        """
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        uriTrimmed = re.match(r"^.*?\&sa=", uriTrimmed).group(0)
        return uriTrimmed.replace("&sa=", "")

    def viewStackUrls(self):
        return self.viewStackOverFlowQuestion(self.urls[0])

    def viewStackOverFlowQuestion(self, url):
        html_page = requests.get(url)
        soup = BeautifulSoup(html_page.content, "html.parser")
        dom = etree.HTML(str(soup))
        answers_count = dom.xpath('//*[@id="answers-header"]/div/div[1]/h2')[
            0
        ].text.strip()
        answer = {"url": url}
        if answers_count != "":
            try:
                # Prefer the accepted answer when one exists
                verified_answer = dom.xpath(
                    '//*[@class="answer accepted-answer"]/div/div[2]/div[1]'
                )[0]
                answer["content"] = etree.tostring(verified_answer).decode("utf-8")
                answer["status"] = "Verified"
            except IndexError:
                print("no verified answer")
                try:
                    # Fall back to the first (most voted) answer
                    first_answer = dom.xpath(
                        '//*[@class="answer"]/div/div[2]/div[1]'
                    )[0]
                    answer["content"] = etree.tostring(first_answer).decode("utf-8")
                    answer["status"] = "Most Voted"
                except IndexError:
                    print("no answers")
                    answer["content"] = (
                        "Sorry, the ProbExpert bot could not find an answer"
                        " on Stack Overflow"
                    )
                    answer["status"] = "Null"
        return answer

    def calculateAccuracy(self):
        """
        Compare the user's question with the Stack Overflow question and
        calculate the accuracy.
        """
        # TODO: not yet implemented in this commit
        raise NotImplementedError
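
# Minimal usage sketch (uses the same hard-coded title as the main script;
# requires network access, and the XPath selectors above depend on
# Stack Overflow's current markup):
if __name__ == "__main__":
    stof = STOF("python django or flask for web development")
    print(stof.searchQuestion())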
from database import get_database


class Youtube:
    def __init__(self, title, keywords=None):
        self.collection = get_database()["AutomatedAnswer"]
        self.title = title
        # Avoid a mutable default argument; fall back to the default keywords
        self.keywords = keywords if keywords is not None else ["react", "node"]

    def find_videos(self):
        """
        Find YouTube videos for the question.
        Searches by the question title first, and falls back to the question
        keywords if the title search returns no results.
        """
        from youtubesearchpython import VideosSearch

        videosSearch = VideosSearch(self.title, limit=2)
        response = videosSearch.result()
        videos = []
        if len(response["result"]) <= 0:
            # No results for the title; retry with the keywords as the query
            videosSearch = VideosSearch(" ".join(self.keywords), limit=2)
            response = videosSearch.result()
        for i in response["result"]:
            videos.append(i["link"])
            print(i["link"])
        return videos
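
# Minimal usage sketch (hypothetical tags; requires the youtube-search-python
# package, network access, and a reachable MongoDB instance, since the
# constructor opens a database handle):
if __name__ == "__main__":
    yt = Youtube("python django or flask for web development", ["python", "django"])
    print(yt.find_videos())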