Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2021-155
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2021-155
2021-155
Commits
2567ae9b
Commit
2567ae9b
authored
Jun 26, 2021
by
dasunx
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Python code added to automatically build the answer
parent
33705cd0
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
238 additions
and
0 deletions
+238
-0
backend/python/auto-answer/.gitignore
backend/python/auto-answer/.gitignore
+2
-0
backend/python/auto-answer/Medium.py
backend/python/auto-answer/Medium.py
+58
-0
backend/python/auto-answer/database.py
backend/python/auto-answer/database.py
+21
-0
backend/python/auto-answer/github.py
backend/python/auto-answer/github.py
+0
-0
backend/python/auto-answer/requirements.txt
backend/python/auto-answer/requirements.txt
+9
-0
backend/python/auto-answer/scrapper.py
backend/python/auto-answer/scrapper.py
+39
-0
backend/python/auto-answer/stof.py
backend/python/auto-answer/stof.py
+79
-0
backend/python/auto-answer/youtube.py
backend/python/auto-answer/youtube.py
+30
-0
No files found.
backend/python/auto-answer/.gitignore
0 → 100644
View file @
2567ae9b
__pycache__
*.html
\ No newline at end of file
backend/python/auto-answer/Medium.py
0 → 100644
View file @
2567ae9b
import
requests
from
requests_html
import
HTMLSession
from
bs4
import
BeautifulSoup
import
re
from
lxml
import
etree
class Medium:
    """Scrape medium.com articles related to a question via a Google site search."""

    def __init__(self, qtitle, keywords=None, description=""):
        """
        Args:
            qtitle: question title used as the search phrase.
            keywords: optional list of extra keywords (defaults to an empty list).
            description: optional question description.
        """
        self.qtitle = qtitle
        # Fresh list per instance: a mutable default argument would be shared
        # by every Medium object.
        self.keywords = [] if keywords is None else keywords
        self.description = description
        self.urls = []
        self.session = HTMLSession()

    def searchArticles(self):
        """
        Search details using google dorks.
        With google dorks we can filter out search results from other web sites
        (site:medium.com). Renders and saves the first matching article.
        """
        html_page = requests.get(
            f"https://google.com/search?q=site%3Amedium.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://medium.com" in link["href"]:
                self.urls.append(self.extractMediumURLS(link["href"]))
        # Guard against zero hits instead of raising IndexError.
        if self.urls:
            self.viewArticle(self.urls[0])

    def extractMediumURLS(self, uriString):
        """
        Remove Google's redirect wrapper ("/url?q=<target>&sa=...") from the
        url string and return the targeted url.
        """
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        match = re.match(r"^.*?\&sa=", uriTrimmed)
        if match is None:
            # No "&sa=" suffix present — the trimmed string is already the URL.
            # (The original called .group(0) on None and crashed here.)
            return uriTrimmed
        return match.group(0).replace("&sa=", "")

    def viewArticle(self, url):
        """
        Render the article page (JavaScript included), persist the full page to
        medium.html, then extract the article node into article-<title>.html.
        """
        html_page = self.session.get(url)
        html_page.html.render(timeout=20)
        # The with-block closes the file; the original's extra close() call
        # inside it was redundant and has been removed.
        with open("medium.html", "wb") as med:
            med.write(html_page.content)
        with open("medium.html", encoding="utf8") as sf:
            soup = BeautifulSoup(sf, "html.parser")
        dom = etree.HTML(str(soup))
        # NOTE(review): these obfuscated class names are brittle — they change
        # whenever Medium redeploys its frontend; verify before relying on them.
        title = dom.xpath('//*[@class="ap aq ar as at ff av w"]/div/h1')[0].text
        article = dom.xpath('//*[@class="ap aq ar as at ff av w"]')[0]
        with open(f"article-{title.replace(' ','')}.html", "wb") as artFile:
            artFile.write(etree.tostring(article))
backend/python/auto-answer/database.py
0 → 100644
View file @
2567ae9b
# WARNING(security): live database credentials are committed in plain text.
# They should be moved to environment variables / a secrets manager and the
# exposed passwords rotated.
DATABASE_URL_PROD = "mongodb+srv://admin2:admin12345@cluster0.u4vl4.mongodb.net/production?retryWrites=true&w=majority"
DATABASE_URL_DEV = "mongodb+srv://admin:admin1234@cluster0.u4vl4.mongodb.net/test?retryWrites=true&w=majority"
# Change Environment accordingly
# Selects which connection string/database get_database() returns:
# "DEV" -> the test database, anything else -> production.
ENV = "DEV"
def get_database():
    """
    Get database instance from mongodb.

    Uses the DEV connection string and the "test" database when ENV == "DEV",
    otherwise the production connection string and "production" database.

    Returns:
        pymongo.database.Database: the selected database handle.
    """
    # Imported lazily so modules that never touch the DB don't require pymongo.
    from pymongo import MongoClient

    # Conditional expressions instead of the fragile `cond and a or b` idiom,
    # which silently picks the wrong branch whenever the "a" value is falsy.
    CONNECTION_STRING = DATABASE_URL_DEV if ENV == "DEV" else DATABASE_URL_PROD
    # Create a connection using MongoClient.
    client = MongoClient(CONNECTION_STRING)
    return client["test"] if ENV == "DEV" else client["production"]
backend/python/auto-answer/github.py
0 → 100644
View file @
2567ae9b
backend/python/auto-answer/requirements.txt
0 → 100644
View file @
2567ae9b
beautifulsoup4==4.9.3
dnspython==2.1.0
lxml==4.6.1
pymongo==3.11.4
regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
backend/python/auto-answer/scrapper.py
0 → 100644
View file @
2567ae9b
from
youtube
import
Youtube
from
Medium
import
Medium
from
stof
import
STOF
import
sys
from
database
import
get_database
def saveAnswer(ans_id, stackoverflow, videos):
    """
    Persist the generated answer into the `automatedanswers` collection.

    Args:
        ans_id: hex string id of the automated-answer document to update.
        stackoverflow: dict describing the scraped Stack Overflow answer.
        videos: list of YouTube video links.
    """
    db = get_database()
    try:
        from bson.objectid import ObjectId

        automatedanswers = db["automatedanswers"]
        automatedanswers.update_one(
            {"_id": ObjectId(ans_id)},
            {"$set": {"youtube": videos, "stackoverflow": stackoverflow}},
        )
    except Exception as err:
        # The original caught only NameError, which never fires here: a failed
        # import raises ImportError and pymongo raises its own error types.
        # Keep the best-effort (non-fatal) behavior but actually report it.
        print(err)
if __name__ == "__main__":
    # CLI usage: scrapper.py [<question title> [<comma-separated tags> [<answer id>]]]
    # The original hard-coded these values with "# sys.argv[n]" notes; honor
    # that intent while keeping the hard-coded samples as fallbacks for local runs.
    title = sys.argv[1] if len(sys.argv) > 1 else "python django or flask for web development"
    # Tags arrive as one argv string; split on commas — TODO confirm the
    # caller's delimiter convention.
    tags = sys.argv[2].split(",") if len(sys.argv) > 2 else ["react"]
    AUTO_ANS_ID = sys.argv[3] if len(sys.argv) > 3 else "60d746076689344694ad9e30"

    stack = STOF(title)
    ans = stack.searchQuestion()
    print(ans)
    youtube = Youtube(title, tags)
    videos = youtube.find_videos()
    saveAnswer(AUTO_ANS_ID, ans, videos)
backend/python/auto-answer/stof.py
0 → 100644
View file @
2567ae9b
import
requests
from
bs4
import
BeautifulSoup
import
re
from
lxml
import
etree
class STOF:
    """Find an answer for a question by scraping Stack Overflow via a Google site search."""

    def __init__(self, qtitle, keywords=None, description=""):
        """
        Args:
            qtitle: question title used as the search phrase.
            keywords: optional list of extra keywords (defaults to an empty list).
            description: optional question description.
        """
        self.qtitle = qtitle
        # Fresh list per instance: a mutable default argument would be shared
        # by every STOF object.
        self.keywords = [] if keywords is None else keywords
        self.description = description
        self.urls = []

    def searchQuestion(self):
        """Google-dork for stackoverflow.com hits and return the scraped answer dict."""
        html_page = requests.get(
            f"https://google.com/search?q=site%3Astackoverflow.com+{self.qtitle}"
        )
        soup = BeautifulSoup(html_page.content, "html.parser")
        for link in soup.findAll("a"):
            if "https://stackoverflow.com" in link["href"]:
                self.urls.append(self.extractSOFUrl(link["href"]))
        ans = self.viewStackUrls()
        return ans

    def extractSOFUrl(self, uriString):
        """Strip Google's redirect wrapper ("/url?q=<target>&sa=...") and return the target URL."""
        uriTrimmed = uriString[7:]  # drop the leading "/url?q="
        match = re.match(r"^.*?\&sa=", uriTrimmed)
        if match is None:
            # No "&sa=" marker — already a bare URL.
            # (The original called .group(0) on None and crashed here.)
            return uriTrimmed
        return match.group(0).replace("&sa=", "")

    def viewStackUrls(self):
        """Open the first matched question, or return a "Null" answer when there were no hits."""
        if not self.urls:
            # The original indexed urls[0] unconditionally and raised
            # IndexError on an empty result set.
            return {
                "url": None,
                "content": "Sorry, the ProbExpert bot could not find an answer on Stack Overflow",
                "status": "Null",
            }
        return self.viewStackOverFlowQuestion(self.urls[0])

    def viewStackOverFlowQuestion(self, url):
        """
        Scrape one Stack Overflow question page.

        Prefers the accepted answer, falls back to the first (most voted)
        answer, and degrades to a "Null" placeholder when nothing is found.

        Returns:
            dict with key "url" and, when the page had answers, "content"
            (answer HTML as a string) and "status"
            ("Verified" | "Most Voted" | "Null").
        """
        html_page = requests.get(url)
        soup = BeautifulSoup(html_page.content, "html.parser")
        dom = etree.HTML(str(soup))
        answers_count = dom.xpath('//*[@id="answers-header"]/div/div[1]/h2')[0].text.strip()
        answer = {"url": url}
        if answers_count != "":
            try:
                verified_answer = dom.xpath(
                    '//*[@class="answer accepted-answer"]/div/div[2]/div[1]'
                )[0]
                answer["content"] = etree.tostring(verified_answer).decode("utf-8")
                answer["status"] = "Verified"
            except IndexError:
                # Bare `except:` narrowed to the actual failure mode:
                # the xpath returned no nodes.
                try:
                    first_answer = dom.xpath('//*[@class="answer"]/div/div[2]/div[1]')[0]
                    answer["content"] = etree.tostring(first_answer).decode("utf-8")
                    answer["status"] = "Most Voted"
                except IndexError:
                    # No answers at all — grammar of the user-facing message fixed.
                    answer["content"] = "Sorry, the ProbExpert bot could not find an answer on Stack Overflow"
                    answer["status"] = "Null"
                    print("no answers")
                # handle no answer
                print("no verified answer")
        return answer

    def calculateAccuracy(self):
        """
        Compare the user's question with the Stack Overflow question and
        calculate the accuracy.

        TODO: not implemented yet; currently returns None.
        """
\ No newline at end of file
backend/python/auto-answer/youtube.py
0 → 100644
View file @
2567ae9b
from
re
import
X
from
database
import
get_database
class Youtube:
    """Find YouTube videos relevant to a question title, with keyword fallback."""

    def __init__(self, title, keywords=None):
        """
        Args:
            title: question title used as the primary search phrase.
            keywords: fallback search keywords (defaults to ["react", "node"]).
        """
        self.collection = get_database()["AutomatedAnswer"]
        self.title = title
        # Fresh list per instance: the original's mutable default list would
        # be shared by every Youtube object.
        self.keywords = ["react", "node"] if keywords is None else keywords

    def find_videos(self):
        """
        Find youtube videos using this method.
        This will automatically try to find youtube videos using the question
        title; if that yields no results, it retries with the question
        keywords joined into a single query.

        Returns:
            list[str]: video links (at most 2).
        """
        # Imported lazily; only needed when videos are actually requested.
        # (The original's unused `import json` was removed.)
        from youtubesearchpython import VideosSearch

        videosSearch = VideosSearch(self.title, limit=2)
        response = videosSearch.result()
        videos = []
        if len(response["result"]) <= 0:
            # Fall back to the keywords as one space-joined search phrase.
            videosSearch = VideosSearch(" ".join(self.keywords), limit=2)
            response = videosSearch.result()
        for i in response["result"]:
            videos.append(i["link"])
            print(i["link"])
        return videos
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment