Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2021-155
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2021-155
2021-155
Commits
17231558
Commit
17231558
authored
Aug 20, 2021
by
dasunx
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented a class and functions to search Github related resources such as repos and issues
parent
b77927d4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
24 deletions
+138
-24
backend/python/auto-answer/Dev.py
backend/python/auto-answer/Dev.py
+29
-9
backend/python/auto-answer/github.py
backend/python/auto-answer/github.py
+85
-0
backend/python/auto-answer/scrapper.py
backend/python/auto-answer/scrapper.py
+24
-15
No files found.
backend/python/auto-answer/Dev.py
View file @
17231558
...
@@ -9,7 +9,11 @@ class DevTo:
...
@@ -9,7 +9,11 @@ class DevTo:
self
.
title
=
title
self
.
title
=
title
self
.
tags
=
tags
self
.
tags
=
tags
def
getApiKey
(
self
):
def
get_api_key
(
self
):
"""
get random api key from api keys of rss2json.com
:return: string
"""
api_keys
=
[
api_keys
=
[
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn"
,
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn"
,
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz"
,
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz"
,
...
@@ -23,7 +27,10 @@ class DevTo:
...
@@ -23,7 +27,10 @@ class DevTo:
gresults
=
gsearch
.
search
(
*
search_args
)
gresults
=
gsearch
.
search
(
*
search_args
)
return
gresults
[
"links"
]
return
gresults
[
"links"
]
def
getValidUrls
(
self
,
links
):
def
get_valid_urls
(
self
,
links
):
"""
filter out invalid urls
"""
validUrls
=
[]
validUrls
=
[]
for
i
in
links
:
for
i
in
links
:
if
"dev.to"
in
i
:
if
"dev.to"
in
i
:
...
@@ -32,7 +39,10 @@ class DevTo:
...
@@ -32,7 +39,10 @@ class DevTo:
validUrls
.
append
(
ur
)
validUrls
.
append
(
ur
)
return
validUrls
return
validUrls
def
getValidSets
(
self
,
validUrls
):
def
get_valid_sets
(
self
,
validUrls
):
"""
extract valid usernames and tags from valid dev.to urls
"""
validSets
=
[]
validSets
=
[]
for
url
in
validUrls
:
for
url
in
validUrls
:
try
:
try
:
...
@@ -48,11 +58,14 @@ class DevTo:
...
@@ -48,11 +58,14 @@ class DevTo:
continue
continue
return
validSets
return
validSets
def
getBlogs
(
self
,
username
,
tag
):
def
get_blogs
(
self
,
username
,
tag
):
"""
get the contents of the dev.to article
"""
blog
=
{}
blog
=
{}
try
:
try
:
response
=
requests
.
get
(
response
=
requests
.
get
(
f
"https://api.rss2json.com/v1/api.json?rss_url=https
%3
A
%2
F
%2
Fdev.to
%2
Ffeed
%2
F{username}&api_key={self.get
ApiK
ey()}"
f
"https://api.rss2json.com/v1/api.json?rss_url=https
%3
A
%2
F
%2
Fdev.to
%2
Ffeed
%2
F{username}&api_key={self.get
_api_k
ey()}"
)
)
if
response
.
status_code
==
200
:
if
response
.
status_code
==
200
:
res
=
response
.
json
()
res
=
response
.
json
()
...
@@ -63,13 +76,20 @@ class DevTo:
...
@@ -63,13 +76,20 @@ class DevTo:
print
(
e
)
print
(
e
)
return
blog
return
blog
def
getDevArticles
(
self
):
def
get_dev_articles
(
self
):
"""
Search google for dev.to articles
return a list of urls
filter out invalid urls
get content of the valid urls
return the content of valid dev.to articles
"""
links
=
self
.
google
(
f
"site:dev.to {self.title} after:2020-01-01"
)
links
=
self
.
google
(
f
"site:dev.to {self.title} after:2020-01-01"
)
validUrls
=
self
.
get
ValidU
rls
(
links
)
validUrls
=
self
.
get
_valid_u
rls
(
links
)
validSets
=
self
.
get
ValidS
ets
(
validUrls
)
validSets
=
self
.
get
_valid_s
ets
(
validUrls
)
blogs
=
[]
blogs
=
[]
for
validset
in
validSets
:
for
validset
in
validSets
:
blog
=
self
.
get
B
logs
(
validset
[
"username"
],
validset
[
"tag"
])
blog
=
self
.
get
_b
logs
(
validset
[
"username"
],
validset
[
"tag"
])
if
bool
(
blog
):
if
bool
(
blog
):
blogs
.
append
(
blog
)
blogs
.
append
(
blog
)
return
{
"blogs"
:
blogs
,
"resources"
:
validUrls
}
return
{
"blogs"
:
blogs
,
"resources"
:
validUrls
}
backend/python/auto-answer/github.py
View file @
17231558
import
requests
from
search_engine_parser
import
GoogleSearch
import
re
class Github:
    """
    Collect GitHub resources (repositories and links) related to a search query.

    Repositories are fetched from the public GitHub REST API; links are taken
    from a Google search restricted to github.com.
    """

    # Base URL of the GitHub REST API.
    GITHUB_API = "https://api.github.com"
    # Maximum number of repositories returned by get_first_ten_repos.
    MAX_REPOS = 10
    # Seconds to wait for the GitHub API; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 10
    # Result links are expected to look like
    # "https://www.google.com/url?q=<target>&sa=<extra>" (assumption carried over
    # from the original slicing/regex logic - verify against search_engine_parser).
    GOOGLE_REDIRECT_MARKER = "/url?q="
    GOOGLE_TRACKING_MARKER = "&sa="

    def __init__(self):
        """
        Initialize the Github API.
        """

    def get_github_resources(self, query):
        """
        Search both the GitHub API and Google for resources matching a query.

        Both searches are always performed; neither is a fallback for the other.

        :param query: The search query.
        :return: A dict with "links" (cleaned github.com URLs found through
                 Google) and "repos" (at most MAX_REPOS repositories from the
                 GitHub API).
        """
        github_repos = self.search_github_repos(query)
        github_links = self.search_github_repos_in_google(query)
        return {
            "links": self.get_valid_urls(github_links),
            # Fall back to an empty list when the payload has no usable "items".
            "repos": self.get_first_ten_repos(github_repos.get("items") or []),
        }

    def get_first_ten_repos(self, repos):
        """
        Get the first ten repos if more than 10 repos are found,
        otherwise return all repos.

        :param repos: The repos.
        :return: The leading repos, at most MAX_REPOS of them.
        """
        # Slicing already copes with sequences shorter than the limit.
        return repos[: self.MAX_REPOS]

    def get_user_repos(self, user):
        """
        Get the repos of a user.

        :param user: The Github user.
        :return: The repos of the user (decoded JSON response).
        :raises requests.RequestException: on network failure, timeout or a
                                           non-2xx response.
        """
        url = "{}/users/{}/repos".format(self.GITHUB_API, user)
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_github_repos(self, query):
        """
        Search for repos on Github.

        :param query: The search query.
        :return: The decoded JSON search response.
        :raises requests.RequestException: on network failure, timeout or a
                                           non-2xx response.
        """
        url = "{}/search/repositories".format(self.GITHUB_API)
        # Pass the query through params so requests percent-encodes characters
        # such as "&", "#" or "+" instead of letting them corrupt the URL.
        response = requests.get(
            url,
            params={"q": query},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def search_github_repos_in_google(self, query):
        """
        Search for repos on Github using the Google search engine.

        :param query: The search query.
        :return: The "links" entry of the search results.
        """
        google_query = "site:github.com {}".format(query)
        # Second positional argument is 1 as in the original call; presumably
        # the result page number - verify against search_engine_parser.
        search_args = (google_query, 1)
        gsearch = GoogleSearch()
        gresults = gsearch.search(*search_args)
        return gresults["links"]

    def get_valid_urls(self, links):
        """
        Filter out non-GitHub links and strip the Google redirect wrapper.

        Links that are already direct (no redirect prefix) or that carry no
        "&sa=" suffix are kept as-is instead of raising.

        :param links: Iterable of result links.
        :return: List of cleaned URLs that contain "github.com".
        """
        valid_urls = []
        for link in links:
            # Unwrap the redirect when present; otherwise the link is already direct.
            _, marker, wrapped = link.partition(self.GOOGLE_REDIRECT_MARKER)
            target = wrapped if marker else link
            # Drop everything from the first "&sa=" onwards.
            target = target.split(self.GOOGLE_TRACKING_MARKER, 1)[0]
            if "github.com" in target:
                valid_urls.append(target)
        return valid_urls
\ No newline at end of file
backend/python/auto-answer/scrapper.py
View file @
17231558
...
@@ -2,11 +2,12 @@ from youtube import Youtube
...
@@ -2,11 +2,12 @@ from youtube import Youtube
from
Medium
import
Medium
from
Medium
import
Medium
from
Dev
import
DevTo
from
Dev
import
DevTo
from
stof
import
STOF
from
stof
import
STOF
# The module file is github.py; Python module names are case-sensitive.
from github import Github
import
sys
import
sys
from
database
import
get_database
from
database
import
get_database
def
saveAnswer
(
ans_id
,
stackoverflow
,
videos
,
medium_r
,
dev_r
):
def
saveAnswer
(
ans_id
,
stackoverflow
,
videos
,
medium_r
,
dev_r
,
github_r
):
db
=
get_database
()
db
=
get_database
()
try
:
try
:
from
bson.objectid
import
ObjectId
from
bson.objectid
import
ObjectId
...
@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
...
@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
{
"_id"
:
ObjectId
(
ans_id
)},
{
"_id"
:
ObjectId
(
ans_id
)},
{
{
"$set"
:
{
"$set"
:
{
"loading"
:
False
,
"youtube"
:
videos
,
"youtube"
:
videos
,
"stackoverflow"
:
stackoverflow
,
"stackoverflow"
:
stackoverflow
,
"medium_articles"
:
medium_r
[
"blogs"
],
"medium_articles"
:
medium_r
.
get
(
"blogs"
,
[]),
"dev_articles"
:
dev_r
[
"blogs"
],
"dev_articles"
:
dev_r
.
get
(
"blogs"
,
[]),
"medium_resources"
:
medium_r
[
"resources"
],
"medium_resources"
:
medium_r
.
get
(
"resources"
,
[]),
"dev_resources"
:
dev_r
[
"resources"
],
"dev_resources"
:
dev_r
.
get
(
"resources"
,
[]),
"github_repos"
:
github_r
.
get
(
"repos"
,
[]),
"github_links"
:
github_r
.
get
(
"links"
,
[]),
}
}
},
},
)
)
...
@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
...
@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
"dev_articles"
:
dev_r
[
"blogs"
],
"dev_articles"
:
dev_r
[
"blogs"
],
"medium_resources"
:
medium_r
[
"resources"
],
"medium_resources"
:
medium_r
[
"resources"
],
"dev_resources"
:
dev_r
[
"resources"
],
"dev_resources"
:
dev_r
[
"resources"
],
"github_repos"
:
github_r
[
"repos"
],
"github_links"
:
github_r
[
"links"
],
}
}
)
)
except
NameError
as
err
:
except
NameError
as
err
:
print
(
"ERRORRR"
)
print
(
err
)
print
(
err
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
# title = input("Enter question title: ")
# title = input("Enter question title: ")
title
=
sys
.
argv
[
1
]
# "python django or flask for web development"
title
=
sys
.
argv
[
1
]
tags
=
sys
.
argv
[
2
]
# ["react"]
# "what are the benefits of using java for mobile app development over flutter"
AUTO_ANS_ID
=
sys
.
argv
[
3
]
# "60dc9a5f84692f001569d7ab"
tags
=
sys
.
argv
[
2
]
# ["flutter","java"]
AUTO_ANS_ID
=
sys
.
argv
[
3
]
# "611feaff2c4db730e56d78e8"
stack
=
STOF
(
title
)
stack
=
STOF
(
title
)
ans
=
stack
.
searchQuestion
()
print
(
ans
)
medium
=
Medium
(
title
,
tags
)
medium
=
Medium
(
title
,
tags
)
medium_articels
=
medium
.
getMediumArticles
()
devto
=
DevTo
(
title
,
tags
)
devto
=
DevTo
(
title
,
tags
)
dev_articles
=
devto
.
getDevArticles
()
youtube
=
Youtube
(
title
,
tags
)
youtube
=
Youtube
(
title
,
tags
)
github
=
Github
()
ans
=
stack
.
searchQuestion
()
medium_articels
=
medium
.
getMediumArticles
()
dev_articles
=
devto
.
get_dev_articles
()
videos
=
youtube
.
find_videos
()
videos
=
youtube
.
find_videos
()
saveAnswer
(
AUTO_ANS_ID
,
ans
,
videos
,
medium_articels
,
dev_articles
)
github_resources
=
github
.
get_github_resources
(
title
)
saveAnswer
(
AUTO_ANS_ID
,
ans
,
videos
,
medium_articels
,
dev_articles
,
github_resources
)
print
(
"WORKED"
)
print
(
"WORKED"
)
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment