Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2021-155
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2021-155
2021-155
Commits
17231558
Commit
17231558
authored
Aug 20, 2021
by
dasunx
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implemented a class and functions to search Github related resources such as repos and issues
parent
b77927d4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
24 deletions
+138
-24
backend/python/auto-answer/Dev.py
backend/python/auto-answer/Dev.py
+29
-9
backend/python/auto-answer/github.py
backend/python/auto-answer/github.py
+85
-0
backend/python/auto-answer/scrapper.py
backend/python/auto-answer/scrapper.py
+24
-15
No files found.
backend/python/auto-answer/Dev.py
View file @
17231558
...
...
@@ -9,7 +9,11 @@ class DevTo:
self
.
title
=
title
self
.
tags
=
tags
def
getApiKey
(
self
):
def
get_api_key
(
self
):
"""
get random api key from api keys of rss2json.com
:return: string
"""
api_keys
=
[
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn"
,
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz"
,
...
...
@@ -23,7 +27,10 @@ class DevTo:
gresults
=
gsearch
.
search
(
*
search_args
)
return
gresults
[
"links"
]
def
getValidUrls
(
self
,
links
):
def
get_valid_urls
(
self
,
links
):
"""
filter out invalid urls
"""
validUrls
=
[]
for
i
in
links
:
if
"dev.to"
in
i
:
...
...
@@ -32,7 +39,10 @@ class DevTo:
validUrls
.
append
(
ur
)
return
validUrls
def
getValidSets
(
self
,
validUrls
):
def
get_valid_sets
(
self
,
validUrls
):
"""
extract valid usernames and tags from valid dev.to urls
"""
validSets
=
[]
for
url
in
validUrls
:
try
:
...
...
@@ -48,11 +58,14 @@ class DevTo:
continue
return
validSets
def
getBlogs
(
self
,
username
,
tag
):
def
get_blogs
(
self
,
username
,
tag
):
"""
get the contents of the dev.to article
"""
blog
=
{}
try
:
response
=
requests
.
get
(
f
"https://api.rss2json.com/v1/api.json?rss_url=https
%3
A
%2
F
%2
Fdev.to
%2
Ffeed
%2
F{username}&api_key={self.get
ApiK
ey()}"
f
"https://api.rss2json.com/v1/api.json?rss_url=https
%3
A
%2
F
%2
Fdev.to
%2
Ffeed
%2
F{username}&api_key={self.get
_api_k
ey()}"
)
if
response
.
status_code
==
200
:
res
=
response
.
json
()
...
...
@@ -63,13 +76,20 @@ class DevTo:
print
(
e
)
return
blog
def
getDevArticles
(
self
):
def
get_dev_articles
(
self
):
"""
Search google for dev.to articles
return a list of urls
filter out invalid urls
get content of the valid urls
return the content of valid dev.to articles
"""
links
=
self
.
google
(
f
"site:dev.to {self.title} after:2020-01-01"
)
validUrls
=
self
.
get
ValidU
rls
(
links
)
validSets
=
self
.
get
ValidS
ets
(
validUrls
)
validUrls
=
self
.
get
_valid_u
rls
(
links
)
validSets
=
self
.
get
_valid_s
ets
(
validUrls
)
blogs
=
[]
for
validset
in
validSets
:
blog
=
self
.
get
B
logs
(
validset
[
"username"
],
validset
[
"tag"
])
blog
=
self
.
get
_b
logs
(
validset
[
"username"
],
validset
[
"tag"
])
if
bool
(
blog
):
blogs
.
append
(
blog
)
return
{
"blogs"
:
blogs
,
"resources"
:
validUrls
}
backend/python/auto-answer/github.py
View file @
17231558
import
requests
from
search_engine_parser
import
GoogleSearch
import
re
class Github:
    """
    Collect GitHub resources related to a search query.

    Repositories are fetched from the GitHub REST API; additional links are
    gathered from a Google search restricted to ``github.com``.
    """

    # Seconds to wait on a GitHub API call; without a timeout ``requests``
    # would block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Upper bound on the number of repositories returned to the caller.
    MAX_REPOS = 10

    # Presumably Google wraps result links as
    # "https://www.google.com/url?q=<target>&sa=..."; these two markers
    # delimit <target> inside such a link.
    GOOGLE_REDIRECT_MARKER = "/url?q="
    GOOGLE_TRACKING_MARKER = "&sa="

    def __init__(self):
        """
        Initialize the Github API.
        """

    def get_github_resources(self, query):
        """
        Search both the GitHub API and Google for resources matching a query.

        :param query: The search query.
        :return: A dict with two keys: ``"links"`` (cleaned github.com urls
            found through Google) and ``"repos"`` (at most the first ten
            repositories returned by the GitHub search API).
        """
        github_repos = self.search_github_repos(query)
        github_links = self.search_github_repos_in_google(query)
        return {
            "links": self.get_valid_urls(github_links),
            # A response without "items" is treated as "no repos found"
            # instead of raising KeyError.
            "repos": self.get_first_ten_repos(github_repos.get("items", [])),
        }

    def get_first_ten_repos(self, repos):
        """
        Get the first ten repos if more than 10 repos are found,
        otherwise return all repos.

        :param repos: The repos.
        :return: A list holding at most the first ten entries of ``repos``.
        """
        # Slicing already copes with sequences shorter than the limit.
        return repos[: self.MAX_REPOS]

    def get_user_repos(self, user):
        """
        Get the repos of a user.

        :param user: The Github user.
        :return: The repos of the user (decoded JSON response).
        :raises requests.RequestException: on network failure, timeout or
            a non-2xx response.
        """
        url = "https://api.github.com/users/{}/repos".format(user)
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_github_repos(self, query):
        """
        Search for repos on Github.

        :param query: The search query.
        :return: The repos found (decoded JSON response of the search API).
        :raises requests.RequestException: on network failure, timeout or
            a non-2xx response.
        """
        # Passing the query through ``params`` lets requests url-encode it,
        # so characters such as "&", "#" or "+" cannot alter the request.
        response = requests.get(
            "https://api.github.com/search/repositories",
            params={"q": query},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def search_github_repos_in_google(self, query):
        """
        Search for repos on Github using the google search engine.

        :param query: The search query.
        :return: The value stored under ``"links"`` in the search results.
        """
        google_query = "site:github.com {}".format(query)
        # Second positional argument is 1, as in the original call;
        # presumably the result page number.
        gresults = GoogleSearch().search(google_query, 1)
        return gresults["links"]

    def get_valid_urls(self, links):
        """
        Keep only github.com links and strip the Google redirect wrapper.

        A link of the form ``<prefix>/url?q=<target>&sa=<tracking>`` is
        reduced to ``<target>``. Links that carry no redirect prefix or no
        ``&sa=`` suffix are kept as they are instead of raising.

        :param links: Iterable of url strings.
        :return: List of cleaned urls, in input order.
        """
        valid_urls = []
        for link in links:
            if "github.com" not in link:
                continue
            _, marker, target = link.partition(self.GOOGLE_REDIRECT_MARKER)
            if not marker:
                # Not a redirect link: the url itself is the target.
                target = link
            # Drop Google's tracking parameters, if any.
            target = target.partition(self.GOOGLE_TRACKING_MARKER)[0]
            valid_urls.append(target)
        return valid_urls
\ No newline at end of file
backend/python/auto-answer/scrapper.py
View file @
17231558
...
...
@@ -2,11 +2,12 @@ from youtube import Youtube
from
Medium
import
Medium
from
Dev
import
DevTo
from
stof
import
STOF
from
Github
import
Github
import
sys
from
database
import
get_database
def
saveAnswer
(
ans_id
,
stackoverflow
,
videos
,
medium_r
,
dev_r
):
def
saveAnswer
(
ans_id
,
stackoverflow
,
videos
,
medium_r
,
dev_r
,
github_r
):
db
=
get_database
()
try
:
from
bson.objectid
import
ObjectId
...
...
@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
{
"_id"
:
ObjectId
(
ans_id
)},
{
"$set"
:
{
"loading"
:
False
,
"youtube"
:
videos
,
"stackoverflow"
:
stackoverflow
,
"medium_articles"
:
medium_r
[
"blogs"
],
"dev_articles"
:
dev_r
[
"blogs"
],
"medium_resources"
:
medium_r
[
"resources"
],
"dev_resources"
:
dev_r
[
"resources"
],
"medium_articles"
:
medium_r
.
get
(
"blogs"
,
[]),
"dev_articles"
:
dev_r
.
get
(
"blogs"
,
[]),
"medium_resources"
:
medium_r
.
get
(
"resources"
,
[]),
"dev_resources"
:
dev_r
.
get
(
"resources"
,
[]),
"github_repos"
:
github_r
.
get
(
"repos"
,
[]),
"github_links"
:
github_r
.
get
(
"links"
,
[]),
}
},
)
...
...
@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
"dev_articles"
:
dev_r
[
"blogs"
],
"medium_resources"
:
medium_r
[
"resources"
],
"dev_resources"
:
dev_r
[
"resources"
],
"github_repos"
:
github_r
[
"repos"
],
"github_links"
:
github_r
[
"links"
],
}
)
except
NameError
as
err
:
print
(
"ERRORRR"
)
print
(
err
)
if
__name__
==
"__main__"
:
# title = input("Enter question title: ")
title
=
sys
.
argv
[
1
]
# "python django or flask for web development"
tags
=
sys
.
argv
[
2
]
# ["react"]
AUTO_ANS_ID
=
sys
.
argv
[
3
]
# "60dc9a5f84692f001569d7ab"
title
=
sys
.
argv
[
1
]
# "what are the benefits of using java for mobile app development over flutter"
tags
=
sys
.
argv
[
2
]
# ["flutter","java"]
AUTO_ANS_ID
=
sys
.
argv
[
3
]
# "611feaff2c4db730e56d78e8"
stack
=
STOF
(
title
)
ans
=
stack
.
searchQuestion
()
print
(
ans
)
medium
=
Medium
(
title
,
tags
)
medium_articels
=
medium
.
getMediumArticles
()
devto
=
DevTo
(
title
,
tags
)
dev_articles
=
devto
.
getDevArticles
()
youtube
=
Youtube
(
title
,
tags
)
github
=
Github
()
ans
=
stack
.
searchQuestion
()
medium_articels
=
medium
.
getMediumArticles
()
dev_articles
=
devto
.
get_dev_articles
()
videos
=
youtube
.
find_videos
()
saveAnswer
(
AUTO_ANS_ID
,
ans
,
videos
,
medium_articels
,
dev_articles
)
github_resources
=
github
.
get_github_resources
(
title
)
saveAnswer
(
AUTO_ANS_ID
,
ans
,
videos
,
medium_articels
,
dev_articles
,
github_resources
)
print
(
"WORKED"
)
sys
.
stdout
.
flush
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment