Commit 17231558 authored by dasunx's avatar dasunx

Implemented a class and functions to search GitHub-related resources such as repos and issues.

parent b77927d4
...@@ -9,7 +9,11 @@ class DevTo: ...@@ -9,7 +9,11 @@ class DevTo:
self.title = title self.title = title
self.tags = tags self.tags = tags
def getApiKey(self): def get_api_key(self):
"""
get random api key from api keys of rss2json.com
:return: string
"""
api_keys = [ api_keys = [
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn", "2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz", "yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
...@@ -23,7 +27,10 @@ class DevTo: ...@@ -23,7 +27,10 @@ class DevTo:
gresults = gsearch.search(*search_args) gresults = gsearch.search(*search_args)
return gresults["links"] return gresults["links"]
def getValidUrls(self, links): def get_valid_urls(self, links):
"""
filter out invalid urls
"""
validUrls = [] validUrls = []
for i in links: for i in links:
if "dev.to" in i: if "dev.to" in i:
...@@ -32,7 +39,10 @@ class DevTo: ...@@ -32,7 +39,10 @@ class DevTo:
validUrls.append(ur) validUrls.append(ur)
return validUrls return validUrls
def getValidSets(self, validUrls): def get_valid_sets(self, validUrls):
"""
extract valid usernames and tags from valid dev.to urls
"""
validSets = [] validSets = []
for url in validUrls: for url in validUrls:
try: try:
...@@ -48,11 +58,14 @@ class DevTo: ...@@ -48,11 +58,14 @@ class DevTo:
continue continue
return validSets return validSets
def getBlogs(self, username, tag): def get_blogs(self, username, tag):
"""
get the contents of the dev.to article
"""
blog = {} blog = {}
try: try:
response = requests.get( response = requests.get(
f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.getApiKey()}" f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.get_api_key()}"
) )
if response.status_code == 200: if response.status_code == 200:
res = response.json() res = response.json()
...@@ -63,13 +76,20 @@ class DevTo: ...@@ -63,13 +76,20 @@ class DevTo:
print(e) print(e)
return blog return blog
def getDevArticles(self): def get_dev_articles(self):
"""
Search google for dev.to articles
return a list of urls
filter out invalid urls
get content of the valid urls
return the content of valid dev.to articles
"""
links = self.google(f"site:dev.to {self.title} after:2020-01-01") links = self.google(f"site:dev.to {self.title} after:2020-01-01")
validUrls = self.getValidUrls(links) validUrls = self.get_valid_urls(links)
validSets = self.getValidSets(validUrls) validSets = self.get_valid_sets(validUrls)
blogs = [] blogs = []
for validset in validSets: for validset in validSets:
blog = self.getBlogs(validset["username"], validset["tag"]) blog = self.get_blogs(validset["username"], validset["tag"])
if bool(blog): if bool(blog):
blogs.append(blog) blogs.append(blog)
return {"blogs": blogs, "resources": validUrls} return {"blogs": blogs, "resources": validUrls}
import requests
from search_engine_parser import GoogleSearch
import re
class Github:
    """
    Search GitHub-related resources for a free-text query.

    Repositories are fetched from the GitHub REST search endpoint and
    github.com links are collected from a Google search restricted to
    ``site:github.com``.
    """

    # Seconds to wait for the GitHub API before giving up, so a stalled
    # connection cannot hang the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Number of leading characters dropped from every Google result link.
    # NOTE(review): presumably the redirect wrapper
    # "https://www.google.com/url?q=" (29 characters) - confirm against the
    # links search_engine_parser actually returns.
    GOOGLE_PREFIX_LENGTH = 29

    def __init__(self):
        """
        Initialize the Github API.
        """

    def get_github_resources(self, query):
        """
        Collect repositories and github.com links for a query.

        Both sources are always queried: the GitHub repository search and
        the Google ``site:github.com`` search.

        :param query: The search query.
        :return: dict with ``links`` (cleaned github.com urls) and ``repos``
            (at most ten repository objects from the GitHub search).
        :raises requests.RequestException: if the GitHub request fails,
            times out or returns an error status.
        """
        repos_response = self.search_github_repos(query)
        google_links = self.search_github_repos_in_google(query)
        return {
            "links": self.get_valid_urls(google_links),
            # A response without "items" is treated as "no repositories"
            # instead of raising KeyError.
            "repos": self.get_first_ten_repos(repos_response.get("items", [])),
        }

    def get_first_ten_repos(self, repos):
        """
        Return at most the first ten repos.

        :param repos: The repos.
        :return: The first ten repos, or all of them when there are fewer.
        """
        # Slicing already returns everything when fewer than ten are present.
        return repos[:10]

    def get_user_repos(self, user):
        """
        Get the repos of a user.

        :param user: The Github user.
        :return: The decoded JSON response listing the user's repos.
        :raises requests.RequestException: if the request fails, times out
            or returns an error status.
        """
        from urllib.parse import quote

        # Escape the user name so characters such as "/" or "?" cannot
        # change which endpoint is requested.
        url = "https://api.github.com/users/{}/repos".format(quote(user, safe=""))
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_github_repos(self, query):
        """
        Search for repos on Github.

        :param query: The search query.
        :return: The decoded JSON search response.
        :raises requests.RequestException: if the request fails, times out
            or returns an error status.
        """
        # Pass the query through ``params`` so requests url-encodes it;
        # characters such as "&", "#" or "+" would otherwise corrupt the
        # query string.
        response = requests.get(
            "https://api.github.com/search/repositories",
            params={"q": query},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def search_github_repos_in_google(self, query):
        """
        Search for repos on Github using the Google search engine.

        :param query: The search query.
        :return: The ``links`` entry of the first page of Google results.
        """
        google_query = "site:github.com {}".format(query)
        results = GoogleSearch().search(google_query, 1)
        return results["links"]

    def get_valid_urls(self, links):
        """
        Filter out invalid urls and strip the search-engine wrapper.

        A link is kept only when it contains ``github.com`` and, after the
        fixed-length prefix is dropped, still contains the ``&sa=`` marker;
        the returned url is the text between the prefix and that marker.
        Links without the marker are skipped instead of raising.

        :param links: Iterable of result links.
        :return: list of cleaned github.com urls.
        """
        valid_urls = []
        for link in links:
            if "github.com" not in link:
                continue
            match = re.match(r"^.*?\&sa=", link[self.GOOGLE_PREFIX_LENGTH:])
            if match is None:
                # Not in the expected wrapped form; calling .group() here
                # used to raise AttributeError.
                continue
            valid_urls.append(match.group(0).replace("&sa=", ""))
        return valid_urls
\ No newline at end of file
...@@ -2,11 +2,12 @@ from youtube import Youtube ...@@ -2,11 +2,12 @@ from youtube import Youtube
from Medium import Medium from Medium import Medium
from Dev import DevTo from Dev import DevTo
from stof import STOF from stof import STOF
from Github import Github
import sys import sys
from database import get_database from database import get_database
def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r): def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r, github_r):
db = get_database() db = get_database()
try: try:
from bson.objectid import ObjectId from bson.objectid import ObjectId
...@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r): ...@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
{"_id": ObjectId(ans_id)}, {"_id": ObjectId(ans_id)},
{ {
"$set": { "$set": {
"loading":False,
"youtube": videos, "youtube": videos,
"stackoverflow": stackoverflow, "stackoverflow": stackoverflow,
"medium_articles": medium_r["blogs"], "medium_articles": medium_r.get("blogs", []),
"dev_articles": dev_r["blogs"], "dev_articles": dev_r.get("blogs", []),
"medium_resources": medium_r["resources"], "medium_resources": medium_r.get("resources", []),
"dev_resources": dev_r["resources"], "dev_resources": dev_r.get("resources", []),
"github_repos": github_r.get("repos", []),
"github_links": github_r.get("links", []),
} }
}, },
) )
...@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r): ...@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
"dev_articles": dev_r["blogs"], "dev_articles": dev_r["blogs"],
"medium_resources": medium_r["resources"], "medium_resources": medium_r["resources"],
"dev_resources": dev_r["resources"], "dev_resources": dev_r["resources"],
"github_repos": github_r["repos"],
"github_links": github_r["links"],
} }
) )
except NameError as err: except NameError as err:
print("ERRORRR")
print(err) print(err)
if __name__ == "__main__": if __name__ == "__main__":
# title = input("Enter question title: ") # title = input("Enter question title: ")
title = sys.argv[1] # "python django or flask for web development" title = sys.argv[1]
tags = sys.argv[2] # ["react"] # "what are the benefits of using java for mobile app development over flutter"
AUTO_ANS_ID = sys.argv[3] # "60dc9a5f84692f001569d7ab" tags = sys.argv[2] # ["flutter","java"]
AUTO_ANS_ID = sys.argv[3] # "611feaff2c4db730e56d78e8"
stack = STOF(title) stack = STOF(title)
ans = stack.searchQuestion()
print(ans)
medium = Medium(title, tags) medium = Medium(title, tags)
medium_articels = medium.getMediumArticles()
devto = DevTo(title, tags) devto = DevTo(title, tags)
dev_articles = devto.getDevArticles()
youtube = Youtube(title, tags) youtube = Youtube(title, tags)
github = Github()
ans = stack.searchQuestion()
medium_articels = medium.getMediumArticles()
dev_articles = devto.get_dev_articles()
videos = youtube.find_videos() videos = youtube.find_videos()
saveAnswer(AUTO_ANS_ID, ans, videos, medium_articels, dev_articles) github_resources = github.get_github_resources(title)
saveAnswer(
AUTO_ANS_ID, ans, videos, medium_articels, dev_articles, github_resources
)
print("WORKED") print("WORKED")
sys.stdout.flush() sys.stdout.flush()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.