Commit 17231558 authored by dasunx

Implemented a class and functions to search GitHub-related resources such as repos and issues

parent b77927d4
......@@ -9,7 +9,11 @@ class DevTo:
self.title = title
self.tags = tags
def getApiKey(self):
def get_api_key(self):
"""
get random api key from api keys of rss2json.com
:return: string
"""
api_keys = [
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
......@@ -23,7 +27,10 @@ class DevTo:
gresults = gsearch.search(*search_args)
return gresults["links"]
def getValidUrls(self, links):
def get_valid_urls(self, links):
"""
filter out invalid urls
"""
validUrls = []
for i in links:
if "dev.to" in i:
......@@ -32,7 +39,10 @@ class DevTo:
validUrls.append(ur)
return validUrls
def getValidSets(self, validUrls):
def get_valid_sets(self, validUrls):
"""
extract valid usernames and tags from valid dev.to urls
"""
validSets = []
for url in validUrls:
try:
......@@ -48,11 +58,14 @@ class DevTo:
continue
return validSets
def getBlogs(self, username, tag):
def get_blogs(self, username, tag):
"""
get the contents of the dev.to article
"""
blog = {}
try:
response = requests.get(
f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.getApiKey()}"
f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.get_api_key()}"
)
if response.status_code == 200:
res = response.json()
......@@ -63,13 +76,20 @@ class DevTo:
print(e)
return blog
def getDevArticles(self):
def get_dev_articles(self):
"""
Search google for dev.to articles
return a list of urls
filter out invalid urls
get content of the valid urls
return the content of valid dev.to articles
"""
links = self.google(f"site:dev.to {self.title} after:2020-01-01")
validUrls = self.getValidUrls(links)
validSets = self.getValidSets(validUrls)
validUrls = self.get_valid_urls(links)
validSets = self.get_valid_sets(validUrls)
blogs = []
for validset in validSets:
blog = self.getBlogs(validset["username"], validset["tag"])
blog = self.get_blogs(validset["username"], validset["tag"])
if bool(blog):
blogs.append(blog)
return {"blogs": blogs, "resources": validUrls}
import requests
from search_engine_parser import GoogleSearch
import re
class Github:
    """
    Collect GitHub resources (repositories and links) related to a query.

    Repositories come from the public GitHub REST search API; extra links
    come from a Google search restricted to github.com.
    """

    # Seconds to wait on the GitHub API so a stalled connection cannot hang
    # the caller indefinitely.
    REQUEST_TIMEOUT = 10

    # Upper bound on the number of repositories handed back to the caller.
    MAX_REPOS = 10

    # Number of leading characters dropped from every Google result link.
    # NOTE(review): 29 == len("https://www.google.com/url?q="), so this
    # presumably strips Google's redirect prefix -- confirm against the
    # links actually returned by search_engine_parser.
    GOOGLE_PREFIX_LEN = 29

    # Marker that starts Google's tracking parameters after the target URL.
    GOOGLE_TRACKING_MARKER = "&sa="

    def __init__(self):
        """
        Initialize the Github helper. No state is kept on the instance.
        """

    def get_github_resources(self, query):
        """
        Gather GitHub repositories and github.com links for a query.

        Both sources are always queried: the GitHub search API for
        repositories and Google (``site:github.com``) for links.

        :param query: The search query.
        :return: dict with ``"links"`` (cleaned URLs from Google) and
            ``"repos"`` (at most ``MAX_REPOS`` entries from the API response).
        :raises requests.RequestException: if the GitHub API call fails,
            times out, or returns an error status.
        """
        github_repos = self.search_github_repos(query)
        github_links = self.search_github_repos_in_google(query)
        return {
            "links": self.get_valid_urls(github_links),
            # A response without "items" yields an empty list, not a KeyError.
            "repos": self.get_first_ten_repos(github_repos.get("items", [])),
        }

    def get_first_ten_repos(self, repos):
        """
        Return at most the first ten repos.

        :param repos: The repos (a list).
        :return: A list holding the first ``MAX_REPOS`` entries of ``repos``,
            or all of them when there are fewer.
        """
        # Slicing already copes with short and empty lists.
        return repos[: self.MAX_REPOS]

    def get_user_repos(self, user):
        """
        Get the repos of a user.

        :param user: The Github user.
        :return: The decoded JSON response listing the user's repos.
        :raises requests.RequestException: if the request fails, times out,
            or returns an error status.
        """
        url = f"https://api.github.com/users/{user}/repos"
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_github_repos(self, query):
        """
        Search for repos on Github.

        :param query: The search query.
        :return: The decoded JSON response of the repository search.
        :raises requests.RequestException: if the request fails, times out,
            or returns an error status.
        """
        # Passing the query through ``params`` lets requests URL-encode it,
        # so characters such as '&', '#' or '+' cannot corrupt the URL.
        response = requests.get(
            "https://api.github.com/search/repositories",
            params={"q": query},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def search_github_repos_in_google(self, query):
        """
        Search for repos on Github using the Google search engine.

        :param query: The search query.
        :return: The ``"links"`` entry of the first page of search results.
        """
        google_query = f"site:github.com {query}"
        # Second positional argument is the result page number.
        results = GoogleSearch().search(google_query, 1)
        return results["links"]

    def get_valid_urls(self, links):
        """
        Filter out invalid urls and strip Google's wrapping from the rest.

        A link is kept only when it mentions ``github.com`` and contains the
        ``&sa=`` marker after the leading prefix; links without the marker
        are skipped instead of raising.

        :param links: Iterable of raw result links.
        :return: List of cleaned URLs, in their original order.
        """
        valid_urls = []
        for link in links:
            if "github.com" not in link:
                continue
            target, marker, _ = link[self.GOOGLE_PREFIX_LEN :].partition(
                self.GOOGLE_TRACKING_MARKER
            )
            if not marker:
                # Not in the expected shape; nothing reliable to extract.
                continue
            valid_urls.append(target)
        return valid_urls
\ No newline at end of file
......@@ -2,11 +2,12 @@ from youtube import Youtube
from Medium import Medium
from Dev import DevTo
from stof import STOF
from Github import Github
import sys
from database import get_database
def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r, github_r):
db = get_database()
try:
from bson.objectid import ObjectId
......@@ -16,12 +17,15 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
{"_id": ObjectId(ans_id)},
{
"$set": {
"loading":False,
"youtube": videos,
"stackoverflow": stackoverflow,
"medium_articles": medium_r["blogs"],
"dev_articles": dev_r["blogs"],
"medium_resources": medium_r["resources"],
"dev_resources": dev_r["resources"],
"medium_articles": medium_r.get("blogs", []),
"dev_articles": dev_r.get("blogs", []),
"medium_resources": medium_r.get("resources", []),
"dev_resources": dev_r.get("resources", []),
"github_repos": github_r.get("repos", []),
"github_links": github_r.get("links", []),
}
},
)
......@@ -33,29 +37,34 @@ def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
"dev_articles": dev_r["blogs"],
"medium_resources": medium_r["resources"],
"dev_resources": dev_r["resources"],
"github_repos": github_r["repos"],
"github_links": github_r["links"],
}
)
except NameError as err:
print("ERRORRR")
print(err)
if __name__ == "__main__":
# title = input("Enter question title: ")
title = sys.argv[1] # "python django or flask for web development"
tags = sys.argv[2] # ["react"]
AUTO_ANS_ID = sys.argv[3] # "60dc9a5f84692f001569d7ab"
title = sys.argv[1]
# "what are the benefits of using java for mobile app development over flutter"
tags = sys.argv[2] # ["flutter","java"]
AUTO_ANS_ID = sys.argv[3] # "611feaff2c4db730e56d78e8"
stack = STOF(title)
ans = stack.searchQuestion()
print(ans)
medium = Medium(title, tags)
medium_articels = medium.getMediumArticles()
devto = DevTo(title, tags)
dev_articles = devto.getDevArticles()
youtube = Youtube(title, tags)
github = Github()
ans = stack.searchQuestion()
medium_articels = medium.getMediumArticles()
dev_articles = devto.get_dev_articles()
videos = youtube.find_videos()
saveAnswer(AUTO_ANS_ID, ans, videos, medium_articels, dev_articles)
github_resources = github.get_github_resources(title)
saveAnswer(
AUTO_ANS_ID, ans, videos, medium_articels, dev_articles, github_resources
)
print("WORKED")
sys.stdout.flush()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment