Commit 7963cda7 authored by Ekanayake P.M.D.P IT18013610

Merge branch 'it18013610' into 'master'

It18013610

See merge request !8
parents d49badaa f2718707
@@ -29,10 +29,38 @@ const AutomatedAnswerSchema = mongoose.Schema({
      type: String
    }
  ],
-  blogs: [
+  medium_articles: [
    {
-      type: Schema.Types.ObjectId,
-      ref: 'BlogArticle'
+      title: String,
+      pubDate: String,
+      link: String,
+      guid: String,
+      author: String,
+      thumbnail: String,
+      description: String,
+      content: String
    }
  ],
+  dev_articles: [
+    {
+      title: String,
+      pubDate: String,
+      link: String,
+      guid: String,
+      author: String,
+      thumbnail: String,
+      description: String,
+      content: String
+    }
+  ],
+  medium_resources: [
+    {
+      type: String
+    }
+  ],
+  dev_resources: [
+    {
+      type: String
+    }
+  ]
});
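The eight fields in medium_articles and dev_articles mirror the item objects that the rss2json API returns when it converts an RSS feed. A minimal sketch of one such item, with illustrative values only:

# Illustrative rss2json feed item; the new medium_articles / dev_articles
# sub-documents store these keys verbatim.
item = {
    "title": "Django vs Flask",
    "pubDate": "2021-01-15 10:00:00",
    "link": "https://medium.com/@someuser/django-vs-flask-1a2b3c",
    "guid": "https://medium.com/p/1a2b3c",
    "author": "Some User",
    "thumbnail": "https://cdn-images-1.medium.com/max/1024/1.jpeg",
    "description": "A short plain-text excerpt...",
    "content": "<p>The full HTML body of the article...</p>",
}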
......
const mongoose = require('mongoose');
const BlogArticleSchema = mongoose.Schema({
  automatedAnswer: {
    type: mongoose.Schema.Types.ObjectId,
    ref: 'AutomatedAnswer',
    required: true
  },
blogName: {
type: String,
required: true
},
link: {
type: String,
required: true
},
content: {
type: String
}
});
module.exports = mongoose.model('BlogArticle', BlogArticleSchema);
from search_engine_parser import GoogleSearch
import re
import requests
import random
class DevTo:
def __init__(self, title, tags):
self.title = title
self.tags = tags
    def getApiKey(self):
        """
        Return a randomly chosen rss2json API key.
        """
api_keys = [
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
"mpawymyrc6derrwmgodowfsaabtuoes4iiwintd7",
]
return random.choice(api_keys)
    def google(self, query):
        """
        Run a query through the Google search engine and return the result links.
        """
search_args = (query, 1)
gsearch = GoogleSearch()
gresults = gsearch.search(*search_args)
return gresults["links"]
    def getValidUrls(self, links):
        """
        Keep only dev.to links and strip the Google redirect wrapper.
        """
        validUrls = []
        for i in links:
            if "dev.to" in i:
                # Drop the 29-character "https://www.google.com/url?q=" prefix,
                # then cut everything from "&sa=" onwards.
                match = re.match(r"^.*?\&sa=", i[29:])
                if match:
                    validUrls.append(match.group(0).replace("&sa=", ""))
        return validUrls
    def getValidSets(self, validUrls):
        """
        Extract a username and article slug from each dev.to URL.
        """
validSets = []
for url in validUrls:
try:
vset = {}
print(url)
username = re.search(r"https://dev.to/([^/?]+)", url).group(1)
tag = re.search(r"https://dev.to/([^/?]+)/([^/?]+)", url).group(2)
vset["username"] = username
vset["tag"] = tag
validSets.append(vset)
except Exception as e:
print(e)
continue
return validSets
    def getBlogs(self, username, tag):
        """
        Fetch the matching feed item from the author's RSS feed via rss2json.
        """
        blog = {}
try:
response = requests.get(
f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.getApiKey()}"
)
if response.status_code == 200:
res = response.json()
for item in res["items"]:
if tag in item["link"]:
blog = item
except Exception as e:
print(e)
return blog
    def getDevArticles(self):
        """
        Return a dict with the matched articles ("blogs") and all
        candidate URLs ("resources").
        """
links = self.google(f"site:dev.to {self.title} after:2020-01-01")
validUrls = self.getValidUrls(links)
validSets = self.getValidSets(validUrls)
blogs = []
for validset in validSets:
blog = self.getBlogs(validset["username"], validset["tag"])
if bool(blog):
blogs.append(blog)
return {"blogs": blogs, "resources": validUrls}
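A minimal usage sketch of the class above; the call pattern follows main.py further down, but the title, tags, and printed fields are illustrative:

# Illustrative driver code for DevTo, mirroring how main.py calls it.
devto = DevTo("python django or flask for web development", ["python"])
result = devto.getDevArticles()
# result == {"blogs": [<rss2json items>], "resources": [<dev.to URLs>]}
for blog in result["blogs"]:
    print(blog["title"], blog["link"])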
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
from search_engine_parser import GoogleSearch
import re
from lxml import etree
import random
class Medium:
def __init__(self, qtitle, keywords=[], description=""):
self.qtitle = qtitle
self.keywords = keywords
self.description = description
self.urls = []
self.session = HTMLSession()
def searchArticles(self):
"""
Search details using google dorks,
With google dorks we can filter out other search results from other web sites.
"""
html_page = requests.get(
f"https://google.com/search?q=site%3Amedium.com+{self.qtitle}"
)
soup = BeautifulSoup(html_page.content, "html.parser")
for link in soup.findAll("a"):
if "https://medium.com" in link["href"]:
self.urls.append(self.extractMediumURLS(link["href"]))
self.viewArticle(self.urls[0])
    def extractMediumURLS(self, uriString):
        """
        Strip the Google redirect wrapper from a search-result link
        and return the bare article URL.
        """
        # Drop the 7-character "/url?q=" prefix, then cut everything
        # from "&sa=" onwards.
        uriTrimmed = uriString[7:]
        uriTrimmed = re.match(r"^.*?\&sa=", uriTrimmed).group(0)
        return uriTrimmed.replace("&sa=", "")
def viewArticle(self, url):
html_page = self.session.get(url)
html_page.html.render(timeout=20)
# soup = BeautifulSoup(html_page.content, "html.parser")
# dom = etree.HTML(str(soup))
with open("medium.html", "wb") as med:
med.write(html_page.content)
med.close()
with open("medium.html", encoding="utf8") as sf:
soup = BeautifulSoup(sf, "html.parser")
dom = etree.HTML(str(soup))
# art = dom.xpath('//*[@class="a b c"]')[0]
# print(etree.tostring(art))
title = dom.xpath('//*[@class="ap aq ar as at ff av w"]/div/h1')[0].text
article = dom.xpath('//*[@class="ap aq ar as at ff av w"]')[0]
with open(f"article-{title.replace(' ','')}.html", "wb") as artFile:
artFile.write(etree.tostring(article))
artFile.close()
def __init__(self, title, tags):
self.title = title
self.tags = tags
def getApiKey(self):
"""
Returns an API key for retrieve json data
"""
api_keys = [
"2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
"yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
"mpawymyrc6derrwmgodowfsaabtuoes4iiwintd7",
]
return random.choice(api_keys)
def google(self, query):
"""
Use a query to search using google search enging
"""
search_args = (query, 1)
gsearch = GoogleSearch()
gresults = gsearch.search(*search_args)
return gresults["links"]
    def getValidUrls(self, links):
        """
        Validate and filter the URLs.
        Returns the URLs that contain medium.com as a list.
        """
        validUrls = []
        for i in links:
            if "medium.com" in i:
                # Drop the 29-character "https://www.google.com/url?q=" prefix,
                # then cut everything from "&sa=" onwards.
                match = re.match(r"^.*?\&sa=", i[29:])
                if match:
                    validUrls.append(match.group(0).replace("&sa=", ""))
        return validUrls
def getValidSets(self, validUrls):
"""
Extract usernames and article id's from article url
pass a list of urls => returns objects list that contain usernam and article id
"""
validSets = []
for url in validUrls:
try:
vset = {}
print(url)
username = re.search(r"https://medium.com/([^/?]+)", url).group(1)
tag = re.search(r"https://medium.com/([^/?]+)/([^/?]+)", url).group(2)
vset["username"] = username
vset["tag"] = tag
validSets.append(vset)
except Exception as e:
print(e)
continue
return validSets
def getBlogs(self, username, tag):
"""
Get the content of the article
"""
blog = {}
try:
response = requests.get(
f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fmedium.com%2Ffeed%2F{username}&api_key={self.getApiKey()}"
)
if response.status_code == 200:
res = response.json()
for item in res["items"]:
if tag in item["link"]:
blog = item
except Exception as e:
print(e)
return blog
def getMediumArticles(self):
"""
return a list of articles and/or resources
"""
links = self.google(f"site:medium.com {self.title} after:2020-01-01")
validUrls = self.getValidUrls(links)
validSets = self.getValidSets(validUrls)
blogs = []
for validset in validSets:
blog = self.getBlogs(validset["username"], validset["tag"])
if bool(blog):
blogs.append(blog)
return {"blogs": blogs, "resources": validUrls}
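getBlogs hard-codes the percent-encoding of the feed address (%3A%2F%2F is an encoded ://). A sketch of an equivalent, arguably clearer way to build the same rss2json request with urllib.parse; this helper is not part of this merge request:

from urllib.parse import urlencode

def rss2json_url(username, api_key):
    # Builds the same request getBlogs assembles by hand: rss_url must
    # carry the URL-encoded Medium feed address.
    params = {
        "rss_url": f"https://medium.com/feed/{username}",
        "api_key": api_key,
    }
    return "https://api.rss2json.com/v1/api.json?" + urlencode(params)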
bson==0.5.10
beautifulsoup4==4.9.3
dnspython==2.1.0
lxml==4.6.1
@@ -6,4 +7,5 @@ regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
+search-engine-parser==0.6.2
youtube-search-python==1.4.6
from youtube import Youtube
from Medium import Medium
+from Dev import DevTo
from stof import STOF
import sys
from database import get_database

-def saveAnswer(ans_id, stackoverflow, videos):
+def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
db = get_database()
try:
from bson.objectid import ObjectId
@@ -13,7 +14,26 @@ def saveAnswer(ans_id, stackoverflow, videos):
        automatedanswers = db["automatedanswers"]
        automatedanswers.update_one(
            {"_id": ObjectId(ans_id)},
-            {"$set": {"youtube": videos, "stackoverflow": stackoverflow}},
+            {
+                "$set": {
+                    "youtube": videos,
+                    "stackoverflow": stackoverflow,
+                    "medium_articles": medium_r["blogs"],
+                    "dev_articles": dev_r["blogs"],
+                    "medium_resources": medium_r["resources"],
+                    "dev_resources": dev_r["resources"],
+                }
+            },
        )
+        print(
+            {
+                "youtube": videos,
+                "stackoverflow": stackoverflow,
+                "medium_articles": medium_r["blogs"],
+                "dev_articles": dev_r["blogs"],
+                "medium_resources": medium_r["resources"],
+                "dev_resources": dev_r["resources"],
+            }
+        )
    except NameError as err:
        print(err)
@@ -23,19 +43,19 @@ if __name__ == "__main__":
    # title = input("Enter question title: ")
    title = sys.argv[1]  # e.g. "python django or flask for web development"
    tags = sys.argv[2]  # e.g. ["react"]
-    AUTO_ANS_ID = sys.argv[3]  # "60d746076689344694ad9e30" #
+    AUTO_ANS_ID = sys.argv[3]  # e.g. "60dc9a5f84692f001569d7ab"
    stack = STOF(title)
    ans = stack.searchQuestion()
    print(ans)
-    # medium = Medium(title)
-    # medium.searchArticles()
-    # f = open("data.txt", "a")
-    # f.write(f"updated {title} {tags} {AUTO_ANS_ID}\n")
-    # f.close()
+    medium = Medium(title, tags)
+    medium_articles = medium.getMediumArticles()
+    devto = DevTo(title, tags)
+    dev_articles = devto.getDevArticles()
    youtube = Youtube(title, tags)
    videos = youtube.find_videos()
-    saveAnswer(AUTO_ANS_ID, ans, videos)
+    saveAnswer(AUTO_ANS_ID, ans, videos, medium_articles, dev_articles)
    print("WORKED")
    sys.stdout.flush()
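database.get_database is imported at the top of this script, but its module is not part of the diff. A hypothetical pymongo-based sketch of what it might look like; the connection URI and database name are assumptions:

# Hypothetical database.py; the URI and database name are assumptions.
from pymongo import MongoClient

def get_database():
    client = MongoClient("mongodb://localhost:27017")
    return client["automated_answers"]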
@@ -7,4 +7,5 @@ regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
+search-engine-parser==0.6.2
youtube-search-python==1.4.6
import React from 'react'
import StackOverflowAnswer from '../stof-answer'
+import YoutubeVideoWrapper from '../youtube-videos/YoutubeVideoWrapper'

const AutomatedAnswer = ({ automatedAnswer }) => {
  console.log(automatedAnswer)
@@ -10,6 +11,13 @@ const AutomatedAnswer = ({ automatedAnswer }) => {
      ) : (
        <h1>No Stack Overflow answer found</h1>
      )}
+      {automatedAnswer.youtube != null &&
+      automatedAnswer.youtube.length >= 1 ? (
+        <YoutubeVideoWrapper videos={automatedAnswer.youtube} />
+      ) : (
+        <h1>No YouTube videos found for this question</h1>
+      )}
    </>
  )
}
......
@@ -7,7 +7,9 @@ const StackOverflowAnswer = ({ stof }) => {
  }
  return (
    <>
-      <h1 className={styles.h}>Answer from Stackoverflow - {stof.status}</h1>
+      <h1 className={styles.h}>
+        I found {stof.status} answer on Stack Overflow
+      </h1>
      <div className={styles.wrapper}>
        <div dangerouslySetInnerHTML={createMarkup()}></div>
      </div>
......
@@ -5,8 +5,10 @@
  padding: 0.8em;
  border-radius: 10px;
  font-family: 'Open Sans', sans-serif;
+  margin-bottom: 3em;
+  color: white;
}

.h {
-  font-size: 1.3em !important;
+  font-size: 2em !important;
}
import React from 'react'
import YoutubeVideo from './youtube-video/YoutubeVideo'
import styles from './YoutubeVideoWrapper.module.css'
const YoutubeVideoWrapper = ({ videos }) => {
return (
<>
      <h1 className={styles.h1}>
        Here {videos.length > 1 ? 'are' : 'is'} {videos.length} video
        {videos.length > 1 && 's'} I found on YouTube
      </h1>
<div className={styles.wrapper}>
{videos.map((video, index) => {
return <YoutubeVideo video={video} key={index} />
})}
</div>
</>
)
}
export default YoutubeVideoWrapper
.wrapper {
margin-top: 0.5em;
background-color: #2d2d2d;
padding: 0.8em;
border-radius: 2px;
margin-bottom: 3em;
}
.h1 {
font-size: 2em !important;
}
import React from 'react'
import styles from './YoutubeVideo.module.css'
const YoutubeVideo = ({ video }) => {
  // `video` is a full watch URL ("https://www.youtube.com/watch?v=<id>");
  // the embed id is everything after that 32-character prefix.
  const videoId = video.substring(32)
  return (
    <div className={styles.video_responsive}>
      <iframe
        width="853"
        height="480"
        src={`https://youtube.com/embed/${videoId}`}
frameBorder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowFullScreen
title="Embedded youtube"
/>
</div>
)
}
export default YoutubeVideo
.video_responsive {
  overflow: hidden;
  padding-bottom: 56.25%;
  position: relative;
  height: 0;
  margin: 1em 0;
}
.video_responsive iframe {
  left: 0;
  top: 0;
  height: 100%;
  width: 100%;
  position: absolute;
}
@@ -4,7 +4,7 @@
line-height: 1.5em;
word-break: break-word;
font-size: 15px;
color: white;
a {
text-decoration: underline;
color: var(--blue-300);
@@ -43,6 +43,13 @@
    margin-top: 0.6em;
  }
}
+h2 {
+  color: white !important;
+  font-weight: bold !important;
+  font-size: 1.4em !important;
+  margin: 1.6em 0 0.7em 0;
+}
}
.s-prose *:not(.s-code-block) > code {
@@ -65,3 +72,40 @@ sup {
sub {
  vertical-align: sub;
}
+.s-table-container {
+  /* margin-bottom: 1.1em; */
+  overflow-x: auto;
+}
+.s-table {
+  display: table;
+  width: 100%;
+  max-width: 100%;
+  border-collapse: collapse;
+  border-spacing: 0;
+  font-size: 13px;
+}
+.s-table thead th {
+  vertical-align: bottom;
+  white-space: nowrap;
+  background-color: var(--black-600);
+  line-height: 1.15384615;
+}
+.s-table th {
+  font-weight: bold;
+  color: var(--fc-dark);
+}
+.s-table th,
+.s-table td {
+  padding: 8px;
+  border-top: 1px solid var(--black-100);
+  border-left: 1px solid var(--black-100);
+  border-right: 1px solid var(--black-100);
+  vertical-align: middle;
+  color: var(--fc-medium);
+  text-align: left;
+}