Commit 7963cda7 authored by Ekanayake P.M.D.P (IT18013610)

Merge branch 'it18013610' into 'master'

It18013610

See merge request !8
parents d49badaa f2718707
......@@ -29,10 +29,38 @@ const AutomatedAnswerSchema = mongoose.Schema({
type: String
}
],
blogs: [
medium_articles: [
{
type: Schema.Types.ObjectId,
ref: 'BlogArticle'
title: String,
pubDate: String,
link: String,
guid: String,
author: String,
thumbnail: String,
description: String,
content: String
}
],
dev_articles: [
{
title: String,
pubDate: String,
link: String,
guid: String,
author: String,
thumbnail: String,
description: String,
content: String
}
],
medium_resources: [
{
type: String
}
],
dev_resources: [
{
type: String
}
]
});
......
const mongoose = require('mongoose');
// Mongoose model for a blog article linked back to an AutomatedAnswer document.
const BlogArticleSchema = mongoose.Schema({
  // Owning automated answer.
  // BUG FIX: `Schema` was never brought into scope (only `mongoose` is
  // required above), so `Schema.Types.ObjectId` threw a ReferenceError at
  // module load; qualify it through the `mongoose` namespace instead.
  automatedAnswer: {
    type: mongoose.Schema.Types.ObjectId,
    ref: 'AutomatedAnswer',
    required: true
  },
  // Display name of the blog the article came from.
  blogName: {
    type: String,
    required: true
  },
  // Canonical URL of the article.
  link: {
    type: String,
    required: true
  },
  // Optional scraped article body (HTML/text).
  content: {
    type: String
  }
});
module.exports = mongoose.model('BlogArticle', BlogArticleSchema);
from search_engine_parser import GoogleSearch
import re
import requests
import random
class DevTo:
    """Find dev.to articles related to a question.

    Google-dorks for ``site:dev.to`` results, cleans the Google redirect
    URLs, then pulls each article's content through the rss2json API.
    Mirrors the ``Medium`` scraper class.
    """

    def __init__(self, title, tags):
        self.title = title
        self.tags = tags

    def getApiKey(self):
        """Return a random rss2json API key (spreads the request quota)."""
        # SECURITY NOTE(review): credentials are hard-coded in source and
        # shared with Medium.py; move them to environment/config.
        api_keys = [
            "2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
            "yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
            "mpawymyrc6derrwmgodowfsaabtuoes4iiwintd7",
        ]
        return random.choice(api_keys)

    def google(self, query):
        """Run ``query`` through the Google search engine; return result links."""
        search_args = (query, 1)
        gsearch = GoogleSearch()
        gresults = gsearch.search(*search_args)
        return gresults["links"]

    def getValidUrls(self, links):
        """Filter Google redirect links down to clean dev.to article URLs."""
        validUrls = []
        for i in links:
            if "dev.to" not in i:
                continue
            # Drop the 29-char Google redirect prefix, then keep everything
            # up to the "&sa=" tracking parameter.
            matched = re.match(r"^.*?\&sa=", i[29:])
            if matched is None:
                # BUG FIX: previously .group(0) on None raised AttributeError
                # for links without a "&sa=" suffix; skip them instead.
                continue
            validUrls.append(matched.group(0).replace("&sa=", ""))
        return validUrls

    def getValidSets(self, validUrls):
        """Extract ``{"username", "tag"}`` pairs from dev.to article URLs.

        URLs that do not match the expected layout are logged and skipped.
        """
        validSets = []
        for url in validUrls:
            try:
                vset = {}
                print(url)
                username = re.search(r"https://dev.to/([^/?]+)", url).group(1)
                tag = re.search(r"https://dev.to/([^/?]+)/([^/?]+)", url).group(2)
                vset["username"] = username
                vset["tag"] = tag
                validSets.append(vset)
            except Exception as e:
                print(e)
                continue
        return validSets

    def getBlogs(self, username, tag):
        """Fetch the user's dev.to RSS feed via rss2json and return the
        feed item whose link contains ``tag`` (empty dict when not found)."""
        blog = {}
        try:
            response = requests.get(
                f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fdev.to%2Ffeed%2F{username}&api_key={self.getApiKey()}"
            )
            if response.status_code == 200:
                res = response.json()
                for item in res["items"]:
                    if tag in item["link"]:
                        blog = item
        except Exception as e:
            print(e)
        return blog

    def getDevArticles(self):
        """Return ``{"blogs": [...], "resources": [...]}`` for ``self.title``."""
        links = self.google(f"site:dev.to {self.title} after:2020-01-01")
        validUrls = self.getValidUrls(links)
        validSets = self.getValidSets(validUrls)
        blogs = []
        for validset in validSets:
            blog = self.getBlogs(validset["username"], validset["tag"])
            if bool(blog):
                blogs.append(blog)
        return {"blogs": blogs, "resources": validUrls}
import requests
from requests_html import HTMLSession
from bs4 import BeautifulSoup
import json
from search_engine_parser import GoogleSearch
import re
from lxml import etree
import requests
import random
class Medium:
    """Find medium.com articles related to a question.

    Google-dorks for ``site:medium.com`` results, cleans the Google
    redirect URLs, then pulls each article's content through the rss2json
    API. Mirrors the ``DevTo`` scraper class.

    NOTE(review): the original text was a merge-mangled diff view — a
    second ``__init__(qtitle, keywords, description)`` plus truncated
    pre-merge methods (``searchArticles``/``extractMediumURLS``/
    ``viewArticle``, including an unclosed ``requests.get(`` call that made
    the file a syntax error) were interleaved with the merged code. Only
    the post-merge implementation, matching the driver's
    ``Medium(title, tags).getMediumArticles()`` usage, is kept here.
    """

    def __init__(self, title, tags):
        self.title = title
        self.tags = tags

    def getApiKey(self):
        """Return a random rss2json API key (spreads the request quota)."""
        # SECURITY NOTE(review): credentials are hard-coded in source and
        # shared with Dev.py; move them to environment/config.
        api_keys = [
            "2rk1eg4sexdnp5umrwtwbtwd2insqvgzvejooqrn",
            "yit6ytfcs3ziawdgasfd3bgkbf4tef1m2nzdxvnz",
            "mpawymyrc6derrwmgodowfsaabtuoes4iiwintd7",
        ]
        return random.choice(api_keys)

    def google(self, query):
        """Run ``query`` through the Google search engine; return result links."""
        search_args = (query, 1)
        gsearch = GoogleSearch()
        gresults = gsearch.search(*search_args)
        return gresults["links"]

    def getValidUrls(self, links):
        """Filter Google redirect links down to clean medium.com article URLs."""
        validUrls = []
        for i in links:
            if "medium.com" not in i:
                continue
            # Drop the 29-char Google redirect prefix, then keep everything
            # up to the "&sa=" tracking parameter.
            matched = re.match(r"^.*?\&sa=", i[29:])
            if matched is None:
                # Skip malformed links (no "&sa=" suffix) instead of crashing.
                continue
            validUrls.append(matched.group(0).replace("&sa=", ""))
        return validUrls

    def getValidSets(self, validUrls):
        """Extract ``{"username", "tag"}`` pairs from medium.com article URLs.

        URLs that do not match the expected layout are logged and skipped.
        """
        validSets = []
        for url in validUrls:
            try:
                vset = {}
                print(url)
                username = re.search(r"https://medium.com/([^/?]+)", url).group(1)
                tag = re.search(r"https://medium.com/([^/?]+)/([^/?]+)", url).group(2)
                vset["username"] = username
                vset["tag"] = tag
                validSets.append(vset)
            except Exception as e:
                print(e)
                continue
        return validSets

    def getBlogs(self, username, tag):
        """Fetch the user's Medium RSS feed via rss2json and return the
        feed item whose link contains ``tag`` (empty dict when not found)."""
        blog = {}
        try:
            response = requests.get(
                f"https://api.rss2json.com/v1/api.json?rss_url=https%3A%2F%2Fmedium.com%2Ffeed%2F{username}&api_key={self.getApiKey()}"
            )
            if response.status_code == 200:
                res = response.json()
                for item in res["items"]:
                    if tag in item["link"]:
                        blog = item
        except Exception as e:
            print(e)
        return blog

    def getMediumArticles(self):
        """Return ``{"blogs": [...], "resources": [...]}`` for ``self.title``."""
        links = self.google(f"site:medium.com {self.title} after:2020-01-01")
        validUrls = self.getValidUrls(links)
        validSets = self.getValidSets(validUrls)
        blogs = []
        for validset in validSets:
            blog = self.getBlogs(validset["username"], validset["tag"])
            if bool(blog):
                blogs.append(blog)
        return {"blogs": blogs, "resources": validUrls}
bson==0.5.10
beautifulsoup4==4.9.3
dnspython==2.1.0
lxml==4.6.1
......@@ -6,4 +7,5 @@ regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
search-engine-parser==0.6.2
youtube-search-python==1.4.6
from youtube import Youtube
from Medium import Medium
from Dev import DevTo
from stof import STOF
import sys
from database import get_database
def saveAnswer(ans_id, stackoverflow, videos):
def saveAnswer(ans_id, stackoverflow, videos, medium_r, dev_r):
db = get_database()
try:
from bson.objectid import ObjectId
......@@ -13,7 +14,26 @@ def saveAnswer(ans_id, stackoverflow, videos):
automatedanswers = db["automatedanswers"]
automatedanswers.update_one(
{"_id": ObjectId(ans_id)},
{"$set": {"youtube": videos, "stackoverflow": stackoverflow}},
{
"$set": {
"youtube": videos,
"stackoverflow": stackoverflow,
"medium_articles": medium_r["blogs"],
"dev_articles": dev_r["blogs"],
"medium_resources": medium_r["resources"],
"dev_resources": dev_r["resources"],
}
},
)
print(
{
"youtube": videos,
"stackoverflow": stackoverflow,
"medium_articles": medium_r["blogs"],
"dev_articles": dev_r["blogs"],
"medium_resources": medium_r["resources"],
"dev_resources": dev_r["resources"],
}
)
except NameError as err:
print(err)
......@@ -23,19 +43,19 @@ if __name__ == "__main__":
# title = input("Enter question title: ")
title = sys.argv[1] # "python django or flask for web development"
tags = sys.argv[2] # ["react"]
AUTO_ANS_ID = sys.argv[3] # "60d746076689344694ad9e30" #
AUTO_ANS_ID = sys.argv[3] # "60dc9a5f84692f001569d7ab"
stack = STOF(title)
ans = stack.searchQuestion()
print(ans)
# medium = Medium(title)
# medium.searchArticles()
# f = open("data.txt", "a")
# f.write(f"updated {title} {tags} {AUTO_ANS_ID}\n")
# f.close()
medium = Medium(title, tags)
medium_articels = medium.getMediumArticles()
devto = DevTo(title, tags)
dev_articles = devto.getDevArticles()
youtube = Youtube(title, tags)
videos = youtube.find_videos()
saveAnswer(AUTO_ANS_ID, ans, videos)
saveAnswer(AUTO_ANS_ID, ans, videos, medium_articels, dev_articles)
print("WORKED")
sys.stdout.flush()
......@@ -7,4 +7,5 @@ regex==2020.7.14
requests==2.24.0
requests-html==0.10.0
scipy==1.5.4
search-engine-parser==0.6.2
youtube-search-python==1.4.6
import React from 'react'
import StackOverflowAnswer from '../stof-answer'
import YoutubeVideoWrapper from '../youtube-videos/YoutubeVideoWrapper'
const AutomatedAnswer = ({ automatedAnswer }) => {
console.log(automatedAnswer)
......@@ -10,6 +11,13 @@ const AutomatedAnswer = ({ automatedAnswer }) => {
) : (
<h1>No Stack overflow</h1>
)}
{automatedAnswer.youtube != null &&
automatedAnswer.youtube.length >= 1 ? (
<YoutubeVideoWrapper videos={automatedAnswer.youtube} />
) : (
<h1>No youtubes found for this question</h1>
)}
</>
)
}
......
......@@ -7,7 +7,9 @@ const StackOverflowAnswer = ({ stof }) => {
}
return (
<>
<h1 className={styles.h}>Answer from Stackoverflow - {stof.status}</h1>
<h1 className={styles.h}>
I found {stof.status} answer on Stackoverflow
</h1>
<div className={styles.wrapper}>
<div dangerouslySetInnerHTML={createMarkup()}></div>
</div>
......
......@@ -5,8 +5,10 @@
padding: 0.8em;
border-radius: 10px;
font-family: 'Open Sans', sans-serif;
margin-bottom: 3em;
color: white;
}
.h {
font-size: 1.3em !important;
font-size: 2em !important;
}
import React from 'react'
import YoutubeVideo from './youtube-video/YoutubeVideo'
import styles from './YoutubeVideoWrapper.module.css'
const YoutubeVideoWrapper = ({ videos }) => {
return (
<>
<h1 className={styles.h1}>
Here are {videos.length} video{videos.length > 1 && 's'} I found on
youtube
</h1>
<div className={styles.wrapper}>
{videos.map((video, index) => {
return <YoutubeVideo video={video} key={index} />
})}
</div>
</>
)
}
export default YoutubeVideoWrapper
/* Dark card container around the list of embedded YouTube players. */
.wrapper {
  margin-top: 0.5em;
  background-color: #2d2d2d;
  padding: 0.8em;
  border-radius: 2px;
  margin-bottom: 3em;
}
/* Section heading; !important overrides the app's global h1 sizing. */
.h1 {
  font-size: 2em !important;
}
import React from 'react'
import styles from './YoutubeVideo.module.css'
const YoutubeVideo = ({ video }) => {
console.log(video.substring(32, video.length - 1))
return (
<div className={styles.video_responsive}>
<iframe
width="853"
height="480"
src={`https://youtube.com/embed/${video.substring(32, video.length)}`}
frameBorder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowFullScreen
title="Embedded youtube"
/>
</div>
)
}
export default YoutubeVideo
/* 16:9 responsive video embed: padding-bottom 56.25% (= 9/16) reserves the
   aspect-ratio box, and the absolutely-positioned iframe fills it. */
.video_responsive {
  overflow: hidden;
  padding-bottom: 56.25%;
  position: relative;
  height: 0;
  margin: 1em 0;
  /* NOTE(review): a nested selector is not plain CSS — this needs native
     CSS Nesting support or an SCSS/postcss-nested build step; verify the
     project's CSS-modules toolchain actually processes it. */
  iframe {
    left: 0;
    top: 0;
    height: 100%;
    width: 100%;
    position: absolute;
  }
}
......@@ -4,7 +4,7 @@
line-height: 1.5em;
word-break: break-word;
font-size: 15px;
color: white;
a {
text-decoration: underline;
color: var(--blue-300);
......@@ -43,6 +43,13 @@
margin-top: 0.6em;
}
}
h2 {
color: white !important;
font-weight: bold !important;
font-size: 1.4em !important;
margin: 1.6em 0 0.7em 0;
}
}
.s-prose *:not(.s-code-block) > code {
......@@ -65,3 +72,40 @@ sup {
sub {
  vertical-align: sub;
}
/* NOTE(review): the .s-table* class names mirror Stack Overflow's "Stacks"
   design system — presumably copied so scraped answer HTML renders with
   SO-like table styling; confirm against the source of the answer markup. */
/* Horizontal scroll guard for wide tables. */
.s-table-container {
  /* margin-bottom: 1.1em; */
  overflow-x: auto;
}
.s-table {
  display: table;
  width: 100%;
  max-width: 100%;
  border-collapse: collapse;
  border-spacing: 0;
  font-size: 13px;
}
/* Header row: dark background, single-line labels aligned to the bottom. */
.s-table thead th {
  vertical-align: bottom;
  white-space: nowrap;
  background-color: var(--black-600);
  line-height: 1.15384615;
}
.s-table th {
  font-weight: bold;
  color: var(--fc-dark);
}
/* Shared cell chrome: padding plus a light border on three sides
   (border-collapse merges adjacent edges). */
.s-table th,
.s-table td {
  padding: 8px;
  border-top: 1px solid var(--black-100);
  border-left: 1px solid var(--black-100);
  border-right: 1px solid var(--black-100);
  vertical-align: middle;
  color: var(--fc-medium);
  text-align: left;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment