Upload New File

bb95304d · sashika sewwandi · 9b0f68a0 · bb95304d
Commit bb95304d authored Oct 10, 2022 by sashika sewwandi
Hide whitespace changes
Inline Side-by-side

Showing with 74 additions and 0 deletions

quotes.py quotes.py +74 -0

No files found.
--- a/quotes.py
+++ b/quotes.py
+from __future__ import print_function
+import csv
+import sys
+import requests
+from bs4 import BeautifulSoup
+from Template import Baseclass
+class ScrapData(Baseclass):
+    """Scrape quote cards from the configured page and export them as CSV.
+
+    ``base_url``, ``output_file``, ``metadata_setup`` and ``s3_data_transfer``
+    are not defined in this class; they are expected to be provided by
+    ``Baseclass`` (presumably populated by ``metadata_setup`` -- verify
+    against the Template module).
+    """
+
+    def __init__(self):
+        # NOTE(review): Baseclass.__init__ is not called here; confirm it needs
+        # no arguments / has no required setup before adding super().__init__().
+        self.page_link = None
+        self.quotes = []
+        # The two attributes below are not read by any method in this class;
+        # they are kept so the instance layout stays unchanged.
+        self.zip_file_path = None
+        self.download_url = None
+
+    def create_download_url(self):
+        """
+        Fetch the page at ``self.base_url`` and collect its quote cards.
+
+        Despite its name, this method builds no URL. Every matching card
+        inside the ``all_quotes`` container is appended to ``self.quotes`` as
+        a dict with the keys ``theme``, ``url``, ``img``, ``lines`` and
+        ``author``.
+
+        On any failure the error is printed and the process exits with
+        status -1.
+        :return: None
+        """
+        try:
+            self.page_link = self.base_url
+            page_response = requests.get(self.page_link, timeout=5)
+            # Stop on HTTP errors instead of parsing an error page.
+            page_response.raise_for_status()
+            soup = BeautifulSoup(page_response.content, "html.parser")
+            container = soup.find('div', attrs={'id': 'all_quotes'})
+            if container is None:
+                raise ValueError("element with id 'all_quotes' not found in page")
+            cards = container.find_all('div', attrs={'class': 'col-6 col-lg-4 text-center margin-30px-bottom '
+                                                              'sm-margin-30px-top'})
+            for row in cards:
+                # The image alt text is split on " #": the first part is stored
+                # as the quote lines, the second as the author.
+                alt_parts = row.img['alt'].split(" #")
+                quote = {'theme': row.h5.text, 'url': row.a['href'], 'img': row.img['src'],
+                         'lines': alt_parts[0],
+                         # A card without an author tag gets an empty author
+                         # rather than aborting the whole scrape.
+                         'author': alt_parts[1] if len(alt_parts) > 1 else ''}
+                self.quotes.append(quote)
+        except Exception as e:
+            print("Can't scrape data from this site:", self.page_link)
+            print(e)
+            sys.exit(-1)
+        else:
+            print("Data has been extracted successfully")
+
+    def download_zip_file(self):
+        """
+        Write the collected quotes to ``self.output_file`` as a CSV file.
+
+        Despite its name, nothing is downloaded or unzipped here: the method
+        writes a header row followed by one row per dict in ``self.quotes``.
+
+        On any failure the error is printed and the process exits with
+        status -1.
+        :return: None
+        """
+        try:
+            filename = self.output_file
+            # Explicit UTF-8 so scraped text does not depend on the platform's
+            # default encoding; newline='' is what the csv module requires.
+            with open(filename, 'w', newline='', encoding='utf-8') as f:
+                w = csv.DictWriter(f, ['theme', 'url', 'img', 'lines', 'author'])
+                w.writeheader()
+                w.writerows(self.quotes)
+        except Exception as e:
+            print("file download is unsuccessful")
+            print(e)
+            sys.exit(-1)
+        else:
+            print("File extraction success")
+
+    def invoke(self):
+        """
+        Run the whole job: load the "quotes_config" metadata, scrape the page,
+        write the CSV and hand the output file to ``s3_data_transfer``.
+        :return: None
+        """
+        self.metadata_setup("quotes_config")
+        self.create_download_url()
+        self.download_zip_file()
+        self.s3_data_transfer(self.output_file)
+if __name__ == "__main__":
+    # Free-text description of this job; nothing visible in this file reads it.
+    custom_driver_params = (
+        'Create a new file WebScraping inspirational-quotes data'
+    )
+    # Build the scraper and run the full scrape -> CSV -> transfer pipeline.
+    scraper = ScrapData()
+    scraper.invoke()