Commit ed1b3f11 authored by Dilip Wijethunga

Merge branch 'IT19240466' into 'master'

It19240466

See merge request !1
parents a8c72c93 5ad2479f
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/scraping.iml" filepath="$PROJECT_DIR$/.idea/scraping.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RunConfigurationProducerService">
<option name="ignoredProducers">
<set>
<option value="com.android.tools.idea.compose.preview.runconfiguration.ComposePreviewRunConfigurationProducer" />
</set>
</option>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>
### How to set up and run
##### Create virtual environment
###### Windows
py -3 -m venv <name of environment>
###### Linux/macOS
python3 -m venv <name of environment>
##### Activate virtual environment
###### Windows
<name of environment>\Scripts\activate
###### Linux/macOS
. <name of environment>/bin/activate
##### Install required libraries
pip install -r requirements.txt
##### Run app locally
python main.py
HEADER = {
"User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/50.0.2661.102 Safari/537.36 '
}
This diff is collapsed.
This diff is collapsed.
import csv
import requests
from bs4 import BeautifulSoup
from config import HEADER
# HTML tags to be scraped
TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'span', 'td', 'li', 'a']
# keywords csv file path
KEYWORDS_PATH = 'keywords.csv'
# init the KEYWORDS global dictionary, mapping each keyword to its weight
KEYWORDS = dict()
def test_with_bs4(url):
response = requests.get(url, headers=HEADER)
soup = BeautifulSoup(response.text, "html.parser")
file = open("test.html", "w+")
file.write(str(soup))
file.close()
def load_keywords():
# access global KEYWORDS
global KEYWORDS
KEYWORDS = dict()
# read csv file
with open(KEYWORDS_PATH) as csv_file:
# init csv reader
csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0
for row in csv_reader:
# header row
if line_count == 0:
print(f'Column names are {", ".join(row)}')
line_count += 1
# other rows
else:
KEYWORDS[row[0]] = int(row[1])
line_count += 1
print(f'Processed {line_count} lines.')
def scrapping_words(url):
# init word dictionary
words = dict()
# download html page of the website
response = requests.get(url, headers=HEADER)
# parse with bs4
soup = BeautifulSoup(response.text, "html.parser")
# iterate through html tags
for tag in TAGS:
# find all inner texts for the tag
rows = soup.find_all(tag)
# iterate through all rows found related to the given tag
for row in rows:
# inner text to lower
sentence = row.get_text().lower()
# keep only alphabetic characters and spaces
sentence = ''.join(x for x in sentence if x.isalpha() or x == ' ')
# split into words
array = sentence.split(' ')
# cleaning array
modified_array = [e.strip() for e in array if len(e.strip()) > 0]
# iterate through each word
for word in modified_array:
# if the word is not in the dict yet, add it
if word not in words.keys():
words[word] = 0
# increase count by 1
words[word] += 1
# return scraped words from the given webpage
return words
def calculate_score(words):
# init total score to zero
total_score = 0
# iterate through scrapped words
for word, frequency in words.items():
# if the scraped word exists in keywords
if word in KEYWORDS.keys():
# multiply the keyword's weight by its frequency and add it to the total score
total_score += KEYWORDS[word] * frequency
return total_score
if __name__ == "__main__":
load_keywords()
scrapped_words = scrapping_words("https://cointelegraph.com/")
score = calculate_score(scrapped_words)
print(f"Score = {score}")
# if score > 0:
# print("========> POSITIVE")
# else:
# print("========> NEGATIVE")
bs4==0.0.1
requests==2.26.0
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import numpy as np\n",
"import nltk\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import WordNetLemmatizer"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# load vectorizer\n",
"path_vectorizer = 'final model/vectorizer.pickle'\n",
"with open(path_vectorizer, 'rb') as data:\n",
" vectorizer = pickle.load(data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# load model\n",
"path_model = 'final model/best_rfc.pickle'\n",
"with open(path_model, 'rb') as data:\n",
" model = pickle.load(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package wordnet to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Downloading punkt and wordnet from NLTK\n",
"nltk.download('punkt')\n",
"print(\"------------------------------------------------------------\")\n",
"nltk.download('wordnet')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package stopwords to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Downloading the stop words list\n",
"nltk.download('stopwords')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Saving the lemmatizer into an object\n",
"wordnet_lemmatizer = WordNetLemmatizer()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Loading the stop words in english\n",
"stop_words = list(stopwords.words('english'))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def pre_processing(sentence):\n",
" # 1.1. Replace \\n and \\t\n",
" sentence = sentence.replace(\"\\r\", \" \")\n",
" sentence = sentence.replace(\"\\n\", \" \")\n",
" \n",
" # 1.2. Convert to lowercase\n",
" sentence = sentence.lower()\n",
" \n",
" # 1.3. Remove punctuation signs\n",
" punctuation_signs = list(\"?:!.,;-$&^*%(){}[]/><@#~`|+_=“”…’−‘\")\n",
" for punct_sign in punctuation_signs:\n",
" sentence = sentence.replace(punct_sign, '')\n",
" \n",
" # 1.4. Remove possessive pronouns\n",
" sentence = sentence.replace(\"'s\", \"\")\n",
" \n",
" # 1.5. Remove numbers\n",
" digits = list(\"1234567890\")\n",
" for digit in digits:\n",
" sentence = sentence.replace(digit, '')\n",
" \n",
" # 1.6. Remove single quote and double quote\n",
" sentence = sentence.replace(\"'\", \"\")\n",
" sentence = sentence.replace('\"', '')\n",
" \n",
" # 1.7. Lemmatization\n",
" lemmatized_list = []\n",
" text_words = sentence.split(\" \")\n",
" for word in text_words:\n",
" lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos=\"v\"))\n",
" sentence = \" \".join(lemmatized_list)\n",
"\n",
" # 1.8. Remove Stop words\n",
" for stop_word in stop_words:\n",
" regex_stopword = r\"\\b\" + stop_word + r\"\\b\"\n",
" sentence = sentence.replace(regex_stopword, '')\n",
" \n",
" # 1.9. Remove Extra Spaces\n",
" sentence = sentence.split()\n",
" sentence = \" \".join(sentence)\n",
" \n",
" return sentence"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"test_input_1 = 'or, how about this. terra was a bad investment because all cryptos operate as if they are ponzi schemes.'\n",
"test_input_2 = 'Honestly, after reading this post and many of the responses, I have to conclude most of the crypto-space is totally fucked. The consept of crypto has been entirely lost, waves of noobs arrive on crypto island, and instead of revelling in the freedom, do everything they can to plan their way to get back off of the island.'\n",
"test_input_3 = 'Funny how people think Bitcoin\\'s risk is comparable to stocks. A lot of these crypto \"investors\" are gonna learn the hard way sooner or later.'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'or how about this terra be a bad investment because all cryptos operate as if they be ponzi scheme'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pre_processing(test_input_1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'funny how people think bitcoin risk be comparable to stock a lot of these crypto investors be gonna learn the hard way sooner or later'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pre_processing(test_input_3)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def predict(sentence):\n",
" sentence = pre_processing(sentence)\n",
" vector = vectorizer.transform([sentence]).toarray()\n",
" pred = model.predict(vector)\n",
" return pred[0]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict(test_input_1)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict(test_input_3)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
web: gunicorn app:app
### How to set up and run
##### Create virtual environment
###### Windows
py -3 -m venv <name of environment>
###### Linux/macOS
python3 -m venv <name of environment>
##### Activate virtual environment
###### Windows
<name of environment>\Scripts\activate
###### Linux/macOS
. <name of environment>/bin/activate
##### Install required libraries
pip3 install -r requirements.txt
##### Run app locally
flask run
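##### Run with gunicorn (optional)
The Procfile in this repository starts the app with gunicorn; assuming gunicorn is installed, the same command also works locally:
gunicorn app:app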
from flask import Flask, jsonify
from flask_cors import CORS
from flask_apscheduler import APScheduler
from model import schedule_model_training, is_training, CURRENCIES
from web_scrapping import get_sentiment
app = Flask(__name__)
cors = CORS(app, resources={r"/crypto-currency/*": {"origins": "*"}})
app.config['CORS_HEADERS'] = 'Content-Type'
scheduler = APScheduler()
schedule_model_training()
# schedule model re-training every hour (3600-second interval)
scheduler.add_job(id='Scheduled Task', func=schedule_model_training, trigger="interval", seconds=3600)
scheduler.start()
@app.route('/crypto-currency', methods=['GET'])
def index():
return f"<div align='center'><h2>Crypto Currency Forecasting Sever is Active</h2></div>"
@app.route("/crypto-currency/predict", methods=['GET'])
def predict():
if is_training():
response = jsonify({
"message": "all forecasting models are training now!",
"code": 100
})
else:
data = dict()
for currency in list(CURRENCIES.keys()):
if CURRENCIES[currency]["enable"] and CURRENCIES[currency]["available_data"]:
data[currency] = {
"price": CURRENCIES[currency]["price"],
"volume": CURRENCIES[currency]["volume"],
"market_cap": CURRENCIES[currency]["market_cap"]
}
response = jsonify({
"code": 200,
"message": "Success",
"data": data
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response, 200
@app.route("/crypto-currency/sentiment", methods=['GET'])
def sentiment():
response = jsonify({
"code": 200,
"message": "Success",
"sentiment": get_sentiment()
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response, 200
if __name__ == "__main__":
app.run()
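A hedged client-side sketch of the two GET endpoints above, assuming the server is running locally on Flask's default port 5000 (address and port are assumptions, not part of the code above):

import requests

BASE_URL = "http://127.0.0.1:5000/crypto-currency"  # assumed local address and default Flask port

# /predict answers with code 100 while the forecasting models are retraining,
# otherwise code 200 plus per-currency price / volume / market_cap figures.
forecast = requests.get(f"{BASE_URL}/predict").json()
print(forecast["code"], forecast["message"])

# /sentiment returns the latest sentiment value produced by the web_scrapping module.
sentiment = requests.get(f"{BASE_URL}/sentiment").json()
print(sentiment["sentiment"])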
HEADER = {
"User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/50.0.2661.102 Safari/537.36 '
}
import os
import ssl
from urllib.request import Request, urlopen
import certifi
from model_training.pp_market_cap import pp_market_cap
from model_training.pp_price import pp_price
from model_training.pp_volume import pp_volume
from web_scrapping import start_web_scrapping, set_sentiment
DATABASE_DIR = f"database{os.sep}"
TRAINING = False
THRESHOLD = 1000000
CURRENCIES = {
"BTC_USD": {
"url": "https://coingecko.com/price_charts/export/1/usd.csv",
"available_data": False,
"path": None,
"enable": True,
"price": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"volume": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"market_cap": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
}
},
"ETH_USD": {
"url": "https://www.coingecko.com/price_charts/export/279/usd.csv",
"available_data": False,
"path": None,
"enable": True,
"price": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"volume": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"market_cap": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
}
},
"PKEX_USD": {
"url": "https://www.coingecko.com/price_charts/export/18616/usd.csv",
"available_data": False,
"path": None,
"enable": True,
"price": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"volume": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
},
"market_cap": {
"today": 0,
"tomorrow": 0,
"score": 0,
"exceeded": False
}
}
}
def download_data_sources():
for currency in list(CURRENCIES.keys()):
CURRENCIES[currency]["available_data"] = False
CURRENCIES[currency]["path"] = None
for currency in list(CURRENCIES.keys()):
request = Request(
url=CURRENCIES[currency]["url"],
headers={'User-Agent': 'Mozilla/5.0'}
)
print(f"download data source for {currency}")
with urlopen(request, context=ssl.create_default_context(cafile=certifi.where())) as file:
downloaded_file = file.read().decode('utf-8')
csv_file = open(f'{DATABASE_DIR}{currency}.csv', "w+")
csv_file.write(downloaded_file)
csv_file.close()
CURRENCIES[currency]["available_data"] = True
CURRENCIES[currency]["path"] = f'{DATABASE_DIR}{currency}.csv'
print(f"successfully downloaded data source for {currency}")
def is_data_sources_configured():
for currency in list(CURRENCIES.keys()):
if CURRENCIES[currency]["enable"] and not CURRENCIES[currency]["available_data"]:
return False
return True
def set_training(_flag):
global TRAINING
TRAINING = _flag
def is_training():
return TRAINING
def schedule_model_training():
set_training(True)
print("start model training")
retry_count = 0
while retry_count < 3:
print("downloading data sources")
retry_count += 1
print(f"attempting - {retry_count}")
download_data_sources()
if is_data_sources_configured():
print("data sources successfully downloaded")
break
# model training
for currency in list(CURRENCIES.keys()):
if CURRENCIES[currency]["enable"] and CURRENCIES[currency]["available_data"]:
file_path = CURRENCIES[currency]["path"]
today_price, pred_price = pp_price(file_path)
today_volume, pred_volume = pp_volume(file_path)
today_market_cap, pred_market_cap = pp_market_cap(file_path)
# price
CURRENCIES[currency]["price"]["today"] = today_price
CURRENCIES[currency]["price"]["tomorrow"] = pred_price
score = ((pred_price - today_price) / today_price) * 10
if score < 0:
score = 0
CURRENCIES[currency]["price"]["score"] = score
flag = False
if pred_price >= THRESHOLD:
flag = True
CURRENCIES[currency]["price"]["exceeded"] = flag
# volume
CURRENCIES[currency]["volume"]["today"] = today_volume
CURRENCIES[currency]["volume"]["tomorrow"] = pred_volume
score = ((pred_volume - today_volume) / today_volume) * 10
if score < 0:
score = 0
CURRENCIES[currency]["volume"]["score"] = score
flag = False
if pred_volume >= THRESHOLD:
flag = True
CURRENCIES[currency]["volume"]["exceeded"] = flag
# market cap
CURRENCIES[currency]["market_cap"]["today"] = today_market_cap
CURRENCIES[currency]["market_cap"]["tomorrow"] = pred_market_cap
score = ((pred_market_cap - today_market_cap) / today_market_cap) * 10
if score < 0:
score = 0
CURRENCIES[currency]["market_cap"]["score"] = score
flag = False
if pred_market_cap >= THRESHOLD:
flag = True
CURRENCIES[currency]["market_cap"]["exceeded"] = flag
print("end model training")
set_training(False)
print(CURRENCIES)
set_sentiment('Not Available')
start_web_scrapping()
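A purely illustrative walk-through of the score/exceeded fields computed above (all numbers hypothetical):

# Hypothetical numbers for the price branch in schedule_model_training():
#   today_price = 40000, pred_price = 41000
#   score = ((41000 - 40000) / 40000) * 10 = 0.25   (a predicted drop gives a negative score, clamped to 0)
#   exceeded = (41000 >= THRESHOLD) -> False, since THRESHOLD is 1000000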
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')
def model_training(training_data, scaler):
history = [x for x in training_data]
model = sm.tsa.arima.ARIMA(history, order=(5, 1, 0))
model_fit = model.fit()
output = model_fit.forecast()
return scaler.inverse_transform([[history[-1]]])[0][0], scaler.inverse_transform([[output[0]]])[0][0]
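A minimal, self-contained sketch of driving this helper on a synthetic series (illustrative only; the real callers are the pp_* modules below, and the series and import path here are assumptions):

import numpy as np
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training  # assumed import path, matching the pp_* modules

values = np.arange(1.0, 101.0).reshape(-1, 1)          # hypothetical series of 100 observations
scaler = StandardScaler()
scaled_series = scaler.fit_transform(values).ravel()    # the helper expects pre-scaled values plus the fitted scaler
last_value, one_step_forecast = model_training(scaled_series, scaler)
print(last_value, one_step_forecast)                    # both mapped back to the original scale via inverse_transform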
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_market_cap(file_path):
df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
df.drop(['total_volume', 'price'], axis=1, inplace=True)
df['market_cap'] = df['market_cap'].fillna(0)
df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
scaler = StandardScaler()
df[['market_cap']] = scaler.fit_transform(df[['market_cap']])
training_data = df['market_cap'].values
return model_training(training_data, scaler)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_price(file_path):
df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
df.drop(['total_volume', 'market_cap'], axis=1, inplace=True)
df['price'] = df['price'].fillna(0)
df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
scaler = StandardScaler()
df[['price']] = scaler.fit_transform(df[['price']])
training_data = df['price'].values
return model_training(training_data, scaler)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_volume(file_path):
df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
df.drop(['price', 'market_cap'], axis=1, inplace=True)
df['total_volume'] = df['total_volume'].fillna(0)
df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
scaler = StandardScaler()
df[['total_volume']] = scaler.fit_transform(df[['total_volume']])
training_data = df['total_volume'].values
return model_training(training_data, scaler)
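A short usage sketch for the three preprocessing helpers, assuming a CSV already written by download_data_sources() in model.py (the path below is illustrative):

from model_training.pp_price import pp_price
from model_training.pp_volume import pp_volume
from model_training.pp_market_cap import pp_market_cap

csv_path = "database/BTC_USD.csv"  # assumed to exist after download_data_sources() has run

today_price, predicted_price = pp_price(csv_path)        # last observed price and one-step-ahead forecast
today_volume, predicted_volume = pp_volume(csv_path)     # same for total_volume
today_mcap, predicted_mcap = pp_market_cap(csv_path)     # same for market_cap
print(today_price, predicted_price)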