analysis part is done

79fc5cb2 · Dilip Wijethunga · 3608a613 · 3608a613 · 79fc5cb2 · 79fc5cb2
Commit 79fc5cb2 authored Oct 09, 2022 by Dilip Wijethunga
40 changed files
--- a/analysis/.gitkeep
+++ b/analysis/.gitkeep
--- a/analysis/btc-usd/btc-usd-market-cap.ipynb
+++ b/analysis/btc-usd/btc-usd-market-cap.ipynb
--- a/analysis/btc-usd/btc-usd-price.ipynb
+++ b/analysis/btc-usd/btc-usd-price.ipynb
--- a/analysis/btc-usd/btc-usd-volume.ipynb
+++ b/analysis/btc-usd/btc-usd-volume.ipynb
--- a/analysis/etc-usd/eth-usd-market-cap.ipynb
+++ b/analysis/etc-usd/eth-usd-market-cap.ipynb
--- a/analysis/etc-usd/eth-usd-price.ipynb
+++ b/analysis/etc-usd/eth-usd-price.ipynb
--- a/analysis/etc-usd/eth-usd-volume.ipynb
+++ b/analysis/etc-usd/eth-usd-volume.ipynb
--- a/analysis/pkex-usd/pkex-usd-market-cap.ipynb
+++ b/analysis/pkex-usd/pkex-usd-market-cap.ipynb
--- a/analysis/pkex-usd/pkex-usd-price.ipynb
+++ b/analysis/pkex-usd/pkex-usd-price.ipynb
--- a/analysis/pkex-usd/pkex-usd-volume.ipynb
+++ b/analysis/pkex-usd/pkex-usd-volume.ipynb
--- a/analysis/scraping/.gitkeep
+++ b/analysis/scraping/.gitkeep
--- a/analysis/scraping/.idea/.gitignore
+++ b/analysis/scraping/.idea/.gitignore
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
--- a/analysis/scraping/.idea/modules.xml
+++ b/analysis/scraping/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/scraping.iml" filepath="$PROJECT_DIR$/.idea/scraping.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/analysis/scraping/.idea/runConfigurations.xml
+++ b/analysis/scraping/.idea/runConfigurations.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="RunConfigurationProducerService">
+    <option name="ignoredProducers">
+      <set>
+        <option value="com.android.tools.idea.compose.preview.runconfiguration.ComposePreviewRunConfigurationProducer" />
+      </set>
+    </option>
+  </component>
+</project>
\ No newline at end of file
--- a/analysis/scraping/.idea/scraping.iml
+++ b/analysis/scraping/.idea/scraping.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/analysis/scraping/.idea/vcs.xml
+++ b/analysis/scraping/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/analysis/scraping/Crypto Sentiment Dataset.csv
+++ b/analysis/scraping/Crypto Sentiment Dataset.csv
--- a/analysis/scraping/Pickles/X_test.pickle
+++ b/analysis/scraping/Pickles/X_test.pickle
--- a/analysis/scraping/Pickles/X_train.pickle
+++ b/analysis/scraping/Pickles/X_train.pickle
--- a/analysis/scraping/Pickles/best_rfc.pickle
+++ b/analysis/scraping/Pickles/best_rfc.pickle
--- a/analysis/scraping/Pickles/df_2.pickle
+++ b/analysis/scraping/Pickles/df_2.pickle
--- a/analysis/scraping/Pickles/features_test.pickle
+++ b/analysis/scraping/Pickles/features_test.pickle
--- a/analysis/scraping/Pickles/features_train.pickle
+++ b/analysis/scraping/Pickles/features_train.pickle
--- a/analysis/scraping/Pickles/labels_test.pickle
+++ b/analysis/scraping/Pickles/labels_test.pickle
--- a/analysis/scraping/Pickles/labels_train.pickle
+++ b/analysis/scraping/Pickles/labels_train.pickle
--- a/analysis/scraping/Pickles/vectorizer.pickle
+++ b/analysis/scraping/Pickles/vectorizer.pickle
--- a/analysis/scraping/Pickles/y_test.pickle
+++ b/analysis/scraping/Pickles/y_test.pickle
--- a/analysis/scraping/Pickles/y_train.pickle
+++ b/analysis/scraping/Pickles/y_train.pickle
--- a/analysis/scraping/README.md
+++ b/analysis/scraping/README.md
+### How to set up and run
+##### Create virtual environment
+###### Windows
+    py -3 -m venv <name of environment>
+###### Linux/macOS
+    python3 -m venv <name of environment>
+##### Activate virtual environment
+###### Windows
+    <name of environment>\Scripts\activate
+###### Linux/macOS
+    . <name of environment>/bin/activate
+##### Install required libraries
+    pip install -r requirements.txt
+##### Run app locally
+    python main.py
\ No newline at end of file
--- a/analysis/scraping/__pycache__/.gitkeep
+++ b/analysis/scraping/__pycache__/.gitkeep
--- a/analysis/scraping/__pycache__/config.cpython-310.pyc
+++ b/analysis/scraping/__pycache__/config.cpython-310.pyc
--- a/analysis/scraping/final model/best_rfc.pickle
+++ b/analysis/scraping/final model/best_rfc.pickle
--- a/analysis/scraping/final model/vectorizer.pickle
+++ b/analysis/scraping/final model/vectorizer.pickle
--- a/analysis/scraping/keywords.csv
+++ b/analysis/scraping/keywords.csv
--- a/analysis/scraping/lr model.ipynb
+++ b/analysis/scraping/lr model.ipynb
--- a/analysis/scraping/main.py
+++ b/analysis/scraping/main.py
@@ -24,7 +24,7 @@ def load_keywords():
    global KEYWORDS
    KEYWORDS = dict()
    # read csv file
-    with open("E:/SLIIT/Lectures/4th Year 1st Sem/RP/crypto-currency-forecasting-main/analysis/scraping/keywords.csv") as csv_file:
+    with open(KEYWORDS_PATH) as csv_file:
        # init csv reader
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
@@ -89,7 +89,7 @@ if __name__ == "__main__":
    scrapped_words = scrapping_words("https://cointelegraph.com/")
    score = calculate_score(scrapped_words)
    print(f"Score = {score}")
-    if score > 0:
-        print("========> POSITIVE")
-    else:
-        print("========> NEGATIVE")
+    # if score > 0:
+    #     print("========> POSITIVE")
+    # else:
+    #     print("========> NEGATIVE")
--- a/analysis/scraping/pre-processed sentiments dataset.csv
+++ b/analysis/scraping/pre-processed sentiments dataset.csv
--- a/analysis/scraping/rf model.ipynb
+++ b/analysis/scraping/rf model.ipynb
--- a/analysis/scraping/sentiment analysis.ipynb
+++ b/analysis/scraping/sentiment analysis.ipynb
--- a/analysis/scraping/testing.ipynb
+++ b/analysis/scraping/testing.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "import numpy as np\n",
+    "import nltk\n",
+    "from nltk.corpus import stopwords\n",
+    "from nltk.stem import WordNetLemmatizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load vectorizer\n",
+    "path_vectorizer = 'final model/vectorizer.pickle'\n",
+    "with open(path_vectorizer, 'rb') as data:\n",
+    "    vectorizer = pickle.load(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load model\n",
+    "path_model = 'final model/best_rfc.pickle'\n",
+    "with open(path_model, 'rb') as data:\n",
+    "    model = pickle.load(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package punkt to\n",
+      "[nltk_data]     /Users/ameshmjayaweera/nltk_data...\n",
+      "[nltk_data]   Package punkt is already up-to-date!\n",
+      "[nltk_data] Downloading package wordnet to\n",
+      "[nltk_data]     /Users/ameshmjayaweera/nltk_data...\n",
+      "[nltk_data]   Package wordnet is already up-to-date!\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Downloading punkt and wordnet from NLTK\n",
+    "nltk.download('punkt')\n",
+    "print(\"------------------------------------------------------------\")\n",
+    "nltk.download('wordnet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[nltk_data] Downloading package stopwords to\n",
+      "[nltk_data]     /Users/ameshmjayaweera/nltk_data...\n",
+      "[nltk_data]   Package stopwords is already up-to-date!\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Downloading the stop words list\n",
+    "nltk.download('stopwords')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Saving the lemmatizer into an object\n",
+    "wordnet_lemmatizer = WordNetLemmatizer()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Loading the stop words in english\n",
+    "stop_words = list(stopwords.words('english'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def pre_processing(sentence):\n",
+    "    # 1.1. Replace \\r and \\n\n",
+    "    sentence = sentence.replace(\"\\r\", \" \")\n",
+    "    sentence = sentence.replace(\"\\n\", \" \")\n",
+    "    \n",
+    "    # 1.2. Convert to lowercase\n",
+    "    sentence = sentence.lower()\n",
+    "    \n",
+    "    # 1.3. Remove punctuation signs\n",
+    "    punctuation_signs = list(\"?:!.,;-$&^*%(){}[]/><@#~`|+_=“”…’−‘\")\n",
+    "    for punct_sign in punctuation_signs:\n",
+    "        sentence = sentence.replace(punct_sign, '')\n",
+    "            \n",
+    "    # 1.4. Remove possessive pronouns\n",
+    "    sentence = sentence.replace(\"'s\", \"\")\n",
+    "    \n",
+    "    # 1.5. Remove numbers\n",
+    "    digits = list(\"1234567890\")\n",
+    "    for digit in digits:\n",
+    "        sentence = sentence.replace(digit, '')\n",
+    "        \n",
+    "    # 1.6. Remove single quote and double quote\n",
+    "    sentence = sentence.replace(\"'\", \"\")\n",
+    "    sentence = sentence.replace('\"', '')\n",
+    "    \n",
+    "    # 1.7. Lemmatization\n",
+    "    lemmatized_list = []\n",
+    "    text_words = sentence.split(\" \")\n",
+    "    for word in text_words:\n",
+    "        lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos=\"v\"))\n",
+    "    sentence = \" \".join(lemmatized_list)\n",
+    "\n",
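+    "    # 1.8. Remove Stop words (NOTE: str.replace() matches the pattern literally, not as a regex, so no stop word is actually removed here; re.sub() would be needed)\n",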
+    "    for stop_word in stop_words:\n",
+    "        regex_stopword = r\"\\b\" + stop_word + r\"\\b\"\n",
+    "        sentence = sentence.replace(regex_stopword, '')\n",
+    "        \n",
+    "    # 1.9. Remove Extra Spaces\n",
+    "    sentence = sentence.split()\n",
+    "    sentence = \" \".join(sentence)\n",
+    "    \n",
+    "    return sentence"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_input_1 = 'or, how about this. terra was a bad investment because all cryptos operate as if they are ponzi schemes.'\n",
+    "test_input_2 = 'Honestly, after reading this post and many of the responses, I have to conclude most of the crypto-space is totally fucked. The consept of crypto has been entirely lost, waves of noobs arrive on crypto island, and instead of revelling in the freedom, do everything they can to plan their way to get back off of the island.'\n",
+    "test_input_3 = 'Funny how people think Bitcoin\\'s risk is comparable to stocks. A lot of these crypto \"investors\" are gonna learn the hard way sooner or later.'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'or how about this terra be a bad investment because all cryptos operate as if they be ponzi scheme'"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pre_processing(test_input_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'funny how people think bitcoin risk be comparable to stock a lot of these crypto investors be gonna learn the hard way sooner or later'"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pre_processing(test_input_3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict(sentence):\n",
+    "    sentence = pre_processing(sentence)\n",
+    "    vector = vectorizer.transform([sentence]).toarray()\n",
+    "    pred = model.predict(vector)\n",
+    "    return pred[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predict(test_input_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predict(test_input_3)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}