Commit 79fc5cb2 authored by Dilip Wijethunga's avatar Dilip Wijethunga

analysis part is done

parent 3608a613
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/scraping.iml" filepath="$PROJECT_DIR$/.idea/scraping.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RunConfigurationProducerService">
<option name="ignoredProducers">
<set>
<option value="com.android.tools.idea.compose.preview.runconfiguration.ComposePreviewRunConfigurationProducer" />
</set>
</option>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
### How to set up and run
##### Create virtual environment
###### Windows
py -3 -m venv <name of environment>
###### Linux/macOS
python3 -m venv <name of environment>
##### Activate virtual environment
###### Windows
<name of environment>\Scripts\activate
###### Linux/macOS
. <name of environment>/bin/activate
##### Install required libraries
pip install -r requirements.txt
##### Run app locally
python main.py
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -24,7 +24,7 @@ def load_keywords():
global KEYWORDS
KEYWORDS = dict()
# read csv file
with open("E:/SLIIT/Lectures/4th Year 1st Sem/RP/crypto-currency-forecasting-main/analysis/scraping/keywords.csv") as csv_file:
with open(KEYWORDS_PATH) as csv_file:
# init csv reader
csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0
......@@ -89,7 +89,7 @@ if __name__ == "__main__":
scrapped_words = scrapping_words("https://cointelegraph.com/")
score = calculate_score(scrapped_words)
print(f"Score = {score}")
if score > 0:
print("========> POSITIVE")
else:
print("========> NEGATIVE")
# if score > 0:
# print("========> POSITIVE")
# else:
# print("========> NEGATIVE")
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import numpy as np\n",
"import nltk\n",
"from nltk.corpus import stopwords\n",
"from nltk.stem import WordNetLemmatizer"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# load vectorizer\n",
"path_vectorizer = 'final model/vectorizer.pickle'\n",
"with open(path_vectorizer, 'rb') as data:\n",
" vectorizer = pickle.load(data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# load model\n",
"path_model = 'final model/best_rfc.pickle'\n",
"with open(path_model, 'rb') as data:\n",
" model = pickle.load(data)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n",
"[nltk_data] Downloading package wordnet to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package wordnet is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Downloading punkt and wordnet from NLTK\n",
"nltk.download('punkt')\n",
"print(\"------------------------------------------------------------\")\n",
"nltk.download('wordnet')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package stopwords to\n",
"[nltk_data] /Users/ameshmjayaweera/nltk_data...\n",
"[nltk_data] Package stopwords is already up-to-date!\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Downloading the stop words list\n",
"nltk.download('stopwords')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Saving the lemmatizer into an object\n",
"wordnet_lemmatizer = WordNetLemmatizer()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Loading the stop words in english\n",
"stop_words = list(stopwords.words('english'))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def pre_processing(sentence):\n",
" # 1.1. Replace \\r and \\n\n",
" sentence = sentence.replace(\"\\r\", \" \")\n",
" sentence = sentence.replace(\"\\n\", \" \")\n",
" \n",
" # 1.2. Convert to lowercase\n",
" sentence = sentence.lower()\n",
" \n",
" # 1.3. Remove punctuation signs\n",
" punctuation_signs = list(\"?:!.,;-$&^*%(){}[]/><@#~`|+_=“”…’−‘\")\n",
" for punct_sign in punctuation_signs:\n",
" sentence = sentence.replace(punct_sign, '')\n",
" \n",
" # 1.4. Remove possessive pronouns\n",
" sentence = sentence.replace(\"'s\", \"\")\n",
" \n",
" # 1.5. Remove numbers\n",
" digits = list(\"1234567890\")\n",
" for digit in digits:\n",
" sentence = sentence.replace(digit, '')\n",
" \n",
" # 1.6. Remove single quote and double quote\n",
" sentence = sentence.replace(\"'\", \"\")\n",
" sentence = sentence.replace('\"', '')\n",
" \n",
" # 1.7. Lemmatization\n",
" lemmatized_list = []\n",
" text_words = sentence.split(\" \")\n",
" for word in text_words:\n",
" lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos=\"v\"))\n",
" sentence = \" \".join(lemmatized_list)\n",
"\n",
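" # 1.8. Remove Stop words (NOTE: str.replace treats the \\b...\\b pattern as literal text, so this step is effectively a no-op; re.sub would be needed to apply it as a regex - confirm against how the model was trained before changing)\n",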
" for stop_word in stop_words:\n",
" regex_stopword = r\"\\b\" + stop_word + r\"\\b\"\n",
" sentence = sentence.replace(regex_stopword, '')\n",
" \n",
" # 1.9. Remove Extra Spaces\n",
" sentence = sentence.split()\n",
" sentence = \" \".join(sentence)\n",
" \n",
" return sentence"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"test_input_1 = 'or, how about this. terra was a bad investment because all cryptos operate as if they are ponzi schemes.'\n",
"test_input_2 = 'Honestly, after reading this post and many of the responses, I have to conclude most of the crypto-space is totally fucked. The consept of crypto has been entirely lost, waves of noobs arrive on crypto island, and instead of revelling in the freedom, do everything they can to plan their way to get back off of the island.'\n",
"test_input_3 = 'Funny how people think Bitcoin\\'s risk is comparable to stocks. A lot of these crypto \"investors\" are gonna learn the hard way sooner or later.'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'or how about this terra be a bad investment because all cryptos operate as if they be ponzi scheme'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pre_processing(test_input_1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'funny how people think bitcoin risk be comparable to stock a lot of these crypto investors be gonna learn the hard way sooner or later'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pre_processing(test_input_3)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"def predict(sentence):\n",
" sentence = pre_processing(sentence)\n",
" vector = vectorizer.transform([sentence]).toarray()\n",
" pred = model.predict(vector)\n",
" return pred[0]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict(test_input_1)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predict(test_input_3)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment