Commit 5ad2479f authored by Dilip Wijethunga's avatar Dilip Wijethunga

update api

parent 49f9f3df
### How to set up and run
##### Create virtual environment
###### Windows
py -3 -m venv <name of environment>
###### Linux/macOS
python3 -m venv <name of environment>
##### Activate virtual environment
###### Windows
<name of environment>\Scripts\activate
###### Linux/macOS
. <name of environment>/bin/activate
##### Install required libraries
pip3 install -r requirements.txt
##### Run app locally
flask run
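##### Run with gunicorn (production)
A WSGI launch for deployment, suggested by the pinned gunicorn dependency; a minimal sketch assuming the Flask instance is the `app` object in `app.py`:

gunicorn app:app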
from flask import Flask, jsonify
from flask_cors import CORS
from flask_apscheduler import APScheduler
from model import schedule_model_training, is_training, CURRENCIES
from web_scrapping import get_sentiment
app = Flask(__name__)
cors = CORS(app, resources={r"/crypto-currency/*": {"origins": "*"}})
app.config['CORS_HEADERS'] = 'Content-Type'
scheduler = APScheduler()
schedule_model_training()
# schedule model re-training every hour
scheduler.add_job(id='Scheduled Task', func=schedule_model_training, trigger="interval", seconds=3600)
scheduler.start()
@app.route('/crypto-currency', methods=['GET'])
def index():
    return "<div align='center'><h2>Crypto Currency Forecasting Server is Active</h2></div>"
@app.route("/crypto-currency/predict", methods=['GET'])
def predict():
if is_training():
response = jsonify({
"message": "all forecasting models are training now!",
"code": 100
})
else:
data = dict()
for currency in list(CURRENCIES.keys()):
if CURRENCIES[currency]["enable"] and CURRENCIES[currency]["available_data"]:
data[currency] = {
"price": CURRENCIES[currency]["price"],
"volume": CURRENCIES[currency]["volume"],
"market_cap": CURRENCIES[currency]["market_cap"]
}
response = jsonify({
"code": 200,
"message": "Success",
"data": data
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response, 200
@app.route("/crypto-currency/sentiment", methods=['GET'])
def sentiment():
response = jsonify({
"code": 200,
"message": "Success",
"sentiment": get_sentiment()
})
response.headers.add('Access-Control-Allow-Origin', '*')
return response, 200
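# Example client calls for the two routes above (commented out, like the
# test harness in web_scrapping.py; the host and port are assumptions
# matching the Flask development server default):
'''
import requests

base = "http://127.0.0.1:5000/crypto-currency"
prediction = requests.get(f"{base}/predict").json()
if prediction["code"] == 100:
    print("models are still training")
else:
    for symbol, metrics in prediction["data"].items():
        print(symbol, metrics["price"]["tomorrow"])
print(requests.get(f"{base}/sentiment").json()["sentiment"])
'''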
if __name__ == "__main__":
......
HEADER = {
    "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/50.0.2661.102 Safari/537.36 '
}
import os
import ssl
from urllib.request import Request, urlopen
import certifi
from model_training.pp_market_cap import pp_market_cap
from model_training.pp_price import pp_price
from model_training.pp_volume import pp_volume
from web_scrapping import start_web_scrapping, set_sentiment
DATABASE_DIR = f"database{os.sep}"
TRAINING = False
THRESHOLD = 1000000
CURRENCIES = {
    "BTC_USD": {
        "url": "https://coingecko.com/price_charts/export/1/usd.csv",
        "available_data": False,
        "path": None,
        "enable": True,
        "price": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "volume": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "market_cap": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False}
    },
    "ETH_USD": {
        "url": "https://www.coingecko.com/price_charts/export/279/usd.csv",
        "available_data": False,
        "path": None,
        "enable": True,
        "price": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "volume": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "market_cap": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False}
    },
    "PKEX_USD": {
        "url": "https://www.coingecko.com/price_charts/export/18616/usd.csv",
        "available_data": False,
        "path": None,
        "enable": True,
        "price": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "volume": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
        "market_cap": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False}
    }
}
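# To track an additional coin, append an entry with the same shape; the
# numeric id in the CoinGecko CSV-export URL is per-coin. The symbol and
# <coin_id> below are placeholders, not real values:
#
# CURRENCIES["XYZ_USD"] = {
#     "url": "https://www.coingecko.com/price_charts/export/<coin_id>/usd.csv",
#     "available_data": False,
#     "path": None,
#     "enable": True,
#     "price": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
#     "volume": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False},
#     "market_cap": {"today": 0, "tomorrow": 0, "score": 0, "exceeded": False}
# }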
def download_data_sources():
    # reset availability flags before re-downloading
    for currency in list(CURRENCIES.keys()):
        CURRENCIES[currency]["available_data"] = False
        CURRENCIES[currency]["path"] = None
    for currency in list(CURRENCIES.keys()):
        request = Request(
            url=CURRENCIES[currency]["url"],
            headers={'User-Agent': 'Mozilla/5.0'}
        )
        print(f"download data source for {currency}")
        with urlopen(request, context=ssl.create_default_context(cafile=certifi.where())) as file:
            downloaded_file = file.read().decode('utf-8')
            with open(f'{DATABASE_DIR}{currency}.csv', "w+") as csv_file:
                csv_file.write(downloaded_file)
        CURRENCIES[currency]["available_data"] = True
        CURRENCIES[currency]["path"] = f'{DATABASE_DIR}{currency}.csv'
        print(f"successfully downloaded data source for {currency}")
def is_data_sources_configured():
    for currency in list(CURRENCIES.keys()):
        if CURRENCIES[currency]["enable"] and not CURRENCIES[currency]["available_data"]:
            return False
    return True

def set_training(_flag):
    global TRAINING
    TRAINING = _flag

def is_training():
    return TRAINING
def schedule_model_training():
    set_training(True)
    print("start model training")
    retry_count = 0
    while retry_count < 3:
        print("downloading data sources")
        retry_count += 1
        print(f"attempting - {retry_count}")
        download_data_sources()
        if is_data_sources_configured():
            print("data sources successfully downloaded")
            break
    # model training
    for currency in list(CURRENCIES.keys()):
        if CURRENCIES[currency]["enable"] and CURRENCIES[currency]["available_data"]:
            file_path = CURRENCIES[currency]["path"]
            today_price, pred_price = pp_price(file_path)
            today_volume, pred_volume = pp_volume(file_path)
            today_market_cap, pred_market_cap = pp_market_cap(file_path)
            # price: score is the predicted one-day relative gain scaled by 10,
            # clamped at 0 (e.g. 100 -> 104 gives (4 / 100) * 10 = 0.4)
            CURRENCIES[currency]["price"]["today"] = today_price
            CURRENCIES[currency]["price"]["tomorrow"] = pred_price
            score = ((pred_price - today_price) / today_price) * 10
            if score < 0:
                score = 0
            CURRENCIES[currency]["price"]["score"] = score
            CURRENCIES[currency]["price"]["exceeded"] = pred_price >= THRESHOLD
            # volume
            CURRENCIES[currency]["volume"]["today"] = today_volume
            CURRENCIES[currency]["volume"]["tomorrow"] = pred_volume
            score = ((pred_volume - today_volume) / today_volume) * 10
            if score < 0:
                score = 0
            CURRENCIES[currency]["volume"]["score"] = score
            CURRENCIES[currency]["volume"]["exceeded"] = pred_volume >= THRESHOLD
            # market cap
            CURRENCIES[currency]["market_cap"]["today"] = today_market_cap
            CURRENCIES[currency]["market_cap"]["tomorrow"] = pred_market_cap
            score = ((pred_market_cap - today_market_cap) / today_market_cap) * 10
            if score < 0:
                score = 0
            CURRENCIES[currency]["market_cap"]["score"] = score
            CURRENCIES[currency]["market_cap"]["exceeded"] = pred_market_cap >= THRESHOLD
    print("end model training")
    set_training(False)
    print(CURRENCIES)
set_sentiment('Not Available')
start_web_scrapping()
import warnings

from statsmodels.tsa.arima.model import ARIMA

warnings.filterwarnings('ignore')

def model_training(training_data, scaler):
    # fit an ARIMA(5, 1, 0) on the scaled series and forecast one step ahead
    history = [x for x in training_data]
    model = ARIMA(history, order=(5, 1, 0))
    model_fit = model.fit()
    output = model_fit.forecast()
    # un-scale the last observation ("today") and the forecast ("tomorrow")
    return scaler.inverse_transform([[history[-1]]])[0][0], scaler.inverse_transform([[output[0]]])[0][0]
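# Quick self-test sketch (synthetic data, run only when this module is
# executed directly); mirrors how the pp_* modules call model_training:
if __name__ == "__main__":
    import numpy as np
    from sklearn.preprocessing import StandardScaler

    series = np.linspace(100.0, 150.0, 60).reshape(-1, 1)  # fake price history
    scaler = StandardScaler()
    scaled = scaler.fit_transform(series).ravel()
    today, tomorrow = model_training(scaled, scaler)
    print(today, tomorrow)  # both back in the original units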
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_market_cap(file_path):
    df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
    df.drop(['total_volume', 'price'], axis=1, inplace=True)
    df['market_cap'] = df['market_cap'].fillna(0)
    df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
    df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
    scaler = StandardScaler()
    df[['market_cap']] = scaler.fit_transform(df[['market_cap']])
    training_data = df['market_cap'].values
    return model_training(training_data, scaler)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_price(file_path):
    df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
    df.drop(['total_volume', 'market_cap'], axis=1, inplace=True)
    df['price'] = df['price'].fillna(0)
    df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
    df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
    scaler = StandardScaler()
    df[['price']] = scaler.fit_transform(df[['price']])
    training_data = df['price'].values
    return model_training(training_data, scaler)
import pandas as pd
from sklearn.preprocessing import StandardScaler
from model_training.helper import model_training
def pp_volume(file_path):
    df = pd.read_csv(file_path, delimiter=',', parse_dates=True, squeeze=True)
    df.drop(['price', 'market_cap'], axis=1, inplace=True)
    df['total_volume'] = df['total_volume'].fillna(0)
    df['snapped_at'] = df['snapped_at'].apply(lambda x: x.split(' ')[0].strip())
    df['snapped_at'] = pd.to_datetime(df['snapped_at'], infer_datetime_format=True)
    scaler = StandardScaler()
    df[['total_volume']] = scaler.fit_transform(df[['total_volume']])
    training_data = df['total_volume'].values
    return model_training(training_data, scaler)
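# Usage sketch: all three pp_* helpers assume the CoinGecko CSV export
# schema (snapped_at, price, market_cap, total_volume); the path below is
# an assumption matching what download_data_sources() writes:
#
# today_volume, tomorrow_volume = pp_volume("database/BTC_USD.csv")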
Flask~=2.1.1
numpy~=1.23.3
pandas~=1.4.2
gunicorn~=20.1.0
scikit-learn==1.0
flask_cors
pip~=20.3.2
wheel~=0.36.2
cryptography~=3.3.1
lxml~=4.6.2
pytz~=2020.4
MarkupSafe~=2.0.1
Werkzeug~=2.1.1
Jinja2~=3.0.1
click~=8.1.2
itsdangerous~=2.1.2
setuptools~=49.2.1
nose~=1.3.7
cffi~=1.14.4
numba~=0.56.2
scipy~=1.5.4
joblib~=0.17.0
python-dateutil~=2.8.1
threadpoolctl~=2.1.0
Pillow~=9.2.0
six~=1.15.0
tornado~=6.1
decorator~=5.0.9
zipp~=3.7.0
certifi~=2020.12.5
flask_apscheduler
defusedxml~=0.7.1
ipython~=7.25.0
pyzmq~=22.1.0
pexpect~=4.8.0
parso~=0.8.2
jedi~=0.18.0
attrs~=20.3.0
llvmlite~=0.39.1
Pygments~=2.9.0
ipykernel~=5.5.5
nbformat~=5.1.3
traitlets~=5.0.5
testpath~=0.5.0
packaging~=21.3
backcall~=0.2.0
pickleshare~=0.7.5
wcwidth~=0.2.5
ptyprocess~=0.7.0
tzlocal~=4.2
jsonschema~=3.2.0
pycparser~=2.20
pyrsistent~=0.17.3
idna~=2.10
APScheduler~=3.9.1
entrypoints~=0.3
statsmodels~=0.13.2
nltk~=3.7
tqdm~=4.64.1
requests~=2.25.1
regex~=2022.9.13
pyparsing~=2.4.7
keras~=2.10.0
ipywidgets~=7.6.3
notebook~=6.4.0
patsy~=0.5.2
uritemplate~=3.0.1
google-api-python-client~=2.8.0
bs4==0.0.1
import pickle
import re
import ssl

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# load vectorizer
path_vectorizer = 'model/vectorizer.pickle'
with open(path_vectorizer, 'rb') as data:
    vectorizer = pickle.load(data)

# load model
path_model = 'model/best_rfc.pickle'
with open(path_model, 'rb') as data:
    model = pickle.load(data)

# allow NLTK downloads on machines without valid SSL certificates
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

# Downloading punkt and wordnet from NLTK
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('wordnet')

# Downloading the stop words list
nltk.download('stopwords')

# Saving the lemmatizer into an object
wordnet_lemmatizer = WordNetLemmatizer()

# Loading the stop words in english
stop_words = list(stopwords.words('english'))
def pre_processing(sentence):
    # 1.1. Replace \r and \n
    sentence = sentence.replace("\r", " ")
    sentence = sentence.replace("\n", " ")
    # 1.2. Convert to lowercase
    sentence = sentence.lower()
    # 1.3. Remove punctuation signs
    punctuation_signs = list("?:!.,;-$&^*%(){}[]/><@#~`|+_=“”…’−‘")
    for punct_sign in punctuation_signs:
        sentence = sentence.replace(punct_sign, '')
    # 1.4. Remove possessive pronouns
    sentence = sentence.replace("'s", "")
    # 1.5. Remove numbers
    digits = list("1234567890")
    for digit in digits:
        sentence = sentence.replace(digit, '')
    # 1.6. Remove single quotes and double quotes
    sentence = sentence.replace("'", "")
    sentence = sentence.replace('"', '')
    # 1.7. Lemmatization
    lemmatized_list = []
    text_words = sentence.split(" ")
    for word in text_words:
        lemmatized_list.append(wordnet_lemmatizer.lemmatize(word, pos="v"))
    sentence = " ".join(lemmatized_list)
    # 1.8. Remove stop words (str.replace does not understand regex
    # patterns, so the \b word boundaries need re.sub)
    for stop_word in stop_words:
        regex_stopword = r"\b" + stop_word + r"\b"
        sentence = re.sub(regex_stopword, '', sentence)
    # 1.9. Remove extra spaces
    sentence = " ".join(sentence.split())
    return sentence
def predict(sentence):
    sentence = pre_processing(sentence)
    vector = vectorizer.transform([sentence]).toarray()
    pred = model.predict(vector)
    return pred[0]
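# Illustrative call (commented out; the label convention is inferred from
# web_scrapping.py, which counts 1 as positive and 0 as negative):
'''
sample = "Bitcoin's price is expected to rise this week"
print(pre_processing(sample))  # cleaned, lemmatized, stop words removed
print(predict(sample))         # 0 or 1 from the pickled classifier
'''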
import requests
from bs4 import BeautifulSoup
from config import HEADER
from sentiment_analysis import predict

# html tags that need to be scraped
TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'span', 'td', 'li', 'a']
SOURCES = ['https://cointelegraph.com/', 'https://news.bitcoin.com/']
sentiment = 'Not Available'
def get_sentiment():
    return sentiment

def set_sentiment(_sentiment):
    global sentiment
    sentiment = _sentiment

def test_with_bs4(url):
    response = requests.get(url, headers=HEADER)
    soup = BeautifulSoup(response.text, "html.parser")
    with open("test.html", "w+") as file:
        file.write(str(soup))
def scrapping_sentences(url):
    sentences = []
    # download html page of the website
    response = requests.get(url, headers=HEADER)
    # parse with bs4
    soup = BeautifulSoup(response.text, "html.parser")
    # iterate through html tags
    for tag in TAGS:
        # find all inner texts for the tag
        rows = soup.find_all(tag)
        # iterate through all rows found related to the given tag
        for row in rows:
            # inner text to lower
            sentence = row.get_text().lower()
            # keep only alphabet
            sentence = ''.join(x for x in sentence if x.isalpha() or x == ' ')
            sentences.append(sentence)
    return sentences
def start_web_scrapping():
    # tally of predicted labels: 1 = positive, 0 = negative
    m = {
        1: 0,
        0: 0
    }
    for source in SOURCES:
        sentences = scrapping_sentences(source)
        for sentence in sentences:
            if len(sentence.strip()) > 0:
                # there should be a minimum of 6 words
                if len(sentence.split()) >= 6:
                    pred = predict(sentence)
                    m[pred] += 1
    if m[1] > m[0]:
        set_sentiment('Positive')
    else:
        set_sentiment('Negative')
# only for testing
'''
if __name__ == "__main__":
    start_web_scrapping()
'''