pushing the project

9cafb8de · thili97 · 17d4ba9c · 9cafb8de · 9cafb8de · 9cafb8de
Commit 9cafb8de authored Jul 04, 2021 by thili97
3 changed files
--- a/.ipynb_checkpoints/digits-checkpoint.ipynb
+++ b/.ipynb_checkpoints/digits-checkpoint.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab90d4da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tensorflow.keras.models import load_model\n",
+    "\n",
+    "# MNIST ships with Keras; load_data() hands back the (train, test) pairs used below.\n",
+    "dataset = tf.keras.datasets.mnist\n",
+    "(X_train, y_train), (X_test, y_test) = dataset.load_data()\n",
+    "\n",
+    "# Bring pixel intensities into the 0-1 range.\n",
+    "X_train = X_train / 255.0\n",
+    "X_test = X_test / 255.0\n",
+    "\n",
+    "# A dense network (ANN) consumes (batch_size, features) with features = width * height;\n",
+    "# a CNN would instead consume (batch, height, width, 1). Flatten every image to one row.\n",
+    "X_train = X_train.reshape(len(X_train), -1)\n",
+    "X_test = X_test.reshape(len(X_test), -1)\n",
+    "\n",
+    "# (batch_size, height, width, 1)\n",
+    "#### ANN ########\n",
+    "\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import Dense, Dropout\n",
+    "\n",
+    "# Dense classifier: two hidden ReLU layers, each followed by dropout,\n",
+    "# then a softmax over the ten digit classes [0-9].\n",
+    "model = Sequential([\n",
+    "    Dense(128, activation='relu'),\n",
+    "    Dropout(0.2),\n",
+    "    Dense(128, activation='relu'),\n",
+    "    Dropout(0.2),\n",
+    "    Dense(10, activation='softmax'),\n",
+    "])\n",
+    "\n",
+    "# Labels are plain integers, hence the sparse variant of the cross-entropy loss.\n",
+    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])\n",
+    "\n",
+    "# The last 10% of the training data is held out for validation.\n",
+    "model.fit(X_train, y_train, epochs=3, batch_size=12, validation_split=0.1)\n",
+    "\n",
+    "\n",
+    "#### making prediction #######\n",
+    "SAMPLE_INDEX = 1255   # test image used for the visual sanity check\n",
+    "sample = X_test[SAMPLE_INDEX]\n",
+    "# Classify only the displayed image; running the whole test set through the\n",
+    "# network just to read one row is wasted work.\n",
+    "predicted_digit = np.argmax(model.predict(sample.reshape(1, -1))[0])\n",
+    "\n",
+    "# x axis label = true digit, y axis label = predicted digit\n",
+    "plt.imshow(sample.reshape(28,28), cmap='gray')\n",
+    "plt.xlabel(y_test[SAMPLE_INDEX])\n",
+    "plt.ylabel(predicted_digit)\n",
+    "\n",
+    "\n",
+    "model.save('digit_trained.h5')\n",
+    "\n",
+    "\n",
+    "##### open cv for capture and predicting through camera #####\n",
+    "# Disabled demo: it lives inside a string literal, so it does not run with the cell.\n",
+    "'''\n",
+    "##### cv2\n",
+    "\n",
+    "\n",
+    "cap = cv2.VideoCapture(0)\n",
+    "while True:\n",
+    "    ret, img = cap.read()\n",
+    "    if not ret:\n",
+    "        # no frame was delivered, so img cannot be sliced below\n",
+    "        break\n",
+    "    #img = cv2.flip(img, 1)\n",
+    "    img = img[200:400, 200:400]\n",
+    "    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
+    "    _, gray = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)\n",
+    "    cv2.imshow(\"gray_wind\", gray)\n",
+    "    gray = cv2.resize(gray, (28, 28))\n",
+    "    #cv2.imshow('resized')\n",
+    "    # the network was trained on pixels scaled to 0-1, so scale the 0-255 frame the same way\n",
+    "    gray = gray.reshape(1, 784) / 255.0\n",
+    "    result = np.argmax(model.predict(gray))\n",
+    "    result = 'cnn : {}'.format(result)\n",
+    "    cv2.putText(img, org=(25,25), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, text= result, color=(255,0,0), thickness=1)\n",
+    "    cv2.imshow(\"image\", img)\n",
+    "   \n",
+    "    # 13 is the Enter key\n",
+    "    if cv2.waitKey(1) == 13:\n",
+    "        break\n",
+    "\n",
+    "cap.release()\n",
+    "cv2.destroyAllWindows()\n",
+    "#plt.imshow(img)\n",
+    "'''\n",
+    "\n",
+    "\n",
+    "############  prediction via paints ##########\n",
+    "### glob\n",
+    "run = False      # True while the left button is held, i.e. a stroke is in progress\n",
+    "ix,iy = -1,-1    # position of the most recent left-button press\n",
+    "follow = 25      # y position (px) of the next prediction label\n",
+    "img = np.zeros((512,512,1))      # what the window shows: strokes plus labels\n",
+    "canvas = np.zeros((512,512,1))   # strokes only: what the model is given\n",
+    "\n",
+    "### func\n",
+    "def _paint(x, y):\n",
+    "    \"\"\"Stamp one filled brush dot (radius 20) at (x, y) on the display image and the canvas.\"\"\"\n",
+    "    for target in (img, canvas):\n",
+    "        cv2.circle(target, (x,y), 20, (255,255,255), -1)\n",
+    "\n",
+    "\n",
+    "def _predict_digit():\n",
+    "    \"\"\"Return the class index the model assigns to the strokes drawn so far.\"\"\"\n",
+    "    # INTER_AREA averages source pixels, which suits the 512 -> 28 shrink.\n",
+    "    small = cv2.resize(canvas, (28, 28), interpolation=cv2.INTER_AREA)\n",
+    "    # Training data was divided by 255, so the 0/255 canvas is scaled the same way.\n",
+    "    return np.argmax(model.predict(small.reshape(1, 784) / 255.0))\n",
+    "\n",
+    "\n",
+    "def draw(event, x, y, flag, params):\n",
+    "    \"\"\"Mouse callback: paint while dragging, classify on release, clear on right click.\n",
+    "\n",
+    "    Args:\n",
+    "        event: OpenCV mouse event code (cv2.EVENT_*).\n",
+    "        x, y: pointer position reported with the event.\n",
+    "        flag, params: passed in by OpenCV, not used here.\n",
+    "    \"\"\"\n",
+    "    global run,ix,iy,img,canvas,follow\n",
+    "    if event == cv2.EVENT_LBUTTONDOWN:\n",
+    "        run = True\n",
+    "        ix, iy = x, y\n",
+    "    elif event == cv2.EVENT_MOUSEMOVE:\n",
+    "        if run:\n",
+    "            _paint(x, y)\n",
+    "\n",
+    "    elif event == cv2.EVENT_LBUTTONUP:\n",
+    "        run = False\n",
+    "        _paint(x, y)\n",
+    "        result = 'cnn : {}'.format(_predict_digit())\n",
+    "        # Labels go on the display image only; writing them on the canvas would\n",
+    "        # feed earlier labels back into the next prediction.\n",
+    "        cv2.putText(img, org=(25,follow), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, text= result, color=(255,0,0), thickness=1)\n",
+    "        follow += 25\n",
+    "    elif event == cv2.EVENT_RBUTTONDOWN:\n",
+    "        # start over: blank display, blank canvas, labels back to the top\n",
+    "        img = np.zeros((512,512,1))\n",
+    "        canvas = np.zeros((512,512,1))\n",
+    "        follow = 25\n",
+    "\n",
+    "\n",
+    "### param\n",
+    "cv2.namedWindow('image')\n",
+    "cv2.setMouseCallback('image', draw)\n",
+    "\n",
+    "ESC_KEY = 27\n",
+    "\n",
+    "try:\n",
+    "    while True:\n",
+    "        # img is looked up again on every pass, so a canvas reset in draw() shows up here\n",
+    "        cv2.imshow(\"image\", img)\n",
+    "\n",
+    "        # On some platforms waitKey reports extra bits above the low byte;\n",
+    "        # mask them off so Esc is still recognised.\n",
+    "        if cv2.waitKey(1) & 0xFF == ESC_KEY:\n",
+    "            break\n",
+    "finally:\n",
+    "    # Close the window even if the cell is interrupted from the notebook.\n",
+    "    cv2.destroyAllWindows()\n",
+    "\n",
+    "########## THANKS ##########\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/.ipynb_checkpoints/speech-checkpoint.ipynb
+++ b/.ipynb_checkpoints/speech-checkpoint.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "906b56b1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from tensorflow.compat.v1 import ConfigProto\n",
+    "from tensorflow.compat.v1 import Session\n",
+    "import os\n",
+    "import librosa\n",
+    "import IPython.display as ipd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from scipy.io import wavfile\n",
+    "from tqdm import tqdm\n",
+    "import warnings\n",
+    "\n",
+    "\n",
+    "# TensorFlow session created with the GPU allow_growth option switched on.\n",
+    "config = ConfigProto()\n",
+    "config.gpu_options.allow_growth = True\n",
+    "sess = Session(config=config)\n",
+    "\n",
+    "# NOTE(review): this silences every warning, including deprecation notices\n",
+    "# from librosa / Keras - consider narrowing the filter.\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "\n",
+    "# Words to recognise; each one is a sub-directory of train_audio_path.\n",
+    "labels = [\n",
+    "    'left', 'cat', 'wow', 'six', 'go', 'one', 'dog', 'nine', 'sheila', 'yes',\n",
+    "    'down', 'bird', 'tree', 'up', 'eight', 'bed', 'three', 'on', 'house',\n",
+    "    'five', 'seven', 'zero', 'right', 'four', 'no', 'two', 'off', 'happy',\n",
+    "    'stop', 'marvin'\n",
+    "]\n",
+    "\n",
+    "\n",
+    "train_audio_path = './train/audio/'\n",
+    "TARGET_RATE = 8000   # sample rate (Hz) of the clips fed to the network\n",
+    "\n",
+    "all_wave = []\n",
+    "all_label = []\n",
+    "for label in tqdm(labels):\n",
+    "    label_dir = os.path.join(train_audio_path, label)\n",
+    "    # os.listdir gives no ordering guarantee; sorting keeps the sample order reproducible\n",
+    "    waves = sorted(f for f in os.listdir(label_dir) if f.endswith('.wav'))\n",
+    "    for wav in waves:\n",
+    "        samples, sample_rate = librosa.load(os.path.join(label_dir, wav), sr = 16000)\n",
+    "        # rates passed by keyword: newer librosa releases reject them positionally\n",
+    "        samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=TARGET_RATE)\n",
+    "        # keep only clips that are exactly one second long at TARGET_RATE\n",
+    "        if len(samples) == TARGET_RATE:\n",
+    "            all_wave.append(samples)\n",
+    "            all_label.append(label)\n",
+    "            \n",
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "# keras.utils.np_utils is missing from newer Keras releases; to_categorical is\n",
+    "# exported by keras.utils itself.\n",
+    "from keras.utils import to_categorical\n",
+    "\n",
+    "# word -> integer id -> one-hot row\n",
+    "label_enconder = LabelEncoder()\n",
+    "y = label_enconder.fit_transform(all_label)\n",
+    "classes = list(label_enconder.classes_)   # id -> word, used later to decode predictions\n",
+    "y = to_categorical(y, num_classes=len(labels))\n",
+    "\n",
+    "# one row per clip, 8000 samples, a single channel\n",
+    "all_wave = np.array(all_wave).reshape(-1,8000,1)\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "# 80/20 split, stratified on the label and seeded so it can be repeated\n",
+    "x_train, x_valid, y_train, y_valid = train_test_split(all_wave, y, stratify=y, test_size=0.2, random_state=777, shuffle=True)\n",
+    "\n",
+    "from keras.layers import Bidirectional, BatchNormalization, TimeDistributed\n",
+    "try:\n",
+    "    from keras.layers import CuDNNGRU as RecurrentLayer\n",
+    "except ImportError:\n",
+    "    # this Keras build does not export CuDNNGRU: use the generic GRU layer instead\n",
+    "    from keras.layers import GRU as RecurrentLayer\n",
+    "\n",
+    "from keras.layers import Dense, Dropout, Flatten, Conv1D, Input, MaxPooling1D\n",
+    "from keras.models import Model\n",
+    "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
+    "from keras import backend as K\n",
+    "K.clear_session()\n",
+    "\n",
+    "inputs = Input(shape=(8000,1))\n",
+    "x = BatchNormalization(axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True)(inputs)\n",
+    "\n",
+    "# Three Conv1D -> MaxPooling1D -> Dropout stages, given as (filters, kernel_size)\n",
+    "for filters, kernel_size in ((8, 13), (16, 11), (32, 9)):\n",
+    "    x = Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)(x)\n",
+    "    x = MaxPooling1D(3)(x)\n",
+    "    x = Dropout(0.3)(x)\n",
+    "\n",
+    "x = BatchNormalization(axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True)(x)\n",
+    "\n",
+    "# Three stacked bidirectional recurrent layers; only the last one drops the time axis\n",
+    "for return_sequences in (True, True, False):\n",
+    "    x = Bidirectional(RecurrentLayer(128, return_sequences=return_sequences), merge_mode='sum')(x)\n",
+    "\n",
+    "x = BatchNormalization(axis=-1, momentum=0.99, epsilon=1e-3, center=True, scale=True)(x)\n",
+    "\n",
+    "#Flatten layer\n",
+    "# x = Flatten()(x)\n",
+    "\n",
+    "#Dense Layer 1\n",
+    "x = Dense(256, activation='relu')(x)\n",
+    "# one softmax output per word in labels\n",
+    "outputs = Dense(len(labels), activation=\"softmax\")(x)\n",
+    "\n",
+    "model = Model(inputs, outputs)\n",
+    "model.summary()\n",
+    "\n",
+    "# The metric is named 'acc' so its validation value is logged as 'val_acc',\n",
+    "# the key the checkpoint callback monitors below.\n",
+    "model.compile(loss='categorical_crossentropy',optimizer='nadam',metrics=['acc'])\n",
+    "\n",
+    "# Stop once val_loss has not improved by at least min_delta for 10 epochs.\n",
+    "early_stop = EarlyStopping(monitor='val_loss', mode='min', \n",
+    "                           verbose=1, patience=10, min_delta=0.0001)\n",
+    "\n",
+    "# Keep only the epoch with the highest validation accuracy on disk.\n",
+    "checkpoint = ModelCheckpoint('speech2text_model.hdf5', monitor='val_acc', \n",
+    "                             verbose=1, save_best_only=True, mode='max')\n",
+    "\n",
+    "hist = model.fit(\n",
+    "    x=x_train, \n",
+    "    y=y_train,\n",
+    "    epochs=100, \n",
+    "    callbacks=[early_stop, checkpoint], \n",
+    "    batch_size=32, \n",
+    "    validation_data=(x_valid,y_valid)\n",
+    ")\n",
+    "\n",
+    "# training loss vs. validation loss per epoch\n",
+    "from matplotlib import pyplot\n",
+    "pyplot.plot(hist.history['loss'], label='train')\n",
+    "pyplot.plot(hist.history['val_loss'], label='test')\n",
+    "pyplot.legend()\n",
+    "pyplot.show()\n",
+    "\n",
+    "# The checkpoint callback already wrote the best epoch to speech2text_model.hdf5;\n",
+    "# the last-epoch model goes to its own file so it does not overwrite that one.\n",
+    "model.save('speech2text_model_last.hdf5')\n",
+    "\n",
+    "from keras.models import load_model\n",
+    "model = load_model('speech2text_model.hdf5')\n",
+    "\n",
+    "def s2t_predict(audio, shape_num=8000):\n",
+    "    \"\"\"Return the word predicted for one audio clip.\n",
+    "\n",
+    "    Args:\n",
+    "        audio: array of audio samples; it is flattened before use.\n",
+    "        shape_num: number of samples handed to the model per clip.\n",
+    "\n",
+    "    Returns:\n",
+    "        The entry of classes with the highest predicted probability.\n",
+    "    \"\"\"\n",
+    "    audio = np.asarray(audio).ravel()\n",
+    "    # Zero-pad short clips and trim long ones so the reshape below cannot fail;\n",
+    "    # clips that already hold shape_num samples pass through unchanged.\n",
+    "    clip = np.zeros(shape_num, dtype=audio.dtype)\n",
+    "    usable = min(len(audio), shape_num)\n",
+    "    clip[:usable] = audio[:usable]\n",
+    "    prob=model.predict(clip.reshape(1,shape_num,1))\n",
+    "    index=np.argmax(prob[0])\n",
+    "    return classes[index]\n",
+    "\n",
+    "import random\n",
+    "# pick one validation clip at random and compare its label with the prediction\n",
+    "index=random.randint(0,len(x_valid)-1)\n",
+    "samples=x_valid[index].ravel()\n",
+    "print(\"Audio:\",classes[np.argmax(y_valid[index])])\n",
+    "# A bare expression is rendered only when it is the last line of a cell,\n",
+    "# so the player and the shape are shown explicitly.\n",
+    "ipd.display(ipd.Audio(samples, rate=8000))\n",
+    "\n",
+    "print(samples.shape)\n",
+    "\n",
+    "print(\"Text:\",s2t_predict(samples))\n",
+    "\n",
+    "import sounddevice as sd\n",
+    "import soundfile as sf\n",
+    "\n",
+    "samplerate = 16000  \n",
+    "duration = 1 # seconds\n",
+    "filename = 'zero.wav'\n",
+    "print(\"start\")\n",
+    "# record one mono clip from the default input device\n",
+    "mydata = sd.rec(int(samplerate * duration), samplerate=samplerate,\n",
+    "    channels=1, blocking=True)\n",
+    "print(\"end\")\n",
+    "sd.wait()\n",
+    "sf.write(filename, mydata, samplerate)\n",
+    "\n",
+    "# NOTE(review): this loads 'yes.wav', not the file written above ('zero.wav') - confirm which is intended.\n",
+    "test, test_rate = librosa.load('yes.wav', sr = 16000)\n",
+    "# rates passed by keyword: newer librosa releases reject them positionally\n",
+    "test_sample = librosa.resample(test, orig_sr=test_rate, target_sr=8000)\n",
+    "print(test_sample.shape)\n",
+    "# shown explicitly because a mid-cell bare expression is not rendered\n",
+    "ipd.display(ipd.Audio(test_sample,rate=8000))\n",
+    "\n",
+    "s2t_predict(test_sample)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/digits.ipynb
+++ b/digits.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "060a52c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 1/3\n",
+      "4500/4500 [==============================] - 3s 637us/step - loss: 0.2888 - acc: 0.9111 - val_loss: 0.0949 - val_acc: 0.9733\n",
+      "Epoch 2/3\n",
+      "4500/4500 [==============================] - 3s 612us/step - loss: 0.1527 - acc: 0.9536 - val_loss: 0.0909 - val_acc: 0.9710\n",
+      "Epoch 3/3\n",
+      "4500/4500 [==============================] - 3s 632us/step - loss: 0.1241 - acc: 0.9625 - val_loss: 0.0878 - val_acc: 0.9740\n"
+     ]
+    }
+   ],
+   "source": [
+    "import tensorflow as tf\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tensorflow.keras.models import load_model\n",
+    "\n",
+    "# MNIST ships with Keras; load_data() hands back the (train, test) pairs used below.\n",
+    "dataset = tf.keras.datasets.mnist\n",
+    "(X_train, y_train), (X_test, y_test) = dataset.load_data()\n",
+    "\n",
+    "# Bring pixel intensities into the 0-1 range.\n",
+    "X_train = X_train / 255.0\n",
+    "X_test = X_test / 255.0\n",
+    "\n",
+    "# A dense network (ANN) consumes (batch_size, features) with features = width * height;\n",
+    "# a CNN would instead consume (batch, height, width, 1). Flatten every image to one row.\n",
+    "X_train = X_train.reshape(len(X_train), -1)\n",
+    "X_test = X_test.reshape(len(X_test), -1)\n",
+    "\n",
+    "# (batch_size, height, width, 1)\n",
+    "#### ANN ########\n",
+    "\n",
+    "from tensorflow.keras.models import Sequential\n",
+    "from tensorflow.keras.layers import Dense, Dropout\n",
+    "\n",
+    "# Dense classifier: two hidden ReLU layers, each followed by dropout,\n",
+    "# then a softmax over the ten digit classes [0-9].\n",
+    "model = Sequential([\n",
+    "    Dense(128, activation='relu'),\n",
+    "    Dropout(0.2),\n",
+    "    Dense(128, activation='relu'),\n",
+    "    Dropout(0.2),\n",
+    "    Dense(10, activation='softmax'),\n",
+    "])\n",
+    "\n",
+    "# Labels are plain integers, hence the sparse variant of the cross-entropy loss.\n",
+    "model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])\n",
+    "\n",
+    "# The last 10% of the training data is held out for validation.\n",
+    "model.fit(X_train, y_train, epochs=3, batch_size=12, validation_split=0.1)\n",
+    "\n",
+    "\n",
+    "#### making prediction #######\n",
+    "SAMPLE_INDEX = 1255   # test image used for the visual sanity check\n",
+    "sample = X_test[SAMPLE_INDEX]\n",
+    "# Classify only the displayed image; running the whole test set through the\n",
+    "# network just to read one row is wasted work.\n",
+    "predicted_digit = np.argmax(model.predict(sample.reshape(1, -1))[0])\n",
+    "\n",
+    "# x axis label = true digit, y axis label = predicted digit\n",
+    "plt.imshow(sample.reshape(28,28), cmap='gray')\n",
+    "plt.xlabel(y_test[SAMPLE_INDEX])\n",
+    "plt.ylabel(predicted_digit)\n",
+    "\n",
+    "\n",
+    "model.save('digit_trained.h5')\n",
+    "\n",
+    "\n",
+    "##### open cv for capture and predicting through camera #####\n",
+    "# Disabled demo: it lives inside a string literal, so it does not run with the cell.\n",
+    "'''\n",
+    "##### cv2\n",
+    "\n",
+    "\n",
+    "cap = cv2.VideoCapture(0)\n",
+    "while True:\n",
+    "    ret, img = cap.read()\n",
+    "    if not ret:\n",
+    "        # no frame was delivered, so img cannot be sliced below\n",
+    "        break\n",
+    "    #img = cv2.flip(img, 1)\n",
+    "    img = img[200:400, 200:400]\n",
+    "    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
+    "    _, gray = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)\n",
+    "    cv2.imshow(\"gray_wind\", gray)\n",
+    "    gray = cv2.resize(gray, (28, 28))\n",
+    "    #cv2.imshow('resized')\n",
+    "    # the network was trained on pixels scaled to 0-1, so scale the 0-255 frame the same way\n",
+    "    gray = gray.reshape(1, 784) / 255.0\n",
+    "    result = np.argmax(model.predict(gray))\n",
+    "    result = 'cnn : {}'.format(result)\n",
+    "    cv2.putText(img, org=(25,25), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, text= result, color=(255,0,0), thickness=1)\n",
+    "    cv2.imshow(\"image\", img)\n",
+    "   \n",
+    "    # 13 is the Enter key\n",
+    "    if cv2.waitKey(1) == 13:\n",
+    "        break\n",
+    "\n",
+    "cap.release()\n",
+    "cv2.destroyAllWindows()\n",
+    "#plt.imshow(img)\n",
+    "'''\n",
+    "\n",
+    "\n",
+    "############  prediction via paints ##########\n",
+    "### glob\n",
+    "run = False      # True while the left button is held, i.e. a stroke is in progress\n",
+    "ix,iy = -1,-1    # position of the most recent left-button press\n",
+    "follow = 25      # y position (px) of the next prediction label\n",
+    "img = np.zeros((512,512,1))      # what the window shows: strokes plus labels\n",
+    "canvas = np.zeros((512,512,1))   # strokes only: what the model is given\n",
+    "\n",
+    "### func\n",
+    "def _paint(x, y):\n",
+    "    \"\"\"Stamp one filled brush dot (radius 20) at (x, y) on the display image and the canvas.\"\"\"\n",
+    "    for target in (img, canvas):\n",
+    "        cv2.circle(target, (x,y), 20, (255,255,255), -1)\n",
+    "\n",
+    "\n",
+    "def _predict_digit():\n",
+    "    \"\"\"Return the class index the model assigns to the strokes drawn so far.\"\"\"\n",
+    "    # INTER_AREA averages source pixels, which suits the 512 -> 28 shrink.\n",
+    "    small = cv2.resize(canvas, (28, 28), interpolation=cv2.INTER_AREA)\n",
+    "    # Training data was divided by 255, so the 0/255 canvas is scaled the same way.\n",
+    "    return np.argmax(model.predict(small.reshape(1, 784) / 255.0))\n",
+    "\n",
+    "\n",
+    "def draw(event, x, y, flag, params):\n",
+    "    \"\"\"Mouse callback: paint while dragging, classify on release, clear on right click.\n",
+    "\n",
+    "    Args:\n",
+    "        event: OpenCV mouse event code (cv2.EVENT_*).\n",
+    "        x, y: pointer position reported with the event.\n",
+    "        flag, params: passed in by OpenCV, not used here.\n",
+    "    \"\"\"\n",
+    "    global run,ix,iy,img,canvas,follow\n",
+    "    if event == cv2.EVENT_LBUTTONDOWN:\n",
+    "        run = True\n",
+    "        ix, iy = x, y\n",
+    "    elif event == cv2.EVENT_MOUSEMOVE:\n",
+    "        if run:\n",
+    "            _paint(x, y)\n",
+    "\n",
+    "    elif event == cv2.EVENT_LBUTTONUP:\n",
+    "        run = False\n",
+    "        _paint(x, y)\n",
+    "        result = 'cnn : {}'.format(_predict_digit())\n",
+    "        # Labels go on the display image only; writing them on the canvas would\n",
+    "        # feed earlier labels back into the next prediction.\n",
+    "        cv2.putText(img, org=(25,follow), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, text= result, color=(255,0,0), thickness=1)\n",
+    "        follow += 25\n",
+    "    elif event == cv2.EVENT_RBUTTONDOWN:\n",
+    "        # start over: blank display, blank canvas, labels back to the top\n",
+    "        img = np.zeros((512,512,1))\n",
+    "        canvas = np.zeros((512,512,1))\n",
+    "        follow = 25\n",
+    "\n",
+    "\n",
+    "### param\n",
+    "cv2.namedWindow('image')\n",
+    "cv2.setMouseCallback('image', draw)\n",
+    "\n",
+    "ESC_KEY = 27\n",
+    "\n",
+    "try:\n",
+    "    while True:\n",
+    "        # img is looked up again on every pass, so a canvas reset in draw() shows up here\n",
+    "        cv2.imshow(\"image\", img)\n",
+    "\n",
+    "        # On some platforms waitKey reports extra bits above the low byte;\n",
+    "        # mask them off so Esc is still recognised.\n",
+    "        if cv2.waitKey(1) & 0xFF == ESC_KEY:\n",
+    "            break\n",
+    "finally:\n",
+    "    # Close the window even if the cell is interrupted from the notebook.\n",
+    "    cv2.destroyAllWindows()\n",
+    "\n",
+    "########## THANKS ##########\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3624a4a0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "127a71a9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}