Commit 659f9e8a authored by Pulasthi Tharaka

upload preprocessing files

parent 2d0a9ddc
# -*- coding: utf-8 -*-
"""Untitled1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1VT1vD9lWM4WJeb0Y6JmRbsH89yR5C2dE
"""
import os
import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# Set the path to your dataset folder (one sub-folder per class)
data_dir = 'drive/MyDrive/DatasetNew'
# Set the dimensions and parameters for audio processing
sample_rate = 44100  # Adjust based on your dataset
duration = 4         # Clip length in seconds; adjust based on your dataset
num_classes = len(os.listdir(data_dir))
batch_size = 32
# Custom dataset class: collects (audio_path, label_index) pairs from the
# class sub-folders of data_dir
class VoiceDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.file_list = []
        self.labels = []
        class_labels = sorted(os.listdir(data_dir))
        for i, label in enumerate(class_labels):
            class_dir = os.path.join(data_dir, label)
            for audio_file in os.listdir(class_dir):
                audio_path = os.path.join(class_dir, audio_file)
                self.file_list.append(audio_path)
                self.labels.append(i)

    def __getitem__(self, index):
        audio_path = self.file_list[index]
        waveform, _ = torchaudio.load(audio_path, num_frames=duration * sample_rate)
        label = self.labels[index]
        return waveform, label

    def __len__(self):
        return len(self.file_list)
class VoiceModel(nn.Module):
    def __init__(self, num_classes):
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # Pool to a fixed 25 frames so fc1's 64 * 25 input size holds for
        # any input waveform length
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax
        # internally, so a Softmax layer here would be redundant
        x = self.fc2(x)
        return x
# Initialize the model on the available device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VoiceModel(num_classes).to(device)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Batch collation: pad each waveform to the longest one in the batch
def collate_fn(batch):
    # Waveforms are (channels, frames), so the frame count is dim 1
    max_length = max(waveform.size(1) for waveform, label in batch)
    # Pad all waveforms to the maximum length
    batch = [(F.pad(waveform, (0, max_length - waveform.size(1))), label)
             for waveform, label in batch]
    # Stack the waveforms; labels are plain ints, so build a tensor from
    # them directly rather than with torch.stack
    waveforms = torch.stack([waveform for waveform, label in batch], dim=0)
    labels = torch.tensor([label for waveform, label in batch])
    return waveforms, labels
# Create the dataset and dataloader; DataLoader needs a Dataset instance
# (not the directory path) and the padding collate_fn from above
batch_size = 32   # Desired batch size
shuffle = True    # Whether to shuffle the data
num_workers = 4   # Number of worker processes for data loading
dataset = VoiceDataset(data_dir)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                        num_workers=num_workers, collate_fn=collate_fn)
# Evaluate the model
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        waveform = waveform.to(device)
        label = label.to(device)
        # Batches are already (batch, channels, frames), the (N, C, L)
        # layout Conv1d expects, so no unsqueeze is needed
        outputs = model(waveform)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
accuracy = 100 * correct / total
print('Accuracy:', accuracy)
import os
import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
# Set the path to your dataset folder (one sub-folder per class)
data_dir = 'drive/MyDrive/DatasetNew'
# Set the dimensions and parameters for audio processing
sample_rate = 44100  # Adjust based on your dataset
duration = 4         # Clip length in seconds; adjust based on your dataset
num_classes = len(os.listdir(data_dir))
batch_size = 32
# Custom dataset class
class VoiceDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.file_list = []
        self.labels = []
        class_labels = sorted(os.listdir(data_dir))
        for i, label in enumerate(class_labels):
            class_dir = os.path.join(data_dir, label)
            for audio_file in os.listdir(class_dir):
                audio_path = os.path.join(class_dir, audio_file)
                self.file_list.append(audio_path)
                self.labels.append(i)

    def __getitem__(self, index):
        audio_path = self.file_list[index]
        waveform, _ = torchaudio.load(audio_path, num_frames=duration * sample_rate)
        label = self.labels[index]
        return waveform, label

    def __len__(self):
        return len(self.file_list)
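# Quick sanity check (a sketch, assuming data_dir already holds one
# sub-folder of audio files per class): load a single item and confirm
# the (channels, frames) waveform shape and integer label.
_dataset_check = VoiceDataset(data_dir)
_waveform, _label = _dataset_check[0]
print('samples:', len(_dataset_check), 'shape:', tuple(_waveform.shape), 'label:', _label)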
class VoiceModel(nn.Module):
    def __init__(self, num_classes):
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # Pool to a fixed 25 frames so fc1's 64 * 25 input size holds for
        # any input waveform length
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax
        # internally, so a Softmax layer here would be redundant
        x = self.fc2(x)
        return x
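# Shape sanity check (a sketch with a hypothetical dummy batch): Conv1d
# expects (batch, channels, length), and the adaptive pool makes any
# length work; the output should be (batch, num_classes) logits.
_dummy = torch.randn(2, 1, sample_rate)  # two mono clips of one second
print(VoiceModel(num_classes)(_dummy).shape)  # -> torch.Size([2, num_classes])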
# Initialize the model
model = VoiceModel(num_classes)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Batch collation: pad each waveform to the longest one in the batch
def collate_fn(batch):
    # Waveforms are (channels, frames), so the frame count is dim 1
    max_length = max(waveform.size(1) for waveform, label in batch)
    # Pad the waveforms to the maximum length
    padded_batch = []
    for waveform, label in batch:
        padded_waveform = F.pad(waveform, (0, max_length - waveform.size(1)))
        padded_batch.append((padded_waveform, label))
    # Stack the padded waveforms; labels are plain ints, so build a
    # tensor from them directly rather than with torch.stack
    waveforms = torch.stack([waveform for waveform, label in padded_batch], dim=0)
    labels = torch.tensor([label for waveform, label in padded_batch])
    return waveforms, labels
# Create the dataset and dataloader
dataset = VoiceDataset(data_dir)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, collate_fn=collate_fn)
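# Train the model: a minimal epoch-loop sketch using the criterion,
# optimizer, and dataloader defined above (num_epochs is an assumed
# hyperparameter, not part of the original script).
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for waveforms, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(waveforms)         # (batch, channels, frames) -> logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}/{num_epochs}, loss: {running_loss / len(dataloader):.4f}')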
# Evaluate the model
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        # Batches are already (batch, channels, frames), the (N, C, L)
        # layout Conv1d expects, so no unsqueeze is needed
        outputs = model(waveform)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
accuracy = 100 * correct / total
print('Accuracy:', accuracy)