Commit 659f9e8a authored by Pulasthi Tharaka

upload preprocessing files

parent 2d0a9ddc
# -*- coding: utf-8 -*-
"""Untitled1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1VT1vD9lWM4WJeb0Y6JmRbsH89yR5C2dE
"""
import os
import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
# Set the path to your dataset folder (one sub-folder per class)
data_dir = 'drive/MyDrive/DatasetNew'
# Set the dimensions and parameters for audio processing
sample_rate = 44100  # Adjust based on your dataset
duration = 4         # Clip length in seconds; adjust based on your dataset
num_classes = len(os.listdir(data_dir))
batch_size = 32
# Custom dataset class: collects (audio_path, label_index) pairs from the
# class sub-folders of data_dir
class VoiceDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.file_list = []
        self.labels = []
        class_labels = sorted(os.listdir(data_dir))
        for i, label in enumerate(class_labels):
            class_dir = os.path.join(data_dir, label)
            for audio_file in os.listdir(class_dir):
                audio_path = os.path.join(class_dir, audio_file)
                self.file_list.append(audio_path)
                self.labels.append(i)

    def __getitem__(self, index):
        audio_path = self.file_list[index]
        waveform, _ = torchaudio.load(audio_path, num_frames=duration * sample_rate)
        label = self.labels[index]
        return waveform, label

    def __len__(self):
        return len(self.file_list)
class VoiceModel(nn.Module):
    def __init__(self, num_classes):
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # Pool to a fixed 25 frames so fc1's 64 * 25 input size holds for
        # any input waveform length
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax
        # internally, so a Softmax layer here would be redundant
        x = self.fc2(x)
        return x
# Initialize the model on the available device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VoiceModel(num_classes).to(device)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Batch collation: pad each waveform to the longest one in the batch
def collate_fn(batch):
    # Waveforms are (channels, frames), so the frame count is dim 1
    max_length = max(waveform.size(1) for waveform, label in batch)
    # Pad all waveforms to the maximum length
    batch = [(F.pad(waveform, (0, max_length - waveform.size(1))), label)
             for waveform, label in batch]
    # Stack the waveforms; labels are plain ints, so build a tensor from
    # them directly rather than with torch.stack
    waveforms = torch.stack([waveform for waveform, label in batch], dim=0)
    labels = torch.tensor([label for waveform, label in batch])
    return waveforms, labels
# Create the dataset and dataloader; DataLoader needs a Dataset instance
# (not the directory path) and the padding collate_fn from above
batch_size = 32   # Desired batch size
shuffle = True    # Whether to shuffle the data
num_workers = 4   # Number of worker processes for data loading
dataset = VoiceDataset(data_dir)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                        num_workers=num_workers, collate_fn=collate_fn)
# Evaluate the model
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        waveform = waveform.to(device)
        label = label.to(device)
        # Batches are already (batch, channels, frames), the (N, C, L)
        # layout Conv1d expects, so no unsqueeze is needed
        outputs = model(waveform)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
accuracy = 100 * correct / total
print('Accuracy:', accuracy)
import os
import torch
import torchaudio
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
# Set the path to your dataset folder (one sub-folder per class)
data_dir = 'drive/MyDrive/DatasetNew'
# Set the dimensions and parameters for audio processing
sample_rate = 44100  # Adjust based on your dataset
duration = 4         # Clip length in seconds; adjust based on your dataset
num_classes = len(os.listdir(data_dir))
batch_size = 32
# Custom dataset class
class VoiceDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.file_list = []
        self.labels = []
        class_labels = sorted(os.listdir(data_dir))
        for i, label in enumerate(class_labels):
            class_dir = os.path.join(data_dir, label)
            for audio_file in os.listdir(class_dir):
                audio_path = os.path.join(class_dir, audio_file)
                self.file_list.append(audio_path)
                self.labels.append(i)

    def __getitem__(self, index):
        audio_path = self.file_list[index]
        waveform, _ = torchaudio.load(audio_path, num_frames=duration * sample_rate)
        label = self.labels[index]
        return waveform, label

    def __len__(self):
        return len(self.file_list)
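# Quick sanity check (a sketch, assuming data_dir already holds one
# sub-folder of audio files per class): load a single item and confirm
# the (channels, frames) waveform shape and integer label.
_dataset_check = VoiceDataset(data_dir)
_waveform, _label = _dataset_check[0]
print('samples:', len(_dataset_check), 'shape:', tuple(_waveform.shape), 'label:', _label)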
class VoiceModel(nn.Module):
    def __init__(self, num_classes):
        super(VoiceModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=2)
        self.relu2 = nn.ReLU()
        # Pool to a fixed 25 frames so fc1's 64 * 25 input size holds for
        # any input waveform length
        self.pool = nn.AdaptiveAvgPool1d(25)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(64 * 25, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu3(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax
        # internally, so a Softmax layer here would be redundant
        x = self.fc2(x)
        return x
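# Shape sanity check (a sketch with a hypothetical dummy batch): Conv1d
# expects (batch, channels, length), and the adaptive pool makes any
# length work; the output should be (batch, num_classes) logits.
_dummy = torch.randn(2, 1, sample_rate)  # two mono clips of one second
print(VoiceModel(num_classes)(_dummy).shape)  # -> torch.Size([2, num_classes])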
# Initialize the model
model = VoiceModel(num_classes)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Batch collation: pad each waveform to the longest one in the batch
def collate_fn(batch):
    # Waveforms are (channels, frames), so the frame count is dim 1
    max_length = max(waveform.size(1) for waveform, label in batch)
    # Pad the waveforms to the maximum length
    padded_batch = []
    for waveform, label in batch:
        padded_waveform = F.pad(waveform, (0, max_length - waveform.size(1)))
        padded_batch.append((padded_waveform, label))
    # Stack the padded waveforms; labels are plain ints, so build a
    # tensor from them directly rather than with torch.stack
    waveforms = torch.stack([waveform for waveform, label in padded_batch], dim=0)
    labels = torch.tensor([label for waveform, label in padded_batch])
    return waveforms, labels
# Create the dataset and dataloader
dataset = VoiceDataset(data_dir)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4, collate_fn=collate_fn)
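# Train the model: a minimal epoch-loop sketch using the criterion,
# optimizer, and dataloader defined above (num_epochs is an assumed
# hyperparameter, not part of the original script).
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for waveforms, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(waveforms)         # (batch, channels, frames) -> logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch + 1}/{num_epochs}, loss: {running_loss / len(dataloader):.4f}')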
# Evaluate the model
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for waveform, label in dataloader:
        # Batches are already (batch, channels, frames), the (N, C, L)
        # layout Conv1d expects, so no unsqueeze is needed
        outputs = model(waveform)
        _, predicted = torch.max(outputs, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
accuracy = 100 * correct / total
print('Accuracy:', accuracy)