Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset sumedha #12

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
__pycache__
.vscode

**/.DS_Store
training_summaries
cassava-leaf-disease-classification

3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/projects-skeleton-code.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Training hyperparameters (post-PR values).
EPOCHS = 3       # number of full passes over the training split
BATCH_SIZE = 64  # samples per gradient step
N_EVAL = 500     # run evaluation/logging every N_EVAL training steps
# Root folder containing train.csv and train_images/ from the Kaggle
# cassava-leaf-disease-classification dataset.
PATH_TO_DATA = 'cassava-leaf-disease-classification/'
Binary file added constants.pyc
Binary file not shown.
40 changes: 31 additions & 9 deletions data/StartingDataset.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,41 @@
import torch


import torchvision
import numpy
import constants
from PIL import Image
import pandas
#from resizeimage import resizeimage
import matplotlib.pyplot as plt
# from matplotlib import image
# from matplotlib import pyplot
class StartingDataset(torch.utils.data.Dataset):
    """Cassava leaf-disease dataset backed by the Kaggle CSV + image folder.

    Each row of ``train.csv`` is ``(image_name, label)``.  The first 19257
    rows form the training split and the last 2140 rows the validation
    split.
    # NOTE(review): split sizes are hard-coded for the 21397-row Kaggle
    # CSV — confirm they still cover the file with no overlap if the data
    # changes.
    """

    def __init__(self, isTrain, datapath='cassava-leaf-disease-classification/'):
        """Load the CSV index for the requested split.

        isTrain:  True for the training split, False for validation.
        datapath: root folder containing train.csv and train_images/.
        """
        self.datapath = datapath
        csv = pandas.read_csv(self.datapath + 'train.csv')
        if isTrain:
            self.csv_data = csv.head(19257).to_numpy()
        else:
            self.csv_data = csv.tail(2140).to_numpy()

    def __getitem__(self, index):
        """Return (3x224x224 float tensor in [0, 1], integer class label)."""
        image_name, label = self.csv_data[index]
        # The context manager guarantees the file handle is closed even if
        # decoding or the transforms raise.
        with Image.open(self.datapath + '/train_images/' + image_name) as img:
            resized = torchvision.transforms.functional.resize(img, (224, 224))
            inputs = torchvision.transforms.ToTensor()(resized)
        return inputs, label

    def countTypes(self):
        """Show a histogram of label frequencies for this split (blocking)."""
        df = pandas.DataFrame(data=self.csv_data, columns=['id', 'label'])
        df['label'] = df['label'].astype(int)
        df.hist(column=['label'])
        plt.show()

    def __len__(self):
        # Number of rows in this split's slice of the CSV.
        return len(self.csv_data)
23 changes: 12 additions & 11 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os

import torch
import constants
from data.StartingDataset import StartingDataset
from networks.StartingNetwork import StartingNetwork
from networks.StartingNetwork import StartingNetwork, Model_b
from train_functions.starting_train import starting_train


Expand All @@ -12,22 +12,23 @@ def main():

# TODO: Add GPU support. This line of code might be helpful.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device('cuda')

print("Epochs:", constants.EPOCHS)
print("Batch size:", constants.BATCH_SIZE)

# Initalize dataset and model. Then train the model!
train_dataset = StartingDataset()
val_dataset = StartingDataset()
model = StartingNetwork()
train_dataset = StartingDataset(True)
val_dataset = StartingDataset(False)
model = Model_b()
starting_train(
train_dataset=train_dataset,
val_dataset=val_dataset,
model=model,
hyperparameters=hyperparameters,
n_eval=constants.N_EVAL,
train_dataset=train_dataset,
val_dataset=val_dataset,
model=model,
hyperparameters=hyperparameters,
n_eval=constants.N_EVAL,
)


if __name__ == "__main__":
main()
main()
26 changes: 23 additions & 3 deletions networks/StartingNetwork.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import torch
import torch.nn as nn
import torchvision.models as models


class StartingNetwork(torch.nn.Module):
Expand All @@ -10,11 +11,30 @@ class StartingNetwork(torch.nn.Module):
def __init__(self):
super().__init__()
self.flatten = nn.Flatten()
self.fc = nn.Linear(224 * 224 * 3, 1)
self.sigmoid = nn.Sigmoid()
self.fc = nn.Linear(224 * 224 * 3, 5)
# self.sigmoid = nn.Sigmoid()

def forward(self, x):
x = self.flatten(x)
x = self.fc(x)
x = self.sigmoid(x)
# x = self.sigmoid(x)
return x


class Model_b(nn.Module):
    """ImageNet-pretrained ResNet-18 feature extractor with a fresh 5-way
    linear classification head.

    The backbone is run under ``torch.no_grad()`` in ``forward``, so only
    the head receives gradients during training (linear-probe setup).
    """

    def __init__(self):
        super(Model_b, self).__init__()
        backbone = models.resnet18(pretrained=True)
        # Keep everything up to (and including) the global average pool;
        # drop ResNet's own 1000-way fc layer.
        self.encoder = nn.Sequential(*list(backbone.children())[:-1])
        # 512 = ResNet-18's pooled feature width; 5 cassava classes.
        self.fc = nn.Linear(512, 5)

    def forward(self, x):
        # No gradient tracking through the frozen backbone.
        with torch.no_grad():
            feats = self.encoder(x)
        flat = torch.flatten(feats, 1)
        return self.fc(flat)





71 changes: 63 additions & 8 deletions train_functions/starting_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import constants

def starting_train( train_dataset, val_dataset, model, hyperparameters, n_eval):
# Use GPU
if torch.cuda.is_available(): # Check if GPU is available
device = torch.device('cuda')
else:
device = torch.device('cpu')

def starting_train(train_dataset, val_dataset, model, hyperparameters, n_eval):
# Move the model to the GPU
model = model.to(device)
"""
Trains and evaluates a model.

Expand Down Expand Up @@ -38,22 +46,40 @@ def starting_train(train_dataset, val_dataset, model, hyperparameters, n_eval):
# Loop over each batch in the dataset
for batch in tqdm(train_loader):
# TODO: Backpropagation and gradient descent

# Periodically evaluate our model + log to Tensorboard
model.train()
batch_inputs, batch_labels = batch
batch_inputs = batch_inputs.to(device)
batch_labels = batch_labels.to(device)
predictions = model(batch_inputs)
loss = loss_fn(predictions, batch_labels)
loss.backward()
optimizer.step()
optimizer.zero_grad()
# Periodically evaluate our model + log to Tensorboard
if step % n_eval == 0:
# TODO:
# Compute training loss and accuracy.
# Log the results to Tensorboard.

model.eval()
# pass
print('Training Loss: ', loss.item())

# for data in iter(train_loader):
batch_inputs, batch_labels = batch
batch_inputs = batch_inputs.to(device)
batch_labels = batch_labels.to(device)
predictions = model(batch_inputs).argmax(axis=1)
accuracy = 100 * compute_accuracy(predictions, batch_labels)
print(accuracy, "%")
# TODO:
# Compute validation loss and accuracy.
# Log the results to Tensorboard.
# Don't forget to turn off gradient calculations!
evaluate(val_loader, model, loss_fn)

model.train()
step += 1

print()
print(step)


def compute_accuracy(outputs, labels):
    """Return the fraction of `outputs` that equal `labels`.

    Example:
        outputs = torch.tensor([1, 2, 0, 3]), labels = torch.tensor([1, 2, 3, 3])
        -> 0.75
    """
    # Cast to float because torch.round is not defined for integer dtypes;
    # for already-integral predictions rounding is a no-op.
    n_correct = (torch.round(outputs.float()) == labels).sum().item()
    n_total = len(outputs)
    return n_correct / n_total

def evaluate(val_loader, model, loss_fn):
    """Run the model over `val_loader`, printing per-batch loss and accuracy.

    Leaves the model in eval mode; the caller is responsible for switching
    back to train mode afterwards.

    val_loader: DataLoader yielding (inputs, labels) batches.
    model:      the network to evaluate.
    loss_fn:    accepted for interface compatibility, but the original code
                always uses CrossEntropyLoss here, so that is preserved.
    """
    # Fall back to CPU when CUDA is unavailable instead of crashing with a
    # hard-coded torch.device('cuda').
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.eval()
    loss_fn = nn.CrossEntropyLoss()

    # Single pass over the loader (the original iterated it twice, doing a
    # redundant second forward pass), with gradients disabled.
    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(val_loader):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_inputs)

            loss = loss_fn(logits, batch_labels)
            print('Validation Loss: ', loss.item())

            predictions = logits.argmax(axis=1)
            accuracy = 100 * compute_accuracy(predictions, batch_labels)
            print(accuracy, "%")