Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data/benchmarks #422

Closed
wants to merge 25 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[skip ci] push
msaroufim committed Jun 1, 2022

Verified

This commit was signed with the committer’s verified signature.
neo1973 Markus Härer
commit cfa49aa161403402d6b9b6a68fe1427ae2c340ec
5 changes: 4 additions & 1 deletion benchmarks/args.py
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@
def arg_parser():
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset")
parser.add_argument("--ispipe", action="store_true", help="is datapipe or dataset?")
parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model")
parser.add_argument("--batch_size", type=int, default=1, help="")
parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0")
@@ -14,7 +15,9 @@ def arg_parser():
parser.add_argument("--dataloaderv", type=int, default=1)

args = parser.parse_args()
print(args)
dataset = args.dataset
ispipe = args.ispipe
model_name = args.model_name
batch_size = args.batch_size
device = args.device
@@ -23,4 +26,4 @@ def arg_parser():
num_workers = args.num_workers
shuffle = args.shuffle
dataloaderv = args.dataloaderv
return dataset,model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv
return dataset, ispipe, model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv
32 changes: 16 additions & 16 deletions benchmarks/datasets.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
from torchvision import transforms
from torchvision import transforms, datasets
import torch

def prepare_gtsrb(batch_size, device, dp):
def transform(img):
t= transforms.Compose([
transforms.ToPILImage(),
transforms.Resize(size=(100,100)),
transforms.ToTensor()]
)
return t(img).to(torch.device(device))
def transform(img):
    """Apply the GTSRB image preprocessing pipeline to a single image.

    The pipeline is rebuilt on every call and runs three torchvision
    transforms in order: ``ToPILImage``, ``Resize`` to 100x100, and
    ``ToTensor``.

    Args:
        img: The image to preprocess; it must be an input accepted by
            ``transforms.ToPILImage`` (presumably a decoded image tensor
            or ndarray -- confirm against the caller).

    Returns:
        Whatever the composed pipeline returns for ``img`` (the output of
        the final ``ToTensor`` step).
    """
    steps = [
        transforms.ToPILImage(),
        transforms.Resize(size=(100, 100)),
        transforms.ToTensor(),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(img)

def str_to_list(str):
l = []
for char in str:
l.append(int(char))
return l
def str_to_list(str):
    """Convert a string of digit characters into a list of ints.

    Every character of ``str`` is converted with ``int`` on its own, so
    ``"0100"`` becomes ``[0, 1, 0, 0]``. An empty string yields ``[]``.

    Args:
        str: Iterable of single characters, each a valid integer literal.
            The name shadows the builtin ``str``; it is kept unchanged so
            that existing keyword callers continue to work.

    Returns:
        A new list with one ``int`` per input character, in order.

    Raises:
        ValueError: If any character cannot be parsed by ``int``.
    """
    return [int(char) for char in str]

def prepare_gtsrb_datapipe(batch_size, device, dp):
# Filter out bounding box and path to image
dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]})

# Apply image preprocessing
dp = dp.map(lambda sample : transform(sample.decode()), input_col="image")
dp = dp.map(lambda sample : transform(sample.decode().to(torch.device(device))), input_col="image")
dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label")

# Batch
dp = dp.batch(batch_size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is needed as long as you pass batch_size=... to the DataLoader (though I'm not certain either way).

return dp
return dp
1 change: 0 additions & 1 deletion benchmarks/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from statistics import mean


def create_report(per_epoch_durations, batch_durations, total_duration):
print(f"Total duration is {total_duration}")
print(f"Per epoch duration {mean(per_epoch_durations)}")
55 changes: 30 additions & 25 deletions benchmarks/run_benchmark.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
import torchvision
import torch
import transformers
from torchvision.prototype.datasets import load
from torchvision.prototype.datasets import load as loadpipe
import torch.nn.functional as F
from torchvision import transforms
import time
@@ -14,15 +14,24 @@

# Relative imports
from args import arg_parser
from benchmarks.datasets import prepare_gtsrb_dataset
from utils import init_fn
from datasets import prepare_gtsrb
from datasets import prepare_gtsrb_datapipe
from trainers import train
from report import create_report

logging.basicConfig(filename='example.log', level=logging.DEBUG)


dataset, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser()
dataset, ispipe, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser()

if device.startswith("cuda"):
nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True)
logging.debug(nvidiasmi)

lscpu = subprocess.check_output("lscpu", shell=True, text=True)
logging.debug(lscpu)


if dataloaderv == 1:
from torch.utils.data import DataLoader
@@ -31,8 +40,6 @@
else:
raise(f"dataloaderv{dataloaderv} is not a valid option")



# Download model
model_map = {
"resnext50_32x4d": torchvision.models.resnext50_32x4d,
@@ -45,39 +52,38 @@
model = model_map[model_name]().to(torch.device(device))

# setup data pipe
if model_name in ["resnext50_32x4d", "mobilenet_v3_large"]:
dp = load(dataset, split="train")
if dataset == "gtsrb":
if ispipe:
dp = loadpipe(dataset, split="train")
logging.debug(f"data format before preprocessing is {next(iter(dp))}")

else:
print(f"{model} not supported yet")
dp = prepare_gtsrb_datapipe(batch_size, device, dp)
logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n")

if device.startswith("cuda"):
nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True)
print(nvidiasmi)
else:
# No further preprocessing needed this returns a tuple of Images and labels as ints
# Do I need to do batching and collation manually?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I need to do batching and collation manually?

No, you just need to pass batch_size=... to the DataLoader

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nicolas is right that you can do that for DataLoader.

I would think it is better to use .batch since it will be necessary for DataLoaderV2. Then you can pass the same DataPipe to both versions of DL without more changes later.

ds = torchvision.datasets.GTSRB(root=".",split="train", download=True)


else:
print(f"{dataset} not supported yet")

lscpu = subprocess.check_output("lscpu", shell=True, text=True)
print(lscpu)

print(f"batch size {batch_size}")
print(f"Dataset name {dp}")
print(f"Dataset length {len(dp)}")

# Datapipe format
logging.debug(f"data format before preprocessing is {next(iter(dp))}")
# Setup data loader

if dataset == "gtsrb":
dp = prepare_gtsrb(batch_size, device, dp)

# Datapipe format after preprocessing
logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n")
data = dp if dp else ds

# Setup data loader
if num_workers == 1:
dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=shuffle)
dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=shuffle)

# Shuffle won't work in distributed yet
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shuffle and sharding won't work out of the box with DDP. There are some suggestions in pytorch/text#1755, but no definitive recommended practices yet.

else:
dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn")
dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn")

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
@@ -107,7 +113,6 @@
total_duration = total_end - total_start

# TODO: Make this output some human readable markdown file

create_report(per_epoch_durations, batch_durations, total_duration)


14 changes: 9 additions & 5 deletions benchmarks/trainers.py
Original file line number Diff line number Diff line change
@@ -8,10 +8,7 @@ def train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion
for i, elem in enumerate(dl):
batch_start = time.time()

labels = torch.argmax(elem[0]["label"], dim=1)
optimizer.zero_grad()
outputs = model(elem[0]["image"])
loss = criterion(outputs,labels)
loss = process(model, criterion, optimizer, elem)
loss.backward()
optimizer.step()

@@ -27,4 +24,11 @@ def train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion

epoch_end = time.time()
epoch_duration = epoch_end - epoch_start
per_epoch_durations.append(epoch_duration)
per_epoch_durations.append(epoch_duration)

def process(model, criterion, optimizer, elem):
    """Run the forward pass for one batch and return its loss.

    The optimizer's gradients are cleared before the forward pass. The
    backward pass and the optimizer step are left to the caller.

    Args:
        model: Callable applied to the batch images.
        criterion: Callable taking ``(outputs, labels)`` and returning the loss.
        optimizer: Object exposing ``zero_grad()``.
        elem: Indexable whose first item is a mapping with ``"label"`` and
            ``"image"`` entries. Labels are reduced with ``argmax`` over
            dim 1 (presumably one-hot encoded -- confirm against the
            datapipe preprocessing).

    Returns:
        The value returned by ``criterion`` for this batch.
    """
    batch = elem[0]
    # Collapse per-class label scores to class indices for the criterion.
    targets = torch.argmax(batch["label"], dim=1)
    optimizer.zero_grad()
    predictions = model(batch["image"])
    return criterion(predictions, targets)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
urllib3 >= 1.25
requests
portalocker >= 2.0.0