Skip to content

Commit

Permalink
Merge pull request #163 from Rufaim/adding_ninapro_and_darcyflow
Browse files Browse the repository at this point in the history
Adding ninapro and darcyflow datasets
  • Loading branch information
Neonkraft authored Jul 21, 2023
2 parents be1a834 + c199536 commit dfa2e67
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 45 deletions.
19 changes: 19 additions & 0 deletions naslib/data/taskonomydata_mini/download_tnb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,22 @@ do
mv -- "$j" "${j%class_places.npy}class_scene.npy"
done
done

# Download and unpack the surface-normal archive for every building listed in
# `datasets` (defined earlier in this script). A building whose `normal`
# directory already exists is left untouched.
# NOTE(review): `${datasets[@]}` is deliberately left unquoted because the
# definition of `datasets` is not visible here — confirm it is a bash array
# before quoting the expansion.
for dataset in ${datasets[@]}
do
    file="${dataset}_normal.tar"
    filepath="http://downloads.cs.stanford.edu/downloads/taskonomy_data/normal/$file"
    echo "$filepath"
    # If the building directory is missing, skip it. Without this guard the
    # trailing `cd ..` would still run and move the script out of the data
    # directory for all remaining iterations.
    cd "$dataset" || continue
    if [ -d "normal" ]
    then
        echo normal exists
    else
        echo normal does not exist
        # Only try to extract when the download succeeded; always clean up
        # whatever archive (complete or partial) was left behind.
        wget "$filepath" && tar -xvf "$file"
        rm -f -- "$file"
    fi
    cd ..
done
41 changes: 41 additions & 0 deletions naslib/data/taskonomydata_mini/generate_splits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
import json
import argparse


def main(vals_buildings, test_buildings, root=None):
    """Generate per-building template lists and train/val/test split files.

    Every sub-directory of ``root`` that contains an ``rgb`` folder is treated
    as one building. For each building, ``<root>/<building>.json`` is written
    with the sorted list of file templates derived from the files in its
    ``rgb`` folder (``_rgb.`` in the file name is replaced by ``_{domain}.``).
    Then ``train_split.json``, ``val_split.json`` and ``test_split.json`` are
    written into ``root``, each holding a list of building names.

    Args:
        vals_buildings: Building names assigned to the validation split.
        test_buildings: Building names assigned to the test split. A name
            listed in both collections goes to the test split.
        root: Directory to scan and write into. Defaults to the directory
            containing this script.
    """
    if root is None:
        root = os.path.dirname(os.path.abspath(__file__))

    # Only directories that actually hold an ``rgb`` folder are buildings;
    # this skips unrelated folders such as ``__pycache__``. Sorting makes the
    # generated split files independent of the OS directory-listing order.
    building_dirs = sorted(
        entry.path
        for entry in os.scandir(root)
        if entry.is_dir() and os.path.isdir(os.path.join(entry.path, "rgb"))
    )

    all_tasks = []
    for building_dir in building_dirs:
        taskname = os.path.basename(building_dir)
        templates = sorted(
            f"{taskname}/{{domain}}/" + entry.name.replace("_rgb.", "_{domain}.")
            for entry in os.scandir(os.path.join(building_dir, "rgb"))
            if entry.is_file()
        )
        with open(building_dir + ".json", "w") as f:
            json.dump(templates, f)

        all_tasks.append(taskname)

    train_tasks = []
    val_tasks = []
    test_tasks = []
    for task in all_tasks:
        # Test membership takes precedence over validation membership.
        if task in test_buildings:
            test_tasks.append(task)
        elif task in vals_buildings:
            val_tasks.append(task)
        else:
            train_tasks.append(task)

    # Write the split files into ``root`` itself rather than deriving the
    # folder from the last loop variable, which is undefined when no
    # building directory exists.
    splits = zip(
        [train_tasks, val_tasks, test_tasks],
        ["train_split.json", "val_split.json", "test_split.json"],
    )
    for tasks, filename in splits:
        with open(os.path.join(root, filename), "w") as file:
            json.dump(tasks, file)


if __name__ == '__main__':
    # Command-line entry point: read the validation and test building names
    # and hand them to main(). Buildings named in neither list end up in the
    # training split.
    cli = argparse.ArgumentParser("Taskonomy splits generator")
    cli.add_argument("--val", type=str, nargs="*", default=[])
    cli.add_argument(
        "--test",
        type=str,
        nargs="+",
        default=["uvalda", "merom", "stockman"],
    )
    options = cli.parse_args()

    main(options.val, options.test)
13 changes: 6 additions & 7 deletions naslib/search_spaces/nasbench201/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def query(
"cifar10",
"cifar100",
"ImageNet16-120",
"ninapro"
], "Unknown dataset: {}".format(dataset)
if dataset_api is None:
raise NotImplementedError("Must pass in dataset_api to query NAS-Bench-201")
Expand Down Expand Up @@ -188,16 +189,14 @@ def query(
# return all data
return dataset_api["nb201_data"][arch_str]

if dataset not in ["cifar10", "cifar10-valid", "cifar100", "ImageNet16-120", "ninapro"]:
raise NotImplementedError("Invalid dataset")

if dataset in ["cifar10", "cifar10-valid"]:
query_results = dataset_api["nb201_data"][arch_str]
# set correct cifar10 dataset
dataset = "cifar10-valid"
elif dataset == "cifar100":
query_results = dataset_api["nb201_data"][arch_str]
elif dataset == "ImageNet16-120":
query_results = dataset_api["nb201_data"][arch_str]
else:
raise NotImplementedError("Invalid dataset")

query_results = dataset_api["nb201_data"][arch_str]

if metric == Metric.HP:
# return hyperparameter info
Expand Down
66 changes: 66 additions & 0 deletions naslib/utils/darcyflow_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import numpy as np
import scipy.io
import torch.utils.data
import torchvision.transforms


class UnitGaussianNormalizer(object):
    """Standardise tensors with statistics taken from a reference tensor.

    The mean and standard deviation are computed once, along dimension 0 of
    the tensor passed to the constructor, and then reused for every call.
    """

    def __init__(self, x, eps=0.00001):
        """Store the per-position statistics of ``x``.

        Args:
            x: Reference tensor. Per the original author's note it may be
                shaped ntrain*n, ntrain*T*n or ntrain*n*T; the reduction is
                always over dimension 0.
            eps: Small constant added to the standard deviation when
                normalising, to avoid division by zero.
        """
        super().__init__()

        self.mean = torch.mean(x, dim=0)
        self.std = torch.std(x, dim=0)
        self.eps = eps

    def __call__(self, x):
        """Return ``x`` shifted by the stored mean and scaled by std + eps."""
        centered = x - self.mean
        scale = self.std + self.eps
        return centered / scale

# adapted from https://github.com/rtu715/NAS-Bench-360/blob/0d1af0ce37b5f656d6491beee724488c3fccf871/perceiver-io/perceiver/data/nb360/darcyflow.py#L73
def load_darcyflow_data(path):
    """Load the Darcy Flow train and test sets as ``TensorDataset`` objects.

    Reads ``piececonst_r421_N1024_smooth1.mat`` (train) and
    ``piececonst_r421_N1024_smooth2.mat`` (test) from ``path``. Inputs and
    targets are each normalised with statistics computed on the training
    split only, and the inputs are reshaped to ``(-1, s, s, 1)``.

    Args:
        path: Directory containing the two ``.mat`` files.

    Returns:
        A ``(trainset, testset)`` tuple of ``torch.utils.data.TensorDataset``.
    """
    stride = 5
    # Side length after subsampling; 421 is presumably the native grid
    # resolution (it matches the "r421" in the file names) — TODO confirm.
    side = int(((421 - 1) / stride) + 1)

    train_file = os.path.join(path, "piececonst_r421_N1024_smooth1.mat")
    test_file = os.path.join(path, "piececonst_r421_N1024_smooth2.mat")
    x_train, y_train = read_mat(train_file, stride, side)
    x_test, y_test = read_mat(test_file, stride, side)

    # Both normalisers are fitted on the raw training tensors and then
    # applied to the train and test splits alike.
    normalize_inputs = UnitGaussianNormalizer(x_train)
    normalize_targets = UnitGaussianNormalizer(y_train)
    x_train, x_test = normalize_inputs(x_train), normalize_inputs(x_test)
    y_train, y_test = normalize_targets(y_train), normalize_targets(y_test)

    # Only the inputs receive a trailing singleton dimension.
    input_shape = (-1, side, side, 1)
    x_train = x_train.reshape(input_shape)
    x_test = x_test.reshape(input_shape)

    return (
        torch.utils.data.TensorDataset(x_train, y_train),
        torch.utils.data.TensorDataset(x_test, y_test),
    )


def read_mat(file_path, r, s):
    """Read the ``coeff`` and ``sol`` fields of a ``.mat`` file.

    Each field is subsampled by taking every ``r``-th entry along axes 1 and
    2, and then cropped to the first ``s`` entries along those axes.

    Args:
        file_path: Path of the MATLAB file to load.
        r: Subsampling stride for axes 1 and 2.
        s: Number of entries kept along axes 1 and 2 after subsampling.

    Returns:
        A ``(x, y)`` tuple built from the ``coeff`` and ``sol`` fields.
    """
    contents = scipy.io.loadmat(file_path)

    def subsample(field):
        # Stride first, then crop, on the two trailing axes.
        return read_mat_field(contents, field)[:, ::r, ::r][:, :s, :s]

    x = subsample("coeff")
    y = subsample("sol")
    return x, y


def read_mat_field(mat, field):
    """Return ``mat[field]`` converted to a float32 torch tensor.

    Args:
        mat: Mapping from field names to NumPy arrays.
        field: Key of the array to extract.
    """
    as_float32 = mat[field].astype(np.float32)
    return torch.from_numpy(as_float32)


def darcyflow_transform(args):
    """Build the train and validation transforms for Darcy Flow.

    Both pipelines consist of a single ``ToTensor`` step.

    Args:
        args: Accepted but not used by this function.

    Returns:
        A ``(train_transform, valid_transform)`` tuple of ``Compose`` objects.
    """
    # The two Compose objects are built from the same list of steps.
    steps = [torchvision.transforms.ToTensor()]
    train_pipeline = torchvision.transforms.Compose(steps)
    valid_pipeline = torchvision.transforms.Compose(steps)
    return train_pipeline, valid_pipeline
70 changes: 39 additions & 31 deletions naslib/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_train_val_loaders(config, mode="train"):

train_transform, valid_transform = _data_transforms_ImageNet_16_120(
config)
data_folder = f"{data}/{dataset}"
data_folder = os.path.join(data, dataset)
train_data = ImageNet16(
root=data_folder,
train=True,
Expand All @@ -71,6 +71,20 @@ def get_train_val_loaders(config, mode="train"):
transform=valid_transform,
use_num_of_class_only=120,
)
elif dataset == 'ninapro':
from naslib.utils.ninapro_dataset import NinaPro, ninapro_transform

train_transform, valid_transform = ninapro_transform(config)
data_folder = os.path.join(data, dataset)
train_data = NinaPro(data_folder, split="train", transform=train_transform)
test_data = NinaPro(data_folder, split="test", transform=valid_transform)
elif dataset == "darcyflow":
from naslib.utils.darcyflow_dataset import load_darcyflow_data, darcyflow_transform

train_transform, valid_transform = darcyflow_transform(config)

data_folder = os.path.join(data, dataset)
train_data, test_data = load_darcyflow_data(data_folder)
elif dataset == 'jigsaw':
cfg = get_jigsaw_configs()

Expand Down Expand Up @@ -294,12 +308,11 @@ def get_jigsaw_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_dim'] = 1000
cfg['target_load_fn'] = load_ops.random_jigsaw_permutation
Expand Down Expand Up @@ -350,12 +363,11 @@ def get_class_object_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_dim'] = 75

Expand Down Expand Up @@ -406,12 +418,11 @@ def get_class_scene_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_dim'] = 47

Expand Down Expand Up @@ -465,12 +476,11 @@ def get_autoencoder_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_load_fn'] = load_ops.load_raw_img_label
cfg['target_load_kwargs'] = {}
Expand Down Expand Up @@ -516,12 +526,11 @@ def get_segmentsemantic_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_load_fn'] = load_ops.semantic_segment_label
cfg['target_load_kwargs'] = {}
Expand Down Expand Up @@ -567,12 +576,11 @@ def get_normal_configs():

cfg['dataset_dir'] = os.path.join(
get_project_root(), "data", "taskonomydata_mini")
cfg['data_split_dir'] = os.path.join(
get_project_root(), "data", "final5K_splits")
cfg['data_split_dir'] = cfg['dataset_dir']

cfg['train_filenames'] = 'train_filenames_final5k.json'
cfg['val_filenames'] = 'val_filenames_final5k.json'
cfg['test_filenames'] = 'test_filenames_final5k.json'
cfg['train_filenames'] = 'train_split.json'
cfg['val_filenames'] = 'val_split.json'
cfg['test_filenames'] = 'test_split.json'

cfg['target_load_fn'] = load_ops.load_raw_img_label
cfg['target_load_kwargs'] = {}
Expand Down
3 changes: 2 additions & 1 deletion naslib/utils/get_dataset_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def get_nasbench201_api(dataset=None):
datafiles = {
'cifar10': 'nb201_cifar10_full_training.pickle',
'cifar100': 'nb201_cifar100_full_training.pickle',
'ImageNet16-120': 'nb201_ImageNet16_full_training.pickle'
'ImageNet16-120': 'nb201_ImageNet16_full_training.pickle',
'ninapro': 'nb201_ninapro_full_training.pickle'
}

datafile_path = os.path.join(
Expand Down
44 changes: 44 additions & 0 deletions naslib/utils/ninapro_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import numpy as np
import torch.utils.data
import torchvision.transforms


# adapted from https://github.com/rtu715/NAS-Bench-360/blob/0d1af0ce37b5f656d6491beee724488c3fccf871/perceiver-io/perceiver/data/nb360/ninapro.py#L64
class NinaPro(torch.utils.data.Dataset):
    """NinaPro dataset backed by ``.npy`` files held fully in memory.

    Samples are read from ``ninapro_<split>.npy`` and labels from
    ``label_<split>.npy`` inside ``root``.
    """

    def __init__(self, root, split="train", transform=None):
        """Load the samples and labels of one split.

        Args:
            root: Directory containing the ``.npy`` files.
            split: Split name used to build the file names.
            transform: Optional callable applied to each sample on access.
        """
        self.root = root
        self.split = split
        self.transform = transform

        samples_file = os.path.join(root, f"ninapro_{split}.npy")
        labels_file = os.path.join(root, f"label_{split}.npy")

        samples = np.load(samples_file).astype(np.float32)
        # Insert a singleton axis at position 1, then move it to the end.
        samples = samples[:, np.newaxis, :, :]
        self.x = np.transpose(samples, (0, 2, 3, 1))
        self.y = np.load(labels_file).astype(int)

    def __len__(self):
        """Return the number of labels in the split."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair at ``idx``.

        Tensor indices are converted to plain Python values first. The
        transform, when set, is applied to the sample only.
        """
        index = idx.tolist() if torch.is_tensor(idx) else idx

        sample = self.x[index, :]
        label = self.y[index]

        if self.transform:
            sample = self.transform(sample)
        return sample, label


def ninapro_transform(args, channels_last: bool = True):
    """Build the train and validation transforms for NinaPro.

    Each pipeline applies ``ToTensor`` and, when ``channels_last`` is true,
    then permutes the tensor's axes with ``(1, 2, 0)``.

    Args:
        args: Accepted but not used by this function.
        channels_last: Whether to append the axis permutation step.

    Returns:
        A ``(train_transform, valid_transform)`` tuple of ``Compose`` objects.
    """

    def channels_to_last(img: torch.Tensor):
        # Move the leading axis to the end and make the result contiguous.
        return img.permute(1, 2, 0).contiguous()

    steps = [torchvision.transforms.ToTensor()]
    if channels_last:
        steps.append(channels_to_last)

    # The two Compose objects are built from the same list of steps.
    train_pipeline = torchvision.transforms.Compose(steps)
    valid_pipeline = torchvision.transforms.Compose(steps)
    return train_pipeline, valid_pipeline

4 changes: 2 additions & 2 deletions naslib/utils/taskonomy_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ def get_all_templates(dataset_dir, filenames_path):
dataset_dir (string): Directory with all the images.
filenames_path (string): /path/to/json_file for train/val/test_filenames (specify which buildings to include)
"""
building_lists = load_ops.read_json(filenames_path)['filename_list']
building_lists = load_ops.read_json(filenames_path)
all_template_paths = []
for building in building_lists:
all_template_paths += load_ops.read_json(osp.join(dataset_dir, building))
all_template_paths += load_ops.read_json(osp.join(dataset_dir, f"{building}.json"))
for i, path in enumerate(all_template_paths):
f_split = path.split('.')
if f_split[-1] in ['npy', 'png']:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ emcee==3.1.0
pybnn==0.0.5
grakel==0.1.8
pyro-ppl==1.6.0
scipy==1.4.1

# additional from setup.py prev
tqdm==4.61.1
Expand Down
Loading

0 comments on commit dfa2e67

Please sign in to comment.