-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparams.py
49 lines (38 loc) · 1.28 KB
/
params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import torch
import tokenmonster
class Params:
    """Shared hyperparameters and compute-device selection, held as class attributes.

    Nothing here is per-instance: every value is read as ``Params.<name>``.
    ``device`` starts as CPU and is replaced by :meth:`initialize` with the
    best accelerator that the installed PyTorch build reports as available.
    """

    # hyperparameters
    batch_size: int = 128  # how many independent sequences will we process in parallel?
    block_size: int = 512  # what is the maximum context length for predictions?
    max_iters: int = 8000
    eval_interval: int = 500
    learning_rate: float = 2e-4
    device: torch.device = torch.device('cpu')  # CPU until initialize() runs
    eval_iters: int = 200
    # NOTE(review): the names below suggest transformer model dimensions
    # (embedding width, attention heads, layers) -- confirm against the model
    # code that consumes them.
    n_embd: int = 1024
    n_head: int = 8
    n_layer: int = 8
    dropout: float = 0.1
    vocab_size: int = 1024

    @classmethod
    def initialize(cls):
        """Store the auto-selected device in ``cls.device`` (prints the choice)."""
        cls.device = cls.autoselectDevice(verbose=1)

    @classmethod
    def autoselectDevice(cls, verbose=1):
        """Pick the compute device, preferring CUDA over MPS over CPU.

        Args:
            verbose: When truthy, print the selected device and, for CUDA,
                the name of device 0.

        Returns:
            torch.device: ``cuda`` if CUDA is available, else ``mps`` if the
            MPS backend exists, is built and is available, else ``cpu``.
        """
        # PyTorch builds that predate the MPS backend have no
        # ``torch.backends.mps`` attribute; look it up defensively so those
        # builds fall back to CPU instead of raising AttributeError.
        mps_backend = getattr(torch.backends, 'mps', None)

        if torch.cuda.is_available():
            # CUDA
            device = torch.device('cuda')
        elif (
            mps_backend is not None
            and mps_backend.is_available()
            and mps_backend.is_built()
        ):
            # MPS (acceleration on Apple silicon M1 / M2 chips)
            device = torch.device('mps')
        else:
            # default: CPU
            device = torch.device('cpu')

        if verbose:
            print('Using device:', device)
            # Additional info when using CUDA
            if device.type == 'cuda':
                print(torch.cuda.get_device_name(0))
        return device
# Initialize the class attributes at import time: Params.initialize() replaces
# the CPU default in Params.device with the auto-selected device and prints the
# selection (it calls autoselectDevice with verbose=1).
Params.initialize()