tpu-vm.env
#!/usr/bin/env bash
# Dot-source this file into your shell; do not run it as a standalone script.
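# Example usage (a sketch; the training script name is an assumption):
#   . ./tpu-vm.env
#   accelerate launch train.py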
export PYTHONUNBUFFERED='1'
## General log level opts for Accelerate/Transformers
#export ACCELERATE_LOG_LEVEL='INFO'
#export TRANSFORMERS_LOG_LEVEL='INFO'
# tcmalloc breaks things and Google enables it by default, so it has to go
unset LD_PRELOAD
# add the dir where `libtpu-nightly` puts the library to LD_LIBRARY_PATH
export LD_LIBRARY_PATH="/usr/local/lib/python3.8/dist-packages/libtpu/:${LD_LIBRARY_PATH}"
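# Hedged alternative if the image's Python version differs from 3.8 (a sketch,
# assuming the `libtpu` Python package is importable):
#export LD_LIBRARY_PATH="$(python3 -c 'import libtpu, os; print(os.path.dirname(libtpu.__file__))'):${LD_LIBRARY_PATH}"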
# PJRT doesn't work with Accelerate yet, so unset it and fall back to the older XRT runtime
unset PJRT_DEVICE
export XRT_TPU_CONFIG='localservice;0;localhost:51011'
export MASTER_ADDR='localhost'
export MASTER_PORT='12355'
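# Quick sanity check that the XRT config above is picked up (run after
# sourcing this file; uses the torch_xla device API):
#python3 -c 'import torch_xla.core.xla_model as xm; print(xm.xla_device())'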
## see https://github.com/pytorch/xla/issues/4914
export XLA_IR_SHAPE_CACHE_SIZE=12288
## useful options for debug
#export PT_XLA_DEBUG=1
# Capture a Python stack trace when IR nodes are created, so you can see which PyTorch operation generated each one.
#export XLA_IR_DEBUG=1
# Path to save the IR graphs generated during execution.
#export XLA_SAVE_TENSORS_FILE=''
# Format for the file above: can be text, dot (GraphViz), or hlo (native)
#export XLA_SAVE_TENSORS_FMT='text'
# Path to save metrics after every op
#export XLA_METRICS_FILE=
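# In-process alternative to XLA_METRICS_FILE: print a metrics report from the
# training script itself via the torch_xla debug API, e.g.
#   import torch_xla.debug.metrics as met
#   print(met.metrics_report())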
# In case of compilation/execution error, the offending HLO graph will be saved here.
#export XLA_SAVE_HLO_FILE=
# Enable OpByOp dispatch for "get tensors"
#export XLA_GET_TENSORS_OPBYOP=1
# Enable OpByOp dispatch for "sync tensors"
#export XLA_SYNC_TENSORS_OPBYOP=1
# Force XLA tensor sync before moving to next step
#export XLA_SYNC_WAIT=1
# Force downcasting of fp32 to bf16
#export XLA_USE_BF16=1
# Force downcasting of fp32 to fp16
#export XLA_USE_F16=1
# Map PyTorch long (int64) values to 32-bit integers on the XLA device
#export XLA_USE_32BIT_LONG=1
## TPU runtime / compilation debug logging
# All XLA log messages are at INFO level, so this is required to see them
#export TF_CPP_MIN_LOG_LEVEL=0
# Print the thread ID in log messages
#export TF_CPP_LOG_THREAD_ID=1
# What modules to print from at what level
#export TF_CPP_VMODULE='tensor=4,computation_client=5,xrt_computation_client=5,aten_xla_type=5'
## Limit to a single TPU chip/core; can be useful for testing
# export TPU_PROCESS_BOUNDS='1,1,1'
# export TPU_VISIBLE_CHIPS=0
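# To confirm the limit above took effect, list the devices torch_xla can see
# (a sketch; run after sourcing this file):
# python3 -c 'import torch_xla.core.xla_model as xm; print(xm.get_xla_supported_devices())'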