You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
First,
Hi, I have followed all the necessary steps mentioned in the README. I installed mxnet, nccl, and cudnn using "sudo bash install.sh". Up to this point everything works fine: all the necessary files were downloaded and installed. However, on executing the final command, i.e. "sudo bash train.sh", I got the following error. Please guide me on this.
Traceback (most recent call last):
File "train.py", line 13, in
from common import fit, evaluate
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/fit.py", line 1, in
import mxnet as mx
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/__init__.py", line 24, in
from .context import Context, current_context, cpu, gpu, cpu_pinned
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/context.py", line 24, in
from .base import classproperty, with_metaclass, _MXClassPropertyMetaClass
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/base.py", line 213, in
_LIB = _load_lib()
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/base.py", line 204, in _load_lib
lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL)
File "/usr/lib/python3.6/ctypes/__init__.py", line 348, in __init__
self._handle = _dlopen(self._name, mode)
OSError: libcudart.so.8.0: cannot open shared object file: No such file or directory
Second,
I followed a similar process, except that I commented out the mxnet installation step in the "install.sh" file, with the aim of removing the above-mentioned error. Instead, I installed mxnet using the command "pip install mxnet-cu101". On executing the command "sudo bash train.sh", the above error was gone, but I ran into the next error, which is shown below:
Error-Start
[10:25:25] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
sh: 1: nvcc: not found
sh: 1: nvcc: not found
Error in CustomOp.forward: Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/mxnet/operator.py", line 1005, in forward_entry
aux=tensors[4])
File "/content/gdrive/My Drive/MobulaOP/mobula/glue/mxnet_glue.py", line 109, in forward
out = self._forward(*in_data)
File "./AttentionSampler/attention_sampler/attention_sampler.py", line 60, in forward
mobula.func.map_step(N, attxi, index_y, stepx, att_size, out_size)
File "/content/gdrive/My Drive/MobulaOP/mobula/func.py", line 264, in call
using_async=using_async)
File "/content/gdrive/My Drive/MobulaOP/mobula/func.py", line 145, in call
func = self.loader(self, arg_types, ctx, **self.loader_kwargs)
File "/content/gdrive/My Drive/MobulaOP/mobula/op/loader.py", line 499, in init
_build_lib(cpp_fname, code_buffer, ctx, dll_fname)
File "/content/gdrive/My Drive/MobulaOP/mobula/op/loader.py", line 237, in _build_lib
source_to_so_ctx(build_path, srcs, target_name, ctx)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build.py", line 167, in source_to_so_ctx
buildin_cpp, buildin_o), compiler, cflags)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build.py", line 41, in source_to_o
run_command_parallel(commands)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build_utils.py", line 97, in run_command_parallel
raise RuntimeError(info)
RuntimeError: Error, terminated :-(
Traceback (most recent call last):
File "train.py", line 57, in
eval_metric = evaluate.Multi_Accuracy(num=6))
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/fit.py", line 195, in fit
monitor = monitor)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/base_module.py", line 533, in fit
self.update_metric(eval_metric, data_batch.label)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/module.py", line 775, in update_metric
self.exec_group.update_metric(eval_metric, labels, pre_sliced)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/executor_group.py", line 648, in update_metric
eval_metric.update_dict(labels, preds)
File "/usr/local/lib/python3.6/dist-packages/mxnet/metric.py", line 132, in update_dict
self.update(label, pred)
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/evaluate.py", line 23, in update
pred_label = mx.nd.argmax_channel(preds[i]).asnumpy()
File "/usr/local/lib/python3.6/dist-packages/mxnet/ndarray/ndarray.py", line 2566, in asnumpy
ctypes.c_size_t(data.size)))
File "/usr/local/lib/python3.6/dist-packages/mxnet/base.py", line 246, in check_call
raise get_last_ffi_error()
mxnet.base.MXNetError: Traceback (most recent call last):
File "src/operator/custom/custom.cc", line 346
MXNetError: Check failed: reinterpret_cast( params.info->callbacks[kCustomOpForward])( ptrs.size(), const_cast<void**>(ptrs.data()), const_cast<int*>(tags.data()), reinterpret_cast<const int*>(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpForward]):
Error-End
Please guide me on how I can resolve these errors and run TASN successfully.
The text was updated successfully, but these errors were encountered:
First,
Hi, I have followed all the necessary steps mentioned in the README. I installed mxnet, nccl, and cudnn using "sudo bash install.sh". Up to this point everything works fine: all the necessary files were downloaded and installed. However, on executing the final command, i.e. "sudo bash train.sh", I got the following error. Please guide me on this.
Traceback (most recent call last):
File "train.py", line 13, in
from common import fit, evaluate
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/fit.py", line 1, in
import mxnet as mx
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/__init__.py", line 24, in
from .context import Context, current_context, cpu, gpu, cpu_pinned
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/context.py", line 24, in
from .base import classproperty, with_metaclass, _MXClassPropertyMetaClass
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/base.py", line 213, in
_LIB = _load_lib()
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/mxnet_python/python/mxnet/base.py", line 204, in _load_lib
lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL)
File "/usr/lib/python3.6/ctypes/__init__.py", line 348, in __init__
self._handle = _dlopen(self._name, mode)
OSError: libcudart.so.8.0: cannot open shared object file: No such file or directory
Second,
I followed a similar process, except that I commented out the mxnet installation step in the "install.sh" file, with the aim of removing the above-mentioned error. Instead, I installed mxnet using the command "pip install mxnet-cu101". On executing the command "sudo bash train.sh", the above error was gone, but I ran into the next error, which is shown below:
Error-Start
[10:25:25] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:97: Running performance tests to find the best convolution algorithm, this can take a while... (set the environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
sh: 1: nvcc: not found
sh: 1: nvcc: not found
Error in CustomOp.forward: Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/mxnet/operator.py", line 1005, in forward_entry
aux=tensors[4])
File "/content/gdrive/My Drive/MobulaOP/mobula/glue/mxnet_glue.py", line 109, in forward
out = self._forward(*in_data)
File "./AttentionSampler/attention_sampler/attention_sampler.py", line 60, in forward
mobula.func.map_step(N, attxi, index_y, stepx, att_size, out_size)
File "/content/gdrive/My Drive/MobulaOP/mobula/func.py", line 264, in call
using_async=using_async)
File "/content/gdrive/My Drive/MobulaOP/mobula/func.py", line 145, in call
func = self.loader(self, arg_types, ctx, **self.loader_kwargs)
File "/content/gdrive/My Drive/MobulaOP/mobula/op/loader.py", line 499, in init
_build_lib(cpp_fname, code_buffer, ctx, dll_fname)
File "/content/gdrive/My Drive/MobulaOP/mobula/op/loader.py", line 237, in _build_lib
source_to_so_ctx(build_path, srcs, target_name, ctx)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build.py", line 167, in source_to_so_ctx
buildin_cpp, buildin_o), compiler, cflags)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build.py", line 41, in source_to_o
run_command_parallel(commands)
File "/content/gdrive/My Drive/MobulaOP/mobula/building/build_utils.py", line 97, in run_command_parallel
raise RuntimeError(info)
RuntimeError: Error, terminated :-(
Traceback (most recent call last):
File "train.py", line 57, in
eval_metric = evaluate.Multi_Accuracy(num=6))
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/fit.py", line 195, in fit
monitor = monitor)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/base_module.py", line 533, in fit
self.update_metric(eval_metric, data_batch.label)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/module.py", line 775, in update_metric
self.exec_group.update_metric(eval_metric, labels, pre_sliced)
File "/usr/local/lib/python3.6/dist-packages/mxnet/module/executor_group.py", line 648, in update_metric
eval_metric.update_dict(labels, preds)
File "/usr/local/lib/python3.6/dist-packages/mxnet/metric.py", line 132, in update_dict
self.update(label, pred)
File "/content/gdrive/My Drive/MobulaOP/tasn/tasn-mxnet/example/tasn/common/evaluate.py", line 23, in update
pred_label = mx.nd.argmax_channel(preds[i]).asnumpy()
File "/usr/local/lib/python3.6/dist-packages/mxnet/ndarray/ndarray.py", line 2566, in asnumpy
ctypes.c_size_t(data.size)))
File "/usr/local/lib/python3.6/dist-packages/mxnet/base.py", line 246, in check_call
raise get_last_ffi_error()
mxnet.base.MXNetError: Traceback (most recent call last):
File "src/operator/custom/custom.cc", line 346
MXNetError: Check failed: reinterpret_cast( params.info->callbacks[kCustomOpForward])( ptrs.size(), const_cast<void**>(ptrs.data()), const_cast<int*>(tags.data()), reinterpret_cast<const int*>(req.data()), static_cast(ctx.is_train), params.info->contexts[kCustomOpForward]):
Error-End
Please guide me on how I can resolve these errors and run TASN successfully.
The text was updated successfully, but these errors were encountered: