Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Can not get inference or RIFE with ORT_DML working #56

Open
Selur opened this issue Sep 28, 2023 · 6 comments
Open

Can not get inference or RIFE with ORT_DML working #56

Selur opened this issue Sep 28, 2023 · 6 comments

Comments

@Selur
Copy link

Selur commented Sep 28, 2023

I downloaded all the files from https://github.com/AmusementClub/vs-mlrt/releases/tag/v14.test into "F:/Hybrid/64bit/vs-mlrt". When I run the following script:

# Imports (repro script: upscaling with vsmlrt.inference and the 4x_BSRGAN model on the ORT_DML backend)
import vapoursynth as vs
import os
import sys
os.environ["CUDA_MODULE_LOADING"] = "LAZY"
# getting Vapoursynth core
core = vs.core
# Import scripts folder
scriptPath = 'F:/Hybrid/64bit/vsscripts'
sys.path.insert(0, os.path.abspath(scriptPath))
# Loading Plugins
core.std.LoadPlugin(path="F:/Hybrid/64bit/vs-mlrt/vsort.dll")
import site
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/Support/fmtconv.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/SourceFilter/LSmashSource/vslsmashsource.dll")
# Import scripts (vsmlrt.py is loaded directly from its file path)
from importlib.machinery import SourceFileLoader
vsmlrt = SourceFileLoader('vsmlrt', 'F:/Hybrid/64bit/vs-mlrt/vsmlrt.py').load_module()
# source: 'G:\TestClips&Co\files\test.avi'
# current color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, yuv luminance scale: limited, scanorder: progressive
# Loading G:\TestClips&Co\files\test.avi using LWLibavSource
clip = core.lsmas.LWLibavSource(source="G:/TestClips&Co/files/test.avi", format="YUV420P8", stream_index=0, cache=0, prefer_hw=0)
# Setting detected color matrix (470bg).
clip = core.std.SetFrameProps(clip, _Matrix=5)
# Setting color transfer info (470bg), when it is not set. NOTE(review): the condition tests the clip returned by core.text.FrameProps, presumably not whether the property exists - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=5)
# Setting color primaries info (470bg), when it is not set. NOTE(review): same condition as for '_Transfer' above - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=5)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 25
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0) # progressive
# keeping the unfiltered clip for the side-by-side comparison at the end
original = clip
# changing range from limited to full range
clip = core.resize.Bicubic(clip, range_in_s="limited", range_s="full")
# Setting color range to PC (full) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=0)
# adjusting color space from YUV420P8 to RGBS for vsVSMLRT
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, matrix_in_s="470bg", range_s="full")
# resizing using VSMLRT (this is the call that fails, see the traceback: script line 43)
from vsmlrt import Backend
clip = vsmlrt.inference([clip],network_path="F:/Hybrid/64bit/onnx_models/4x_BSRGAN.onnx", backend=Backend.ORT_DML(fp16=True, device_id=0,num_streams=1))
# resizing the upscaled result (2560x1408) back to 640x352
# adjusting resizing
clip = core.fmtc.resample(clip=clip, w=640, h=352, kernel="spline64", interlaced=False, interlacedd=False)
# changing range from full to limited range
clip = core.resize.Bicubic(clip, range_in_s="full", range_s="limited")
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# adjusting output color from: YUV420P8 to YUV420P10 for QSVEncModel
original = core.resize.Bicubic(clip=original, format=vs.YUV420P10, range_s="limited")
# adjusting output color from: RGBS to YUV420P10 for QSVEncModel
clip = core.resize.Bicubic(clip=clip, format=vs.YUV420P10, matrix_s="470bg", range_s="limited", dither_type="error_diffusion")
# DEBUG: clip: YUV420P10 vs original: YUV420P10
original = core.text.Text(clip=original,text="Original",scale=1,alignment=7)
clip = core.text.Text(clip=clip,text="Filtered",scale=1,alignment=7)
# alternating original and filtered frames for comparison
interleaved = core.std.Interleave([original, clip])
# set output frame rate to 25fps (progressive)
interleaved = core.std.AssumeFPS(clip=interleaved, fpsnum=25, fpsden=1)
# Output
interleaved.set_output()

I got:

2023-09-28 15:25:20.240
Failed to evaluate the script:
Python exception: operator (): 'ortapi->CreateSessionFromArray( d->environment, std::data(onnx_data), std::size(onnx_data), session_options, &resource.session )' failed: Exception during initialization: D:\a\onnxruntime\onnxruntime\onnxruntime\core\providers\dml\DmlExecutionProvider\src\AbiCustomRegistry.cpp(516)\onnxruntime.dll!00007FF94FDC269C: (caller: 00007FF94FDC9509) Exception(3) tid(4a34) 80070057 Falscher Parameter. 


Traceback (most recent call last):
File "src\cython\vapoursynth.pyx", line 3115, in vapoursynth._vpy_evaluate
File "src\cython\vapoursynth.pyx", line 3116, in vapoursynth._vpy_evaluate
File "J:\tmp\tempPreviewVapoursynthFile15_25_15_732.vpy", line 43, in 
clip = vsmlrt.inference([clip],network_path="F:/Hybrid/64bit/onnx_models/4x_BSRGAN.onnx", backend=Backend.ORT_DML(fp16=True, device_id=0,num_streams=1))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1635, in inference
return inference_with_fallback(
^^^^^^^^^^^^^^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1611, in inference_with_fallback
raise e
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1590, in inference_with_fallback
return _inference(
^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1460, in _inference
clip = core.ort.Model(
^^^^^^^^^^^^^^^
File "src\cython\vapoursynth.pyx", line 2847, in vapoursynth.Function.__call__
vapoursynth.Error: operator (): 'ortapi->CreateSessionFromArray( d->environment, std::data(onnx_data), std::size(onnx_data), session_options, &resource.session )' failed: Exception during initialization: D:\a\onnxruntime\onnxruntime\onnxruntime\core\providers\dml\DmlExecutionProvider\src\AbiCustomRegistry.cpp(516)\onnxruntime.dll!00007FF94FDC269C: (caller: 00007FF94FDC9509) Exception(3) tid(4a34) 80070057 Falscher Parameter. 

Trying RIFE:

# Imports (repro script: frame interpolation with vsmlrt.RIFE on the ORT_DML backend)
import vapoursynth as vs
import os
import sys
os.environ["CUDA_MODULE_LOADING"] = "LAZY"
# getting Vapoursynth core
core = vs.core
# Import scripts folder
scriptPath = 'F:/Hybrid/64bit/vsscripts'
sys.path.insert(0, os.path.abspath(scriptPath))
# Loading Plugins
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/Support/fmtconv.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vs-mlrt/vsort.dll")
import site
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/Support/akarin.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/MiscFilter/MiscFilters/MiscFilters.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/SourceFilter/LSmashSource/vslsmashsource.dll")
# Import scripts (ChangeFPS comes from the scripts folder added to sys.path above; vsmlrt.py is loaded from its file path)
import ChangeFPS
from importlib.machinery import SourceFileLoader
vsmlrt = SourceFileLoader('vsmlrt', 'F:/Hybrid/64bit/vs-mlrt/vsmlrt.py').load_module()
# source: 'G:\TestClips&Co\files\test.avi'
# current color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, yuv luminance scale: limited, scanorder: progressive
# Loading G:\TestClips&Co\files\test.avi using LWLibavSource
clip = core.lsmas.LWLibavSource(source="G:/TestClips&Co/files/test.avi", format="YUV420P8", stream_index=0, cache=0, fpsnum=25, prefer_hw=0)
# Setting detected color matrix (470bg).
clip = core.std.SetFrameProps(clip, _Matrix=5)
# Setting color transfer info (470bg), when it is not set. NOTE(review): the condition tests the clip returned by core.text.FrameProps, presumably not whether the property exists - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=5)
# Setting color primaries info (470bg), when it is not set. NOTE(review): same condition as for '_Transfer' above - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=5)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 25
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0) # progressive
# keeping the unfiltered clip for the side-by-side comparison at the end
original = clip
from vsmlrt import Backend
# running scene change detection before the interpolation
clip = core.misc.SCDetect(clip=clip,threshold=0.150)
# adjusting color space from YUV420P8 to RGBS for vsRIFEmlrt
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, matrix_in_s="470bg", range_s="limited")
# adjusting frame count&rate with RIFE (mlrt) (this is the call that fails, see the traceback: script line 43)
clip = vsmlrt.RIFE(clip, model=44, backend=Backend.ORT_DML(fp16=True,device_id=0)) # new fps: 50
# adjusting frame count by adding duplicate frames
original = ChangeFPS.ChangeFPS(clip=original,target_fps_num=50,target_fps_den=1)

# no resizing since resolution is already achieved
# adjusting output color from: YUV420P8 to YUV420P10 for QSVEncModel
original = core.resize.Bicubic(clip=original, format=vs.YUV420P10, range_s="limited")
# adjusting output color from: RGBS to YUV420P10 for QSVEncModel
clip = core.resize.Bicubic(clip=clip, format=vs.YUV420P10, matrix_s="470bg", range_s="limited", dither_type="error_diffusion")
# DEBUG: clip: YUV420P10 vs original: YUV420P10
original = core.text.Text(clip=original,text="Original",scale=1,alignment=7)
clip = core.text.Text(clip=clip,text="Filtered",scale=1,alignment=7)
# alternating original and filtered frames for comparison
interleaved = core.std.Interleave([original, clip])
# set output frame rate to 50fps (progressive)
interleaved = core.std.AssumeFPS(clip=interleaved, fpsnum=50, fpsden=1)
# Output
interleaved.set_output()

I get:


2023-09-28 15:28:54.407
Failed to evaluate the script:
Python exception: operator (): 'ortapi->CreateSessionFromArray( d->environment, std::data(onnx_data), std::size(onnx_data), session_options, &resource.session )' failed: Exception during initialization: D:\a\onnxruntime\onnxruntime\onnxruntime\core\providers\dml\DmlExecutionProvider\src\AbiCustomRegistry.cpp(516)\onnxruntime.dll!00007FF94E91269C: (caller: 00007FF94E919509) Exception(3) tid(1254) 80070057 Falscher Parameter. 


Traceback (most recent call last):
File "src\cython\vapoursynth.pyx", line 3115, in vapoursynth._vpy_evaluate
File "src\cython\vapoursynth.pyx", line 3116, in vapoursynth._vpy_evaluate
File "J:\tmp\tempPreviewVapoursynthFile15_28_52_504.vpy", line 43, in 
clip = vsmlrt.RIFE(clip, model=44, backend=Backend.ORT_DML(fp16=True,device_id=0)) # new fps: 50
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1042, in RIFE
output0 = RIFEMerge(
^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 916, in RIFEMerge
return inference_with_fallback(
^^^^^^^^^^^^^^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1611, in inference_with_fallback
raise e
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1590, in inference_with_fallback
return _inference(
^^^^^^^^^^^
File "F:/Hybrid/64bit/vs-mlrt/vsmlrt.py", line 1460, in _inference
clip = core.ort.Model(
^^^^^^^^^^^^^^^
File "src\cython\vapoursynth.pyx", line 2847, in vapoursynth.Function.__call__
vapoursynth.Error: operator (): 'ortapi->CreateSessionFromArray( d->environment, std::data(onnx_data), std::size(onnx_data), session_options, &resource.session )' failed: Exception during initialization: D:\a\onnxruntime\onnxruntime\onnxruntime\core\providers\dml\DmlExecutionProvider\src\AbiCustomRegistry.cpp(516)\onnxruntime.dll!00007FF94E91269C: (caller: 00007FF94E919509) Exception(3) tid(1254) 80070057 Falscher Parameter. 

After that I thought ORT_DML in general didn't work, but DPIR works fine:

clip = vsmlrt.DPIR(clip, strength=5.000, overlap=16, model=1, backend=Backend.ORT_DML(fp16=False,device_id=0))

Is this a bug or am I missing something?

Cu Selur

P.S.: In case it's relevant, here are my general system specs:

  • Operating System: Windows 11 Pro 64-bit (10.0, Build 22621) (22621.ni_release.220506-1250)
  • System Model: X670E PG Lightning
  • Processor: AMD Ryzen 9 7950X 16-Core Processor (32 CPUs), ~4.5GHz
  • Memory: 65536MB RAM
  • Available OS Memory: 64632MB RAM
  • DirectX Version: DirectX 12
  • Vapoursynth Version: R63 (portable)
  • Graphic cards:
    • NVIDIA GeForce RTX 4080, Driver Version: 31.0.15.3742
    • Intel(R) Arc(TM) A380 Graphics, Driver Version: 31.0.101.4824
    • AMD Radeon(TM) Graphics (onboard from the CPU), Driver Version: 31.0.14000.61002
@WolframRhodium
Copy link
Contributor

Thanks for the information. One possible issue is that a system-level (in system32) old directml.dll is loaded instead of the new one packaged with vs-mlrt.

If this is the case, I am sorry that I currently don't have enough time to debug it, and one solution is to temporarily rename the old one. If not, please let me know.

@Selur
Copy link
Author

Selur commented Sep 28, 2023

Okay, at the moment I have C:\Windows\System32\directml.dll with product version 1.8.0+220126-2359-1,dml-1.8.89dd732, while
vsort/DirectML.dll is 1.12.0+230518-0058.1.dml-1.12.0e90406.

But the strange thing is: Why does DPIR work fine?

=> I'll try to figure out how to rename the existing C:\Windows\System32\directml.dll to C:\Windows\System32\directml_old.dll and report back. (Even in a command line with administrative rights, I get a 'permission denied' error when trying to rename the file.)

@WolframRhodium
Copy link
Contributor

Yes, you first need to change your user group's permissions on this file.
DPIR only requires simple operator implementations, while RIFE requires more complex ones.
I'm not sure whether manually loading the new directml.dll before ort.dll works. I have also been struggling with this problem this week.

@Selur
Copy link
Author

Selur commented Sep 28, 2023

I'll do some testing and report back. :)

@Selur
Copy link
Author

Selur commented Sep 28, 2023

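Got it working by adding the vsort folder as the first element of the PATH environment variable (os.environ["PATH"]) and registering it with SetDllDirectoryW before loading vsort.dll. :)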

# Imports (working variant of the RIFE script: the vsort folder is added to the DLL search path before vsort.dll is loaded)
import vapoursynth as vs
# standard-library modules; ctypes and site are used for the DLL search path setup below
import ctypes
import site
import sys
import os
# getting Vapoursynth core
core = vs.core
# Import scripts folder
scriptPath = 'F:/Hybrid/64bit/vsscripts'
sys.path.insert(0, os.path.abspath(scriptPath))
os.environ["CUDA_MODULE_LOADING"] = "LAZY"
# Adding dml dependencies to PATH
# Intended to make the DirectML.dll shipped in the vsort folder take precedence over an
# older copy in C:\Windows\System32; this has to happen before vsort.dll is loaded below.
# The vsort folder is located relative to the first site-packages directory.
path = site.getsitepackages()[0]+'/../vs-mlrt/vsort'
# Registering the folder with the Windows DLL loader for this process.
# NOTE: the string is passed here before the backslash normalization on the next line.
ctypes.windll.kernel32.SetDllDirectoryW(path)
# Normalizing to forward slashes and putting the folder first in PATH.
path = path.replace('\\', '/')
os.environ["PATH"] = path + os.pathsep + os.environ["PATH"]
# Loading Plugins
core.std.LoadPlugin(path="F:/Hybrid/64bit/vs-mlrt/vsort.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/Support/akarin.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/MiscFilter/MiscFilters/MiscFilters.dll")
core.std.LoadPlugin(path="F:/Hybrid/64bit/vsfilters/SourceFilter/LSmashSource/vslsmashsource.dll")
# Import scripts (vsmlrt.py is loaded directly from its file path)
from importlib.machinery import SourceFileLoader
vsmlrt = SourceFileLoader('vsmlrt', 'F:/Hybrid/64bit/vs-mlrt/vsmlrt.py').load_module()
# source: 'G:\TestClips&Co\files\test.avi'
# current color space: YUV420P8, bit depth: 8, resolution: 640x352, fps: 25, color matrix: 470bg, yuv luminance scale: limited, scanorder: progressive
# Loading G:\TestClips&Co\files\test.avi using LWLibavSource
clip = core.lsmas.LWLibavSource(source="G:/TestClips&Co/files/test.avi", format="YUV420P8", stream_index=0, cache=0, fpsnum=25, prefer_hw=0)
# Setting detected color matrix (470bg).
clip = core.std.SetFrameProps(clip, _Matrix=5)
# Setting color transfer info (470bg), when it is not set
# NOTE(review): the condition tests the clip returned by core.text.FrameProps, presumably not whether the property exists - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Transfer') else core.std.SetFrameProps(clip, _Transfer=5)
# Setting color primaries info (470bg), when it is not set
# NOTE(review): same condition as for '_Transfer' above - confirm intent
clip = clip if not core.text.FrameProps(clip,'_Primaries') else core.std.SetFrameProps(clip, _Primaries=5)
# Setting color range to TV (limited) range.
clip = core.std.SetFrameProp(clip=clip, prop="_ColorRange", intval=1)
# making sure frame rate is set to 25
clip = core.std.AssumeFPS(clip=clip, fpsnum=25, fpsden=1)
clip = core.std.SetFrameProp(clip=clip, prop="_FieldBased", intval=0) # progressive
from vsmlrt import Backend
# running scene change detection before the interpolation
clip = core.misc.SCDetect(clip=clip,threshold=0.150)
# adjusting color space from YUV420P8 to RGBS for vsRIFEmlrt
clip = core.resize.Bicubic(clip=clip, format=vs.RGBS, matrix_in_s="470bg", range_s="limited")
# adjusting frame count&rate with RIFE (mlrt)
clip = vsmlrt.RIFE(clip, model=44, backend=Backend.ORT_DML(fp16=True,device_id=0)) # new fps: 50
# adjusting output color from: RGBS to YUV420P10 for QSVEncModel
clip = core.resize.Bicubic(clip=clip, format=vs.YUV420P10, matrix_s="470bg", range_s="limited", dither_type="error_diffusion")
# set output frame rate to 50fps (progressive)
clip = core.std.AssumeFPS(clip=clip, fpsnum=50, fpsden=1)
# Output
clip.set_output()

It's kind of ugly and most users probably will not know how to do this, so if you find a better way that could be directly added to vsmlrt.py, please add it. :)

@WolframRhodium
Copy link
Contributor

Yeah I'll try to fix it ASAP.

WolframRhodium added a commit that referenced this issue Sep 29, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants