@@ -962,16 +962,189 @@ def __init__(self, *args, **kwargs):
962962 super ().__init__ (* args , ** kwargs )
963963
964964
def _slot_member_names(cls):
    """Return the names of all ``__slots__`` members of ``cls`` and its bases.

    ``__slots__`` of a class lists only the members declared by that class,
    so the whole MRO is walked.  Each name is reported once, in MRO order.
    The ``__dict__`` and ``__weakref__`` pseudo-slots are skipped and private
    (``__name``) slots are returned in their mangled, ``getattr``-able form.
    """
    names = []
    seen = set()
    for klass in cls.__mro__:
        slots = vars(klass).get("__slots__", ())
        if isinstance(slots, str):
            # ``__slots__ = "name"`` declares a single slot, not one per char.
            slots = (slots,)
        for name in slots:
            if name in ("__dict__", "__weakref__"):
                continue
            if name.startswith("__") and not name.endswith("__"):
                owner = klass.__name__.lstrip("_")
                if owner:
                    name = "_%s%s" % (owner, name)
            if name not in seen:
                seen.add(name)
                names.append(name)
    return names


def relatively_deep_copy(obj, memo):
    """Recursively copy ``obj`` while sharing objects that must not be copied.

    Parameters
    ----------
    obj : object
        Object to copy (typically Numba IR or a function descriptor).
    memo : dict
        Maps ``id(original)`` to the copy already made for it.  Pass the same
        dict to several calls to keep objects shared between the copies
        consistent.  It is updated in place.

    Returns
    -------
    object
        The copy of ``obj``, or ``obj`` itself for objects listed below as
        not copyable and for objects ``copy.copy`` treats as atomic.

    Raises
    ------
    Exception
        Whatever ``copy.copy`` raises for an object that cannot be copied.

    Notes
    -----
    Copying a ``FunctionIR`` has a side effect on the original: a
    ``PostProcessor`` is run on it before it is copied.
    """
    # WARNING: there are some issues with generators which were not
    # investigated and the root cause is not found.  Though the copied IR
    # seems to work fine, there are some extra references kept on generator
    # objects which may result in a memory "leak".

    obj_id = id(obj)
    if obj_id in memo:
        return memo[obj_id]

    from numba.core.dispatcher import _DispatcherBase
    from numba.core.types.functions import Function, Dispatcher
    from numba.core.bytecode import FunctionIdentity
    from numba.core.typing.templates import Signature
    from numba.dppl.compiler import DPPLFunctionTemplate
    from numba.core.compiler import CompileResult
    from numba.np.ufunc.dufunc import DUFunc
    from ctypes import _CFuncPtr
    from cffi.api import FFI
    from types import ModuleType
    from numba.core.types.abstract import Type

    # Objects which shouldn't or can't be copied and it's ok not to copy them.
    if isinstance(obj, (FunctionIdentity, _DispatcherBase, Function, Type, Dispatcher, ModuleType,
                        Signature, DPPLFunctionTemplate, CompileResult,
                        DUFunc, _CFuncPtr, FFI,
                        type, str, bool, type(None))):
        return obj

    from numba.core.ir import Global, FreeVar
    from numba.core.ir import FunctionIR
    from numba.core.postproc import PostProcessor
    from numba.core.funcdesc import FunctionDescriptor

    if isinstance(obj, FunctionDescriptor):
        cpy = FunctionDescriptor(native=obj.native, modname=obj.modname, qualname=obj.qualname,
                                 unique_name=obj.unique_name, doc=obj.doc,
                                 typemap=relatively_deep_copy(obj.typemap, memo),
                                 restype=obj.restype,
                                 calltypes=relatively_deep_copy(obj.calltypes, memo),
                                 args=obj.args, kws=obj.kws, mangler=None,
                                 argtypes=relatively_deep_copy(obj.argtypes, memo),
                                 inline=obj.inline, noalias=obj.noalias, env_name=obj.env_name,
                                 global_dict=obj.global_dict)
        # The mangler parameter is not saved in FunctionDescriptor, it is only
        # used to generate the name.  So pass None as mangler and copy the
        # already mangled name by hand.
        cpy.mangled_name = obj.mangled_name

        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, FunctionIR):
        # PostProcessor does the following:
        # 1. canonicalizes the CFG, modifying the IR
        # 2. fills internal generator status
        # 3. creates and fills the VariableLifetime object
        # These objects can't be copied, so to have a copy of them
        # PostProcessor is run on the copied IR.  If PostProcessor had not
        # been run on the original, the copy would differ from it; to avoid
        # that PostProcessor is run on the original first.
        # This means that copying IR has a side effect on the original.
        PostProcessor(obj).run()
        cpy = FunctionIR(blocks=relatively_deep_copy(obj.blocks, memo),
                         is_generator=relatively_deep_copy(obj.is_generator, memo),
                         func_id=relatively_deep_copy(obj.func_id, memo),
                         loc=obj.loc,
                         definitions=relatively_deep_copy(obj._definitions, memo),
                         arg_count=obj.arg_count,
                         arg_names=relatively_deep_copy(obj.arg_names, memo))
        PostProcessor(cpy).run()

        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, Global):
        cpy = Global(name=obj.name, value=obj.value, loc=obj.loc)
        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, FreeVar):
        cpy = FreeVar(index=obj.index, name=obj.name, value=obj.value, loc=obj.loc)
        memo[obj_id] = cpy
        return cpy

    # For mutable containers the (empty) copy is registered in memo *before*
    # it is filled, so a container that is reachable from its own items
    # resolves to the copy instead of recursing forever.
    if isinstance(obj, list):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for item in obj:
            cpy.append(relatively_deep_copy(item, memo))
        return cpy

    if isinstance(obj, dict):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for key, item in obj.items():
            cpy[relatively_deep_copy(key, memo)] = relatively_deep_copy(item, memo)
        return cpy

    if isinstance(obj, tuple):
        # Subclass constructors could have different parameters than the
        # superclass, e.g. tuple takes one iterable while a namedtuple takes
        # one argument per field.
        items = tuple([relatively_deep_copy(item, memo) for item in obj])
        cpy = items if type(obj) is tuple else type(obj)(*items)
        memo[obj_id] = cpy
        return cpy

    if isinstance(obj, set):
        cpy = copy.copy(obj)
        cpy.clear()
        memo[obj_id] = cpy
        for item in obj:
            cpy.add(relatively_deep_copy(item, memo))
        return cpy

    # Some python objects are not copyable; copy.copy raises for them and the
    # exception is deliberately left to propagate to the caller.
    cpy = copy.copy(obj)
    memo[obj_id] = cpy

    if cpy is obj:
        # copy.copy handed back the very same object (it treats it as
        # atomic).  Assigning copied attributes below would modify the
        # original, so share it as is.
        return cpy

    # An object may keep its state in __dict__, in __slots__, in both (a
    # __dict__ class derived from a slotted one) or in neither.
    instance_dict = getattr(obj, "__dict__", None)
    if instance_dict is not None:
        # Snapshot the names so attribute access can't change the dict
        # while it is being iterated.
        for name in list(instance_dict):
            setattr(cpy, name, relatively_deep_copy(getattr(obj, name), memo))

    for name in _slot_member_names(type(obj)):
        try:
            attr = getattr(obj, name)
        except AttributeError:
            # Slot is declared but was never assigned on this instance.
            continue
        setattr(cpy, name, relatively_deep_copy(attr, memo))

    return cpy
1134+
1135+
9651136class DPPLLower (Lower ):
9661137 def __init__ (self , context , library , fndesc , func_ir , metadata = None ):
9671138 Lower .__init__ (self , context , library , fndesc , func_ir , metadata )
968- fndesc_cpu = copy .copy (fndesc )
969- fndesc_cpu .calltypes = fndesc .calltypes .copy ()
970- fndesc_cpu .typemap = fndesc .typemap .copy ()
1139+ memo = {}
1140+
1141+ fndesc_cpu = relatively_deep_copy (fndesc , memo )
1142+ func_ir_cpu = relatively_deep_copy (func_ir , memo )
1143+
9711144
9721145 cpu_context = context .cpu_context if isinstance (context , DPPLTargetContext ) else context
973- self .gpu_lower = Lower (context , library , fndesc , func_ir . copy () , metadata )
974- self .cpu_lower = Lower (cpu_context , library , fndesc_cpu , func_ir . copy () , metadata )
1146+ self .gpu_lower = Lower (context , library , fndesc , func_ir , metadata )
1147+ self .cpu_lower = Lower (cpu_context , library , fndesc_cpu , func_ir_cpu , metadata )
9751148
9761149 def lower (self ):
9771150 # Basically we are trying to lower on GPU first and if failed - try to lower on CPU.
@@ -991,11 +1164,9 @@ def lower(self):
9911164 # different solution should be used.
9921165
9931166 try :
994- #lowering.lower_extensions[parfor.Parfor] = lower_parfor_rollback
9951167 lowering .lower_extensions [parfor .Parfor ].append (lower_parfor_rollback )
9961168 self .gpu_lower .lower ()
9971169 self .base_lower = self .gpu_lower
998- #lowering.lower_extensions[parfor.Parfor] = numba.parfors.parfor_lowering._lower_parfor_parallel
9991170 lowering .lower_extensions [parfor .Parfor ].pop ()
10001171 except Exception as e :
10011172 if numba .dppl .compiler .DEBUG :
@@ -1015,80 +1186,13 @@ def create_cpython_wrapper(self, release_gil=False):
10151186
10161187
def copy_block(block):
    """Return a new ``ir.Block`` with the scope and loc of ``block``.

    Every statement of ``block.body`` is copied with ``relatively_deep_copy``;
    one memo dict is shared by all statements of the block.
    """
    duplicate = ir.Block(block.scope, block.loc)
    seen = {}
    copied_body = []
    for stmt in block.body:
        copied_body.append(relatively_deep_copy(stmt, seen))
    duplicate.body = copied_body
    return duplicate
10801193
10811194
10821195def lower_parfor_rollback (lowerer , parfor ):
1083- try :
1084- cache_parfor_races = copy .copy (parfor .races )
1085- cache_parfor_params = copy .copy (parfor .params )
1086- cache_parfor_loop_body = {key : copy_block (block ) for key , block in parfor .loop_body .items ()}
1087- cache_parfor_init_block = parfor .init_block .copy ()
1088- cache_parfor_loop_nests = parfor .loop_nests .copy ()
1089- except Exception as e :
1090- raise CopyIRException ("Failed to copy IR" ) from e
1091-
10921196 try :
10931197 _lower_parfor_gufunc (lowerer , parfor )
10941198 if numba .dppl .compiler .DEBUG :
@@ -1098,12 +1202,6 @@ def lower_parfor_rollback(lowerer, parfor):
10981202 msg = "Failed to lower parfor on DPPL-device.\n To see details set environment variable NUMBA_DPPL_DEBUG=1"
10991203 warnings .warn (NumbaPerformanceWarning (msg , parfor .loc ))
11001204 raise e
1101- finally :
1102- parfor .params = cache_parfor_params
1103- parfor .loop_body = cache_parfor_loop_body
1104- parfor .init_block = cache_parfor_init_block
1105- parfor .loop_nests = cache_parfor_loop_nests
1106- parfor .races = cache_parfor_races
11071205
11081206
11091207def dppl_lower_array_expr (lowerer , expr ):
0 commit comments