- cpython/Objects/moduleobject.c
- cpython/Include/moduleobject.h
- cpython/Objects/clinic/moduleobject.c.h
- cpython/Python/import.c
- cpython/Python/clinic/import.c.h
- cpython/Lib/importlib/_bootstrap.py
there's a struct named PyModuleDef defined in Include/moduleobject.h
the PyModuleObject is defined in Objects/moduleobject.c
, which contains a field with type PyModuleDef
the field md_dict is the __dict__
attribute of the module object
PyModuleDef is optional; the index stored in its m_base field is used to find the module by index (in the interpreter's per-index module list), not by name as in sys.modules
m_size stores the size of per-module data
m_clear and m_free are used for deallocation
for more detail please refer to PEP 3121 -- Extension Module Initialization and Finalization
import _locale
import re
when you try to look into the source code to find out how import
works
follow the call stack
the core callable object is interp->importlib
which is initialized in
/* cpython/Python/pylifecycle.c */
/*
 * Excerpt (abridged; elided parts are marked "omit").
 * Looks up the "_frozen_importlib" module with PyImport_AddModule() and
 * stores it in interp->importlib. Returns an init error when the lookup
 * yields NULL.
 */
static _PyInitError
initimport(PyInterpreterState *interp, PyObject *sysmod)
{
/* omit */
importlib = PyImport_AddModule("_frozen_importlib");
if (importlib == NULL) {
return _Py_INIT_ERR("couldn't get _frozen_importlib from sys.modules");
}
/* Keep the module on the interpreter state and take a reference for the
   stored pointer. */
interp->importlib = importlib;
Py_INCREF(interp->importlib);
/* omit */
}
search the _frozen_importlib
, you can find the half-binary file Python/importlib.h
with content
/* Auto-generated by Programs/_freeze_importlib.c */
const unsigned char _Py_M__importlib_bootstrap[] = {
99,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
...
}
*/
it turns out that _freeze_importlib.c
will freeze the python code in Lib/importlib/_bootstrap.py
to the half-binary format Python/importlib.h
it's interesting that the import
procedure is actually written in pure python, the attribute PyId__find_and_load
of interp->importlib
maps to the python function name _find_and_load
defined in Lib/importlib/_bootstrap.py
whenever you modify the source code in Lib/importlib/_bootstrap.py
, you need to regenerate the Python/importlib.h
file, and recompile the source code
# compile ./Programs/_freeze_importlib.c, and use the program to freeze
# _bootstrap_external.py -> importlib_external.h
# _bootstrap.py -> importlib.h
# zipimport.py -> importlib_zipimport.h
make regen-importlib
# recompile
make
let's compile a script with only one line import _locale
./python.exe -m dis test.py
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (None)
4 IMPORT_NAME 0 (_locale)
6 STORE_NAME 0 (_locale)
8 LOAD_CONST 1 (None)
10 RETURN_VALUE
the core opcode here is IMPORT_NAME
it's tedious to copy and paste all the source code related to IMPORT_NAME
, even with annotation
imagine that two or more threads are importing the same _locale
module at the same time; how does CPython handle this situation?
if you read the source code and the following images, you will notice that a locking mechanism is used to prevent race conditions
the procedure is listed
1. opcode IMPORT_NAME will check if the name being imported is in sys.modules; if so, return what's in sys.modules
2. try to acquire the lock _imp
3. get the lock object in _module_locks with the module name, create it if necessary (in position 1)
4. try to acquire the lock object from step 3 (in position 2)
5. release the lock _imp (in position 3)
6. check if the name being imported is in sys.modules; if so, release the lock object in _module_locks and return what's in sys.modules (in position 4)
7. for each finder in sys.meta_path, if the finder can load the module name, release the lock object in _module_locks and return what's loaded
8. raise an error
in position 1
, only the thread that holds _imp
can modify _module_locks
, the current thread will check if the module name being imported is in _module_locks
, and if not, insert a new lock object into _module_locks
in position 3
, lock _imp
is released; if another thread is importing a different module, it is able to acquire the _imp
lock and continue the procedure; if another thread is importing the same module, even if it acquires _imp
successfully, it will not be able to acquire the lock in _module_locks
, since the previous thread is still holding it
in position 4
, current thread checks for cache in sys.modules
again
in position 5
, it will acquire the lock _imp
before the call of every finder.find
and release it after the function call
there are currently three different finders in my sys.meta_path
>>> sys.meta_path
[<class '_frozen_importlib.BuiltinImporter'>, <class '_frozen_importlib.FrozenImporter'>, <class '_frozen_importlib_external.PathFinder'>]
BuiltinImporter
will handle all the built-in modules, for example when we call
`import _locale`
BuiltinImporter
is defined in Lib/importlib/_bootstrap.py
, BuiltinImporter.find_spec
will return a ModuleSpec
object whose attribute loader
is bound to the BuiltinImporter
object, ModuleSpec.loader.create_module
will then be called, which finally calls _imp_create_builtin
/* defined in cpython/Python/import.c */
/*
 * Excerpt (abridged; elided parts are marked "do some check").
 * Walks PyImport_Inittab looking for an entry whose name equals `name`.
 * An entry without an init function is served through PyImport_AddModule();
 * otherwise the entry's init function is called. The code following the
 * loop releases `name` and returns None.
 * NOTE(review): `name`, `namestr`, `mod` and `p` are not declared in this
 * excerpt; presumably they are set up in the elided checks.
 */
static PyObject *
_imp_create_builtin(PyObject *module, PyObject *spec)
{
/* do some check */
PyObject *modules = NULL;
for (p = PyImport_Inittab; p->name != NULL; p++) {
/* PyImport_Inittab is a C array; each element stores a built-in module name and its C initialization function.
The loop traverses the array, finds the entry whose name matches, calls its initialization function, and returns the module object */
PyModuleDef *def;
if (_PyUnicode_EqualToASCIIString(name, p->name)) {
if (p->initfunc == NULL) {
/* Cannot re-init internal module ("sys" or "builtins") */
mod = PyImport_AddModule(namestr);
Py_DECREF(name);
return mod;
}
/* Call the initialization function registered for this entry. */
mod = (*p->initfunc)();
/* do some check */
}
}
/* Reached when the loop ends without returning: drop the reference on
   `name` and return None. */
Py_DECREF(name);
Py_RETURN_NONE;
}
/* in cpython/PC/config.c
struct _inittab _PyImport_Inittab[] = {
{"_abc", PyInit__abc},
{"array", PyInit_array},
{"_ast", PyInit__ast},
{"audioop", PyInit_audioop},
{"binascii", PyInit_binascii},
{"cmath", PyInit_cmath},
{"errno", PyInit_errno},
{"faulthandler", PyInit_faulthandler},
{"gc", PyInit_gc},
{"math", PyInit_math},
{"nt", PyInit_nt}, /* Use the NT os functions, not posix */
{"_operator", PyInit__operator},
{"_signal", PyInit__signal},
{"_md5", PyInit__md5},
...
}
*/
the definition of FrozenImporter.loader.create_module
is similar to BuiltinImporter
/*
 * Excerpt (abridged; elided parts are marked "check").
 * Imports the frozen module called `name`: finds its entry with
 * find_frozen(), reads an object from the entry's byte string, obtains a
 * dict with module_dict_for_exec() and runs exec_code_in_module().
 * Returns 1 on the success path shown here and -1 via err_return.
 */
int
PyImport_ImportFrozenModuleObject(PyObject *name)
{
p = find_frozen(name);
/* check */
/* Read the object stored in the frozen bytes (p->code, `size` bytes);
   presumably this is the module's code object. */
co = PyMarshal_ReadObjectFromString((const char *)p->code, size);
/* check */
d = module_dict_for_exec(name);
/* check */
m = exec_code_in_module(name, d, co);
if (m == NULL)
goto err_return;
/* Success: release the local references to `co` and `m`. */
Py_DECREF(co);
Py_DECREF(m);
return 1;
err_return:
/* Failure: `co` still has to be released before reporting the error. */
Py_DECREF(co);
return -1;
}
/*
 * Excerpt (abridged; the leading checks are elided).
 * Returns the entry of PyImport_FrozenModules whose name equals `name`,
 * or NULL when the entry with a NULL name (the end marker) is reached first.
 */
static const struct _frozen *find_frozen(PyObject *name)
{
/* check */
for (p = PyImport_FrozenModules; ; p++) {
/* iterate through the predefined C array to find the matching entry */
if (p->name == NULL)
return NULL;
if (_PyUnicode_EqualToASCIIString(name, p->name))
break;
}
return p;
}
/* in cpython/Python/frozen.c
static const struct _frozen _PyImport_FrozenModules[] = {
/* importlib */
{"_frozen_importlib", _Py_M__importlib_bootstrap,
(int)sizeof(_Py_M__importlib_bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib_bootstrap_external,
(int)sizeof(_Py_M__importlib_bootstrap_external)},
{"zipimport", _Py_M__zipimport,
(int)sizeof(_Py_M__zipimport)},
/* Test module */
{"__hello__", M___hello__, SIZE},
/* Test package (negative size indicates package-ness) */
{"__phello__", M___hello__, -SIZE},
{"__phello__.spam", M___hello__, SIZE},
{0, 0, 0} /* sentinel */
};
const struct _frozen *PyImport_FrozenModules = _PyImport_FrozenModules;
*/
PathFinder.loader.create_module
will simply call _new_module
, which is defined in cpython/Lib/importlib/_bootstrap.py
, there's no c function to be called
def module_from_spec(spec):
    """Create a module based on the provided spec.

    The loader's create_module() is used when it exists; if it is missing
    or returns None, a plain module is created with _new_module(). The
    module attributes are then initialised from the spec.

    Raises ImportError when the loader defines exec_module() but not
    create_module().
    """
    # Typically loaders will not implement create_module().
    module = None
    if hasattr(spec.loader, 'create_module'):
        # BuiltinImporter and FrozenImporter will call create_module here,
        # which delegates the call to a C function.
        module = spec.loader.create_module(spec)
    elif hasattr(spec.loader, 'exec_module'):
        raise ImportError('loaders that define exec_module() '
                          'must also define create_module()')
    if module is None:
        # PathFinder will reach here.
        # _new_module simply returns a fresh, empty module;
        # the loading process is done by the spec.
        module = _new_module(spec.name)
    _init_module_attrs(spec, module)
    return module
def _new_module(name):
return type(sys)(name)
PathFinder.find_spec
will extract what's in sys.path_hooks
, and use those objects in sys.path_hooks
as the finders to handle the finding procedure (in the order they were inserted)
FileFinder
is installed as the default path_hooks
in cpython/Lib/importlib/_bootstrap_external.py
def _install(_bootstrap_module):
    """Install the path-based import components."""
    _setup(_bootstrap_module)
    supported_loaders = _get_supported_file_loaders()
    # Register FileFinder as a path hook, built from the supported loaders.
    sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
    # Append PathFinder to the list of meta path finders.
    sys.meta_path.append(PathFinder)
FileFinder.find_spec
will handle the finding procedure and the interactions with the file system, and caches the files for performance; the cached files will be refreshed when the directory the finder is handling has been modified
if FileFinder
finds the files related to the module name you provided,
you can modify the default sys.path_hooks
to define your own Finder
or customize your import behaviour