From e2d842adeadfb6faca33260fd4190d89066bcafc Mon Sep 17 00:00:00 2001 From: Jameson Nash <vtjnash@gmail.com> Date: Fri, 17 Jul 2015 00:11:41 -0400 Subject: [PATCH] add incremental compile docs --- NEWS.md | 8 +++ base/client.jl | 2 +- doc/helpdb.jl | 160 ++++++++++++++++++++++++++--------------- doc/manual/modules.rst | 121 +++++++++++++++++++++++++------ doc/stdlib/base.rst | 12 ++-- 5 files changed, 221 insertions(+), 82 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3bd56ed843d6b..af23f70a0d3e2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -31,6 +31,12 @@ New language features * The syntax `function foo end` can be used to introduce a generic function without yet adding any methods ([#8283]). + * Incremental compilation of modules: ``Base.compile(module::Symbol)`` (stored in `~/.julia/lib/v0.4`) + + * See manual section on `Module initialization and precompilation` (under `Modules`) for details and errata. + + * New option `--compile-incremental={yes|no}` added to invoke the equivalent of ``Base.compile`` from the command line. 
+ Language changes ---------------- @@ -450,6 +456,8 @@ Deprecated or removed * `sync_gc_total_bytes` -> `jl_gc_sync_total_bytes` + * `require(::AbstractString)` and `reload` (see news about addition of `compile`) + Julia v0.3.0 Release Notes ========================== diff --git a/base/client.jl b/base/client.jl index 918d1812961fa..77005e5f352fe 100644 --- a/base/client.jl +++ b/base/client.jl @@ -332,7 +332,7 @@ function init_load_path() end push!(LOAD_PATH,abspath(JULIA_HOME,"..","local","share","julia","site",vers)) push!(LOAD_PATH,abspath(JULIA_HOME,"..","share","julia","site",vers)) - push!(LOAD_CACHE_PATH,abspath(homedir(),".julia","libs",vers)) + push!(LOAD_CACHE_PATH,abspath(homedir(),".julia","lib",vers)) push!(LOAD_CACHE_PATH,abspath(JULIA_HOME,"..","usr","lib","julia")) #TODO: fixme end diff --git a/doc/helpdb.jl b/doc/helpdb.jl index c7bf85f074ac5..fc986af5dcbe3 100644 --- a/doc/helpdb.jl +++ b/doc/helpdb.jl @@ -1308,9 +1308,15 @@ Any[ "), -("Base","require","require(file::AbstractString...) +("Base","require","require(module::Symbol) - Load source files once, in the context of the \"Main\" module, on + This function is part of the implementation of \"using\" / + \"import\", if a module is not already defined in \"Main\". It can + also be called directly to force reloading a module, regardless of + whether it has been loaded before (for example, when interactively + developing libraries). + + Loads a source file, in the context of the \"Main\" module, on every active node, searching standard locations for files. \"require\" is considered a top-level operation, so it sets the current \"include\" path but does not use it to search for files @@ -1324,11 +1330,14 @@ Any[ "), -("Base","reload","reload(file::AbstractString) +("Base","compile","compile(module::String) - Like \"require\", except forces loading of files regardless of - whether they have been loaded before. Typically used when - interactively developing libraries. 
+ Creates a precompiled cache file for module (see help for + \"require\") and all of its dependencies. This can be used to + reduce package load times. Cache files are stored in + LOAD_CACHE_PATH[1], which defaults to *~/.julia/lib/VERSION*. See + the manual section *Module initialization and precompilation* + (under *Modules*) for important notes. "), @@ -1345,7 +1354,7 @@ Any[ "), -("Base","include_string","include_string(code::AbstractString) +("Base","include_string","include_string(code::AbstractString[, filename]) Like \"include\", except reads code from the given string rather than from a file. Since there is no file path involved, no path @@ -5701,18 +5710,26 @@ Millisecond(v) "), -("Base","readbytes!","readbytes!(stream, b::Vector{UInt8}, nb=length(b)) +("Base","readbytes!","readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true) Read at most \"nb\" bytes from the stream into \"b\", returning the number of bytes read (increasing the size of \"b\" as needed). + See \"readbytes\" for a description of the \"all\" option. + "), -("Base","readbytes","readbytes(stream, nb=typemax(Int)) +("Base","readbytes","readbytes(stream, nb=typemax(Int); all=true) Read at most \"nb\" bytes from the stream, returning a \"Vector{UInt8}\" of the bytes read. + If \"all\" is true (the default), this function will block + repeatedly trying to read all requested bytes, until an error or + end-of-file occurs. If \"all\" is false, at most one \"read\" call + is performed, and the amount of data returned is device-dependent. + Note that not all stream types support the \"all\" option. + "), ("Base","position","position(s) @@ -5920,8 +5937,8 @@ Millisecond(v) ("Base","countlines","countlines(io[, eol::Char]) - Read io until the end of the stream/file and count the number of - non-empty lines. To specify a file pass the filename as the first + Read \"io\" until the end of the stream/file and count the number + of lines. To specify a file pass the filename as the first argument. 
EOL markers other than '\\n' are supported by passing them as the second argument. @@ -6387,31 +6404,55 @@ popdisplay(d::Display) "), -("Base","mmap_array","mmap_array(type, dims, stream[, offset]) +("Base","Mmap","Mmap.Anonymous(name, readonly, create) + + Create an \"IO\"-like object for creating zeroed-out mmapped-memory + that is not tied to a file for use in \"Mmap.mmap\". Used by + \"SharedArray\" for creating shared memory arrays. + +"), + +("Base","Mmap","Mmap.mmap(io::Union(IOStream,AbstractString,Mmap.AnonymousMmap)[, type::Type{Array{T,N}}, dims, offset]; grow::Bool=true, shared::Bool=true) +Mmap.mmap(type::Type{Array{T, N}}, dims) Create an \"Array\" whose values are linked to a file, using memory-mapping. This provides a convenient way of working with data too large to fit in the computer's memory. - The type determines how the bytes of the array are interpreted. - Note that the file must be stored in binary format, and no format - conversions are possible (this is a limitation of operating - systems, not Julia). + The type is an \"Array{T,N}\" with a bits-type element of \"T\" and + dimension \"N\" that determines how the bytes of the array are + interpreted. Note that the file must be stored in binary format, + and no format conversions are possible (this is a limitation of + operating systems, not Julia). + + \"dims\" is a tuple or single \"Integer\" specifying the size or + length of the array. - \"dims\" is a tuple specifying the size of the array. + The file is passed via the stream argument, either as an open + \"IOStream\" or filename string. When you initialize the stream, + use \"\"r\"\" for a \"read-only\" array, and \"\"w+\"\" to create a + new array used to write values to disk. - The file is passed via the stream argument. When you initialize - the stream, use \"\"r\"\" for a \"read-only\" array, and \"\"w+\"\" - to create a new array used to write values to disk. + If no \"type\" argument is specified, the default is + \"Vector{UInt8}\". 
Optionally, you can specify an offset (in bytes) if, for example, you want to skip over a header in the file. The default value for - the offset is the current stream position. + the offset is the current stream position for an \"IOStream\". + + The \"grow\" keyword argument specifies whether the disk file + should be grown to accommodate the requested size of array (if the + total file size is < requested array size). Write privileges are + required to grow the file. + + The \"shared\" keyword argument specifies whether the resulting + \"Array\" and changes made to it will be visible to other processes + mapping the same file. For example, the following code: # Create a file for mmapping - # (you could alternatively use mmap_array to do this step, too) + # (you could alternatively use mmap to do this step, too) A = rand(1:20, 5, 30) s = open(\"/tmp/mmap.bin\", \"w+\") # We'll write the dimensions of the array as the first two Ints in the file @@ -6425,7 +6466,7 @@ popdisplay(d::Display) s = open(\"/tmp/mmap.bin\") # default is read-only m = read(s, Int) n = read(s, Int) - A2 = mmap_array(Int, (m,n), s) + A2 = Mmap.mmap(s, Matrix{Int}, (m,n)) creates a \"m\"-by-\"n\" \"Matrix{Int}\", linked to the file associated with stream \"s\". @@ -6437,22 +6478,21 @@ popdisplay(d::Display) "), -("Base","mmap_bitarray","mmap_bitarray([type], dims, stream[, offset]) +("Base","Mmap","Mmap.mmap(io, BitArray[, dims, offset]) Create a \"BitArray\" whose values are linked to a file, using memory-mapping; it has the same purpose, works in the same way, and - has the same arguments, as \"mmap_array()\", but the byte - representation is different. The \"type\" parameter is optional, - and must be \"Bool\" if given. + has the same arguments, as \"mmap()\", but the byte representation + is different. 
- **Example**: \"B = mmap_bitarray((25,30000), s)\" + **Example**: \"B = Mmap.mmap(s, BitArray, (25,30000))\" This would create a 25-by-30000 \"BitArray\", linked to the file associated with stream \"s\". "), -("Base","msync","msync(array) +("Base","Mmap","Mmap.sync!(array) Forces synchronization between the in-memory version of a memory- mapped \"Array\" or \"BitArray\" and the on-disk version. @@ -6794,12 +6834,18 @@ popdisplay(d::Display) \"RTLD_FIRST\". These are converted to the corresponding flags of the POSIX (and/or GNU libc and/or MacOS) dlopen command, if possible, or are ignored if the specified functionality is not - available on the current platform. The default is - \"RTLD_LAZY|RTLD_DEEPBIND|RTLD_LOCAL\". An important usage of - these flags, on POSIX platforms, is to specify - \"RTLD_LAZY|RTLD_DEEPBIND|RTLD_GLOBAL\" in order for the library's - symbols to be available for usage in other shared libraries, in - situations where there are dependencies between shared libraries. + available on the current platform. The default flags are platform + specific. On MacOS the default \"dlopen\" flags are + \"RTLD_LAZY|RTLD_DEEPBIND|RTLD_GLOBAL\" while on other platforms + the defaults are \"RTLD_LAZY|RTLD_DEEPBIND|RTLD_LOCAL\". An + important usage of these flags is to specify non default behavior + for when the dynamic library loader binds library references to + exported symbols and if the bound references are put into process + local or global scope. For instance + \"RTLD_LAZY|RTLD_DEEPBIND|RTLD_GLOBAL\" allows the library's + symbols to be available for usage in other shared libraries, + addressing situations where there are dependencies between shared + libraries. "), @@ -6991,15 +7037,15 @@ popdisplay(d::Display) \"Complex{Float64}\" the return type is \"UmfpackLU\". Some examples are shown in the table below. 
- +-------------------------+---------------------------+----------------------------------------------+ - | Type of input \\\"A\\\" | Type of output \\\"F\\\" | Relationship between \\\"F\\\" and \\\"A\\\" | - +-------------------------+---------------------------+----------------------------------------------+ - | \\\"Matrix()\\\" | \\\"LU\\\" | \\\"F[:L]*F[:U] == A[F[:p], :]\\\" | - +-------------------------+---------------------------+----------------------------------------------+ - | \\\"Tridiagonal()\\\" | \\\"LU{T,Tridiagonal{T}}\\\" | N/A | - +-------------------------+---------------------------+----------------------------------------------+ - | \\\"SparseMatrixCSC()\\\" | \\\"UmfpackLU\\\" | \\\"F[:L]*F[:U] == F[:Rs] .* A[F[:p], F[:q]]\\\" | - +-------------------------+---------------------------+----------------------------------------------+ + +-------------------------+---------------------------+------------------------------------------------+ + | Type of input \\\"A\\\" | Type of output \\\"F\\\" | Relationship between \\\"F\\\" and \\\"A\\\" | + +-------------------------+---------------------------+------------------------------------------------+ + | \\\"Matrix()\\\" | \\\"LU\\\" | \\\"F[:L]*F[:U] == A[F[:p], :]\\\" | + +-------------------------+---------------------------+------------------------------------------------+ + | \\\"Tridiagonal()\\\" | \\\"LU{T,Tridiagonal{T}}\\\" | N/A | + +-------------------------+---------------------------+------------------------------------------------+ + | \\\"SparseMatrixCSC()\\\" | \\\"UmfpackLU\\\" | \\\"F[:L]*F[:U] == (F[:Rs] .* A)[F[:p], F[:q]]\\\" | + +-------------------------+---------------------------+------------------------------------------------+ The individual components of the factorization \"F\" can be accessed by indexing: @@ -10278,12 +10324,12 @@ popdisplay(d::Display) "), -("Base","median","median(v) +("Base","median","median(v[, region]) - Compute the median of a vector \"v\". 
\"NaN\" is returned if the - data contains any \"NaN\" values. For applications requiring the - handling of missing data, the \"DataArrays\" package is - recommended. + Compute the median of whole array \"v\", or optionally along the + dimensions in \"region\". \"NaN\" is returned if the data contains + any \"NaN\" values. For applications requiring the handling of + missing data, the \"DataArrays\" package is recommended. "), @@ -10562,7 +10608,7 @@ popdisplay(d::Display) the transform has conjugate symmetry in order to save roughly half the computational time and storage costs compared with \"fft()\". If \"A\" has size \"(n_1, ..., n_d)\", the result has size - \"(floor(n_1/2)+1, ..., n_d)\". + \"(div(n_1,2)+1, ..., n_d)\". The optional \"dims\" argument specifies an iterable subset of one or more dimensions of \"A\" to transform, similar to \"fft()\". @@ -10580,10 +10626,11 @@ popdisplay(d::Display) transform, defaulting to \"1:ndims(A)\". \"d\" is the length of the transformed real array along the - \"dims[1]\" dimension, which must satisfy \"d == - floor(size(A,dims[1])/2)+1\". (This parameter cannot be inferred - from \"size(A)\" due to the possibility of rounding by the - \"floor\" function here.) + \"dims[1]\" dimension, which must satisfy \"div(d,2)+1 == + size(A,dims[1])\". (This parameter cannot be inferred from + \"size(A)\" since both \"2*size(A,dims[1])-2\" as well as + \"2*size(A,dims[1])-1\" are valid sizes for the transformed real + array.) "), @@ -13235,8 +13282,9 @@ golden ("Base.Test","@test_throws","@test_throws(extype, ex) Test that the expression \"ex\" throws an exception of type - \"extype\" and calls the current handler to handle the result. - The default handler returns the exception if it is of the expected type. + \"extype\" and calls the current handler to handle the result. The + default handler returns the exception if it is of the expected + type. 
"), diff --git a/doc/manual/modules.rst b/doc/manual/modules.rst index 09e2acb909653..aa47052141b31 100644 --- a/doc/manual/modules.rst +++ b/doc/manual/modules.rst @@ -254,13 +254,21 @@ Module initialization and precompilation ---------------------------------------- Large modules can take several second to load because executing all of -the statements in a module often involves compiling a large amount of -code. However, Julia is progressively gaining more ability to cache -the parsed and compiled binary image of a package. Currently, this -requires one to recompile Julia after modifying the file -``base/userimg.jl`` to require the desired modules, but in a future -version of Julia the module caching will be simpler and more -automated. In order to make your module work with precompilation, +the statements in a module often involves compiling a large amount of code. +Julia provides the ability to create precompiled versions of modules +to reduce this time. + +There are two mechanisms that can achieve this: +incremental compile and custom system image. + +To create a custom system image that can be used to start julia with the -J option, +recompile Julia after modifying the file ``base/userimg.jl`` to require the desired modules. + +To create an incremental precompiled module file, +call ``Base.compile(modulename::Symbol)``. +The resulting cache files will be stored in ``Base.LOAD_CACHE_PATH[1]``. + +In order to make your module work with precompilation, however, you may need to change your module to explicitly separate any initialization steps that must occur at *runtime* from steps that can occur at *compile time*. 
For this purpose, Julia allows you to define @@ -293,18 +301,20 @@ in your module:: global const foo_data_ptr = ccall((:foo_data,:libfoo), Ptr{Void}, ()) end -(Notice that it is perfectly possible to define a global constant inside +Notice that it is perfectly possible to define a global inside a function like ``__init__``; this is one of the advantages of using a -dynamic language.) Obviously, any other constant in your module that -depends on ``foo_data_ptr`` would also have to be initialized in ``__init__``. +dynamic language. +Obviously, any other globals in your module that depend on ``foo_data_ptr`` +would also have to be initialized in ``__init__``. Constants involving most Julia objects that are not produced by ``ccall`` do not need to be placed in ``__init__``: their definitions -can be precompiled and loaded from the cached module image. (This -includes complicated heap-allocated objects like arrays.) However, -any routine that returns a raw pointer value must be called at runtime -for precompilation to work. This includes the Julia functions -``cfunction`` and ``pointer``. +can be precompiled and loaded from the cached module image. +This includes complicated heap-allocated objects like arrays. +However, any routine that returns a raw pointer value must be called +at runtime for precompilation to work +(Ptr objects will turn into null pointers unless they are hidden inside an isbits object). +This includes the return values of the Julia functions ``cfunction`` and ``pointer``. Dictionary and set types, or in general anything that depends on the output of a ``hash(key)`` method, are a trickier case. In the common @@ -314,9 +324,80 @@ they are safe to precompile. However, for a few other key types, such as ``Function`` or ``DataType`` and generic user-defined types where you haven't defined a ``hash`` method, the fallback ``hash`` method depends on the memory address of the object (via its ``object_id``) -and hence may change from run to run. 
If you have one of these key -types, or if you aren't sure, to be safe you can initialize dictionary -and set globals from within your ``__init__`` function. -Alternatively, you can use the ``ObjectIdDict`` dictionary type, which -is specially handled by precompilation so that it is safe to +and hence may change from run to run. +If you have one of these key types, or if you aren't sure, +to be safe you can initialize this dictionary from within your +``__init__`` function. +Alternatively, you can use the ``ObjectIdDict`` dictionary type, +which is specially handled by precompilation so that it is safe to initialize at compile-time. + +When using precompilation, it is important to keep a clear sense of the +distinction between the compilation phase and the execution phase. +In this mode, it will often be much more clearly apparent that +Julia is a compiler which allows execution of arbitrary Julia code, +not a standalone interpreter that also generates compiled code. + +Other known potential failure scenarios include: + +1. Global counters (for example, for uniquely identifying objects). + Consider the following code snippet:: + + type UniquedById + myid::Int + let counter = 0 + UniquedById() = new(counter += 1) + end + end + + While the intent of this code was to give every instance a unique id, + the counter value is recorded at the end of compilation. + All subsequent usages of this incrementally compiled module + will start from that same counter value. + + One alternative is to store both ``current_module()`` and the current ``counter`` value, + however, it may be better to redesign the code to not depend on this global state. + +2. Associative collections (such as ``Dict`` and ``Set``) need to be re-hashed in ``__init__``. + (In the future, a mechanism may be provided to register an initializer function.) + +3. Depending on compile-time side-effects persisting through load-time. 
+ Examples include: + modifying arrays or other variables in other Julia modules; + maintaining handles to open files or devices; + storing pointers to other system resources (including memory). + +4. Creating accidental "copies" of global state from another module, + by referencing it directly instead of via its lookup path. + For example, (in global scope):: + #mystdout = Base.STDOUT #= will not work correctly, since this will copy Base.STDOUT into this module =# + # instead use accessor functions: + getstdout() = Base.STDOUT #= best option =# + # or move the assignment into the runtime: + __init__() = global mystdout = Base.STDOUT #= also works =# + +Several additional restrictions are placed on the operations that can be done while compiling code +to help the user avoid other wrong-behavior situations: + +1. Calling ``eval`` to cause a side-effect in another module. + This will also cause a warning to be emitted when the incremental compile flag is set. + +2. ``global const`` statements from local scope after ``__init__()`` has been started (see issue #12010 for plans to add an error for this) + +3. Replacing a module (or calling ``workspace()``) is a runtime error while doing an incremental compile. + +A few other points to be aware of: + +1. No code reload / cache invalidation is performed after changes are made to the source files themselves, + (including by ``Pkg.update``), and no cleanup is done after ``Pkg.rm`` + +2. The memory sharing behavior of a reshaped array is disregarded by precompilation (each view gets its own copy) + +3. Expecting the filesystem to be unchanged between compile-time and runtime + e.g. ``@__FILE__``/``source_path()`` to find resources at runtime, + or the BinDeps ``@checked_lib`` macro. Sometimes this is unavoidable. + However, when possible, it can be good practice to copy resources + into the module at compile-time so they won't need to be found at runtime. + +4. 
WeakRef objects and finalizers are not currently handled by the serializer + (this will be fixed in an upcoming release). diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst index e7ead054d2064..5ce55500c1167 100644 --- a/doc/stdlib/base.rst +++ b/doc/stdlib/base.rst @@ -78,21 +78,23 @@ Getting Around Return a string with the contents of the operating system clipboard ("paste"). -.. function:: require(file::AbstractString...) +.. function:: require(module::Symbol) - Load source files once, in the context of the ``Main`` module, on every active node, searching standard locations for files. ``require`` is considered a top-level operation, so it sets the current ``include`` path but does not use it to search for files (see help for ``include``). This function is typically used to load library code, and is implicitly called by ``using`` to load packages. + This function is part of the implementation of ``using`` / ``import``, if a module is not already defined in ``Main``. It can also be called directly to force reloading a module, regardless of whether it has been loaded before (for example, when interactively developing libraries). + + Loads a source file, in the context of the ``Main`` module, on every active node, searching standard locations for files. ``require`` is considered a top-level operation, so it sets the current ``include`` path but does not use it to search for files (see help for ``include``). This function is typically used to load library code, and is implicitly called by ``using`` to load packages. When searching for files, ``require`` first looks in the current working directory, then looks for package code under ``Pkg.dir()``, then tries paths in the global array ``LOAD_PATH``. -.. function:: reload(file::AbstractString) +.. function:: compile(module::String) - Like ``require``, except forces loading of files regardless of whether they have been loaded before. Typically used when interactively developing libraries. 
+ Creates a precompiled cache file for module (see help for ``require``) and all of its dependencies. This can be used to reduce package load times. Cache files are stored in LOAD_CACHE_PATH[1], which defaults to `~/.julia/lib/VERSION`. See the manual section `Module initialization and precompilation` (under `Modules`) for important notes. .. function:: include(path::AbstractString) Evaluate the contents of a source file in the current context. During including, a task-local include path is set to the directory containing the file. Nested calls to ``include`` will search relative to that path. All paths refer to files on node 1 when running in parallel, and files will be fetched from node 1. This function is typically used to load source interactively, or to combine files in packages that are broken into multiple source files. -.. function:: include_string(code::AbstractString) +.. function:: include_string(code::AbstractString, [filename]) Like ``include``, except reads code from the given string rather than from a file. Since there is no file path involved, no path processing or fetching from node 1 is done.