Skip to content

method cache prevents overspecialization of code_native #15456

@timholy

Description

@timholy

Over at #15030, I'm trying to rewrite ntuple(f, Val{N}) so that we don't need @generated functions. It's not yet viable because, for this particular problem, codegen quality is poor. I think I've narrowed it down to a failure to substitute integer-valued GenSyms during codegen. Here's a lispy rewrite of that code that produces very simple lowered & typed code:

using Base: @_inline_meta

# The "reference implementation"
myntuple1(f, ::Type{Val{3}}) = (f(1), f(2), f(3))

# The implementation I want
function myntuple2{N}(f, ::Type{Val{N}})
    @_inline_meta   # this doesn't seem to affect anything
    _ntuple((), f, Val{N})
end

# Build up the output until it has length N
_ntuple{N}(out::NTuple{N}, f, ::Type{Val{N}}) = out
function _ntuple{N}(out, f, ::Type{Val{N}})
    @_inline_meta
    _ntuple(out, (out..., 0), f, Val{N})  # compile-time evaluation of length(out)+1
end
function _ntuple{N,M}(out, ::NTuple{M}, f, ::Type{Val{N}})
    @_inline_meta
    _ntuple((out..., f(M)), f, Val{N})
end

Results:

julia> @code_typed myntuple1(identity, Val{3})
1-element Array{Any,1}:
 :($(Expr(:lambda, Any[symbol("#self#"),:f,symbol("#unused#")], Any[Any[Any[symbol("#self#"),#myntuple1,0],Any[:f,Base.#identity,64]],Any[],Any[]], :(begin  # /tmp/ntuple2.jl, line 4:
        return (top(tuple))(1,2,3)::Tuple{Int64,Int64,Int64}
    end::Tuple{Int64,Int64,Int64}))))

julia> @code_typed myntuple2(identity, Val{3})
1-element Array{Any,1}:
 :($(Expr(:lambda, Any[symbol("#self#"),:f,symbol("#unused#")], Any[Any[Any[symbol("#self#"),#myntuple2,0],Any[:f,Base.#identity,0]],Any[],Any[Tuple{Int64,Int64},Int64,Int64]], :(begin  # /tmp/ntuple2.jl, line 8:
        $(Expr(:meta, :inline)) # /tmp/ntuple2.jl, line 9: # /tmp/ntuple2.jl, line 14: # /tmp/ntuple2.jl, line 15: # /tmp/ntuple2.jl, line 18: # /tmp/ntuple2.jl, line 19: # /tmp/ntuple2.jl, line 14: # /tmp/ntuple2.jl, line 15: # /tmp/ntuple2.jl, line 18: # /tmp/ntuple2.jl, line 19:
        GenSym(1) = 1
        GenSym(2) = 2 # /tmp/ntuple2.jl, line 14: # /tmp/ntuple2.jl, line 15: # /tmp/ntuple2.jl, line 18: # /tmp/ntuple2.jl, line 19:
        return (top(tuple))(GenSym(1),GenSym(2),3)::Tuple{Int64,Int64,Int64}
    end::Tuple{Int64,Int64,Int64}))))

julia> @code_native myntuple1(identity, Val{3})
        .text
Filename: ntuple2.jl
Source line: 0
        pushq   %rbp
        movq    %rsp, %rbp
Source line: 4
        movq    $3, 16(%rdi)
        movq    $2, 8(%rdi)
        movq    $1, (%rdi)
        movq    %rdi, %rax
        popq    %rbp
        retq

julia> @code_native myntuple2(identity, Val{3})
        .text
Filename: ntuple2.jl
Source line: 0
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        subq    $104, %rsp
        movq    %rdi, %rbx
        leaq    -56(%rbp), %r12
        leaq    -88(%rbp), %rdi
        movq    $0, -120(%rbp)
        movq    $0, -112(%rbp)
        movq    $0, -104(%rbp)
        movq    $0, -96(%rbp)
        movq    $0, -72(%rbp)
        movq    $0, -64(%rbp)
        movq    $0, -56(%rbp)
        movq    $0, -48(%rbp)
        movq    $20, -136(%rbp)
        movabsq $jl_tls_states, %r13
        movq    (%r13), %rax
        movq    %rax, -128(%rbp)
        leaq    -136(%rbp), %rax
        movq    %rax, (%r13)
Source line: 19
        movq    %rbx, -88(%rbp)
        movabsq $140407980499072, %r14  # imm = 0x7FB347CD2080
        movq    %r14, -80(%rbp)
        movabsq $jl_apply_generic, %r15
        movl    $2, %esi
        callq   *%r15
        movq    %rax, -112(%rbp)
        movq    %rbx, -56(%rbp)
        leaq    48(%r14), %rax
        movq    %rax, -48(%rbp)
        movl    $2, %esi
        movq    %r12, %rdi
        callq   *%r15
        movq    %rax, -104(%rbp)
        movq    %rbx, -72(%rbp)
        orq     $96, %r14
        movq    %r14, -64(%rbp)
        movl    $2, %esi
        leaq    -72(%rbp), %rdi
        callq   *%r15
        movq    %rax, -96(%rbp)
        movabsq $jl_f_tuple, %rax
        xorl    %edi, %edi
        movl    $3, %edx
        leaq    -112(%rbp), %rsi
        callq   *%rax
        movq    %rax, -120(%rbp)
Source line: 12
        movq    -128(%rbp), %rcx
        movq    %rcx, (%r13)
        addq    $104, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        retq

Is there any chance this has an easy fix? Like, add a "substitute constant-valued GenSyms" pass during codegen?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions