diff --git a/gen/generator.toml b/gen/generator.toml
index 7647b9f1..1c1ac96a 100644
--- a/gen/generator.toml
+++ b/gen/generator.toml
@@ -1,5 +1,7 @@
 [general]
 # it could also be an expression as long as `Meta.parse` can parse this string successfully.
+# basically, it should be the `expression` in the following code:
+# ccall((function_name, expression), returntype, (argtype1, ...), argvalue1, ...)
 library_name = "libclang"
 
 # the new generator is able to print everthing into one single file instead of a pair of two files.
@@ -64,7 +66,14 @@ opaque_func_arg_as_PtrCvoid = false
 opaque_as_mutable_struct = true
 
 [codegen.macro]
+# it is highly recommended to set this entry to "basic".
+# if you'd like to skip all of the macros, please set this entry to "disable".
+# if you'd like to aggressively translate macros to Julia, please set this entry to "aggresive".
+macro_mode = "basic"
 
+# if this entry is true, the generator prints the following message as comments.
+# "# Skipping MacroDefinition: ..."
+add_comment_for_skipped_macro = true
 
 [general.log]
 # CollectTopLevelNode_log = false
@@ -77,8 +86,8 @@ opaque_as_mutable_struct = true
 # DeAnonymize_log = false
 # Audit_log = true
 # Codegen_log = false
-# CodegenPreprocessing_log = false
+# CodegenMacro_log = false
+# CodegenPostprocessing_log = false
 # GeneralPrinter_log = true
 # ProloguePrinter_log = true
 # EpiloguePrinter_log = true
-
diff --git a/src/LibClang.jl b/src/LibClang.jl
index a6ec43e6..0c0688ee 100644
--- a/src/LibClang.jl
+++ b/src/LibClang.jl
@@ -2704,6 +2704,38 @@ function clang_Type_visitFields(T, visitor, client_data)
     ccall((:clang_Type_visitFields, libclang), Cuint, (CXType, CXFieldVisitor, CXClientData), T, visitor, client_data)
 end
 
+const LLVM_CLANG_C_BUILDSYSTEM_H = nothing
+
+const LLVM_CLANG_C_CXCOMPILATIONDATABASE_H = nothing
+
+const LLVM_CLANG_C_CXERRORCODE_H = nothing
+
+const LLVM_CLANG_C_CXSTRING_H = nothing
+
+const LLVM_CLANG_C_DOCUMENTATION_H = nothing
+
+const LLVM_CLANG_C_INDEX_H = nothing
+
+const CINDEX_VERSION_MAJOR = 0
+
+const CINDEX_VERSION_MINOR = 59
+
+CINDEX_VERSION_ENCODE(major, minor) = major * 10000 + minor * 1
+
+# Skipping MacroDefinition: CINDEX_VERSION CINDEX_VERSION_ENCODE ( CINDEX_VERSION_MAJOR , CINDEX_VERSION_MINOR )
+
+CINDEX_VERSION_STRINGIZE_(major, minor) = nothing
+
+CINDEX_VERSION_STRINGIZE(major, minor) = CINDEX_VERSION_STRINGIZE_(major, minor)
+
+# Skipping MacroDefinition: CINDEX_VERSION_STRING CINDEX_VERSION_STRINGIZE ( CINDEX_VERSION_MAJOR , CINDEX_VERSION_MINOR )
+
+const LLVM_CLANG_C_PLATFORM_H = nothing
+
+const CINDEX_LINKAGE = nothing
+
+# Skipping MacroDefinition: CINDEX_DEPRECATED __attribute__ ( ( deprecated ) )
+
 # exports
 const PREFIXES = ["CX", "clang_"]
 foreach(names(@__MODULE__; all=true)) do s
diff --git a/src/generator/Generators.jl b/src/generator/Generators.jl
index 02671efa..cdc34a6d 100644
--- a/src/generator/Generators.jl
+++ b/src/generator/Generators.jl
@@ -57,10 +57,13 @@ export pretty_print
 include("audit.jl")
 export report_default_tag_types
 
+include("macro.jl")
+
 include("passes.jl")
 export AbstractPass
 export Audit
 export Codegen
+export CodegenMacro
 export CodegenPostprocessing
 export CodegenPreprocessing
 export CollectTopLevelNode
diff --git a/src/generator/context.jl b/src/generator/context.jl
index 4e0d0ba0..81671d54 100644
--- a/src/generator/context.jl
+++ b/src/generator/context.jl
@@ -79,6 +79,7 @@ function create_context(headers::Vector, args::Vector, options::Dict)
     end
     push!(ctx.passes, Audit())
     push!(ctx.passes, Codegen())
+    push!(ctx.passes, CodegenMacro())
     push!(ctx.passes, CodegenPostprocessing())
 
     # support old behavior
diff --git a/src/generator/macro.jl b/src/generator/macro.jl
new file mode 100644
index 00000000..6b94ef14
--- /dev/null
+++ b/src/generator/macro.jl
@@ -0,0 +1,296 @@
+# FIXME: this file is a mess and needs to be purged.
+
+const LITERAL_LONG = ["L", "l"]
+const LITERAL_ULONG = ["UL", "Ul", "uL", "ul", "LU", "Lu", "lU", "lu"]
+const LITERAL_ULONGLONG = ["ULL", "Ull", "uLL", "ull", "LLU", "LLu", "llU", "llu"]
+const LITERAL_LONGLONG = ["LL", "ll"]
+
+# longer suffixes come first so that e.g. `ull` is tried before `l`
+const LITERAL_SUFFIXES = [
+    LITERAL_ULONGLONG..., LITERAL_LONGLONG..., LITERAL_ULONG..., LITERAL_LONG...,
+    "U", "u", "F", "f"
+]
+
+"""
+    literal_totype(literal, txt)
+
+Return the name (a `String`) of the Julia type selected by the C literal suffix
+`literal`. `txt` is the full literal text; it is only searched for a `.`.
+"""
+function literal_totype(literal, txt)
+    literal = lowercase(literal)
+
+    # Floats following http://en.cppreference.com/w/cpp/language/floating_literal
+    occursin(".", txt) && occursin("l", literal) && return "Float64"  # long double
+    occursin("f", literal) && return "Float32"
+
+    # Integers following http://en.cppreference.com/w/cpp/language/integer_literal
+    unsigned = occursin("u", literal)
+    if unsigned && (endswith(literal, "llu") || endswith(literal, "ull"))
+        return "Culonglong"
+    elseif !unsigned && endswith(literal, "ll")
+        return "Clonglong"
+    elseif occursin("ul", literal) || occursin("lu", literal)
+        return "Culong"
+    elseif !unsigned && endswith(literal, "l")
+        return "Clong"
+    else
+        return unsigned ? "Cuint" : "Cint"
+    end
+end
+
+normalize_literal(tok) = strip(tok.text)
+
+"""
+    normalize_literal(tok::Literal)
+
+Rewrite the text of a C literal token as Julia source text. A recognized type suffix
+becomes a constructor call (`10u` gives `Cuint(10)`), a character literal is wrapped
+in `Cchar(...)`, and any other text is returned stripped but otherwise unchanged.
+"""
+function normalize_literal(tok::Literal)
+    txt = strip(tok.text)
+    # the trailing `f`/`F` of a hexadecimal integer such as `0xFF` is a digit, not a
+    # float suffix (a hexadecimal float always carries a `p` exponent).
+    is_hex_integer = occursin(r"^0[xX][^pP]*$", txt)
+    for sfx in LITERAL_SUFFIXES
+        endswith(txt, sfx) || continue
+        is_hex_integer && lowercase(sfx) == "f" && continue
+        type = literal_totype(sfx, txt)
+        txt = txt[1:(end - length(sfx))]
+        return "$(type)($txt)"
+    end
+    # Char to Cchar
+    if match(r"^'.*'$", txt) !== nothing
+        return "Cchar($txt)"
+    end
+    return txt
+end
+
+"""
+    literally(tok)
+
+Return the parenthesized Julia source text for the literal token `tok`. Text made
+only of digits with a leading `0` (a C octal literal) gets the `0o` prefix.
+"""
+function literally(tok)
+    is_octal = occursin(r"^0[0-9]*\d$", tok.text)
+    literals = is_octal ? "0o" * normalize_literal(tok) : normalize_literal(tok)
+    # escape dollar signs; presumably so that `Meta.parse` does not read them as
+    # string interpolation
+    return "(" * replace(literals, "\$" => "\\\$") * ")"
+end
+
+# `#define PURE_DEFINITION`
+is_macro_pure_definition(toks) = toks.size == 1 && toks[1].kind == CXToken_Identifier
+
+function is_macro_constants(toks)
+    if toks.size == 2 &&
+       toks[1].kind == CXToken_Identifier &&
+       toks[2].kind == CXToken_Literal
+        # `#define CONSTANT_LITERALS_1 0`
+        return true
+    elseif toks.size == 4 &&
+           toks[1].kind == CXToken_Identifier &&
+           toks[2].kind == CXToken_Punctuation &&
+           toks[3].kind == CXToken_Literal &&
+           toks[4].kind == CXToken_Punctuation
+        # `#define CONSTANT_LITERALS_BRACKET ( 0x10u )`
+        return true
+    else
+        return false
+    end
+end
+
+function is_macro_naive_alias(toks)
+    if toks.size == 2 &&
+       toks[1].kind == CXToken_Identifier &&
+       toks[2].kind == CXToken_Identifier
+        # `#define CONSTANT_ALIAS CONSTANT_LITERALS_1`
+        return true
+    else
+        return false
+    end
+end
+
+const C_KEYWORDS_DATATYPE = [
+    "char", "double", "float", "int", "long", "short", "signed", "unsigned", "void",
+    "_Bool", "_Complex", "_Noreturn"
+]
+const C_KEYWORDS_CVR = ["const", "volatile", "restrict"]
+const C_KEYWORDS = [
+    C_KEYWORDS_DATATYPE..., "auto", "break", "case", "continue", "default", "do",
+    "else", "enum", "extern", "for", "goto", "if", "inline", "register",
+    "return", "sizeof", "static", "struct", "switch", "typedef", "union",
+    "while", "_Alignas", "_Alignof", "_Atomic", "_Decimal128", "_Decimal32",
+    "_Decimal64", "_Generic", "_Imaginary", "_Static_assert", "_Thread_local"
+]
+
+const C_DATATYPE_TO_JULIA_DATATYPE = Dict(
+    "char" => :Cchar,
+    "double" => :Float64,
+    "float" => :Float32,
+    "int" => :Cint,
+    "long" => :Clong,
+    "short" => :Cshort,
+    "void" => :Cvoid,
+    "_Bool" => :Cuchar,
+    "_Complex float" => :ComplexF32,
+    "_Complex double" => :ComplexF64,
+    "_Noreturn" => :(Union{}),
+    "signed char" => :Int8,
+    "signed int" => :Cint,
+    "signed long" => :Clong,
+    "signed short" => :Cshort,
+    "unsigned char" => :Cuchar,
+    "unsigned int" => :Cuint,
+    "unsigned long" => :Culong,
+    "unsigned short" => :Cushort,
+    "long long" => :Clonglong,
+    "long long int" => :Clonglong,
+    "signed long long int" => :Clonglong,
+    "unsigned long long" => :Culonglong,
+    "unsigned long long int" => :Culonglong
+)
+
+# an identifier followed only by keyword tokens, e.g. `#define ALIAS unsigned long`
+function is_macro_keyword_alias(toks)
+    if toks.size ≥ 2 && toks[1].kind == CXToken_Identifier
+        toks_kind = [tok.kind for tok in collect(toks)[2:end]]
+        return all(x -> x == CXToken_Keyword, toks_kind)
+    else
+        return false
+    end
+end
+
+function add_spaces_for_macros(lhs, rhs)
+    if startswith(rhs, "(")  # handle function call
+        if endswith(lhs, "?") || endswith(lhs, ":")  # handle ternary operator
+            return lhs * " " * rhs
+        else
+            return lhs * rhs
+        end
+    else
+        return lhs * " " * rhs
+    end
+end
+
+function get_comment_expr(tokens)
+    code = join([tok.text for tok in tokens], " ")
+    return Expr(:block, "# Skipping MacroDefinition: " * code)
+end
+
+"""
+    macro_emit!(dag::ExprDAG, node::ExprNode, options::Dict)
+
+Emit Julia expressions for a macro `node` by pushing them to `node.exprs` and return
+`dag`. The fallback method for any other node does nothing.
+
+The following keys of `options` are read:
+- `"macro_mode"`: when set to `"aggressive"` (the old spelling `"aggresive"` is also
+  accepted), object-like macros that match none of the simple patterns are handed to
+  `Meta.parse` as they are; otherwise they are skipped. Defaults to `"basic"`.
+- `"add_comment_for_skipped_macro"`: whether a `# Skipping MacroDefinition: ...`
+  comment is emitted for a skipped macro. Defaults to `true`.
+"""
+function macro_emit! end
+
+macro_emit!(dag::ExprDAG, node::ExprNode, options::Dict) = dag
+
+function macro_emit!(dag::ExprDAG, node::ExprNode{MacroFunctionLike}, options::Dict)
+    # the default must be a `Bool` because the value is used as an operand of `&&`
+    print_comment = get(options, "add_comment_for_skipped_macro", true)
+
+    tokens = tokenize(node.cursor)
+    toks = collect(tokens)
+    lhs_sym = make_symbol_safe(tokens[1].text)
+
+    # assumes the first `)` closes the macro's parameter list
+    i = findfirst(x -> x.text == ")", toks)
+    try
+        # a signature that `Meta.parse` rejects skips this macro instead of aborting
+        # the whole pass
+        sig_ex = Meta.parse(mapreduce(x -> x.text, *, toks[1:i]))
+        sig_ex.args[1] = lhs_sym
+        if i == length(toks)
+            # no replacement list: emit `NAME(args...) = nothing`
+            push!(node.exprs, Expr(:(=), sig_ex, nothing))
+        else
+            body_toks = toks[(i + 1):end]
+            txts = [t.kind == CXToken_Literal ? literally(t) : t.text for t in body_toks]
+            str = reduce(add_spaces_for_macros, txts)
+            push!(node.exprs, Expr(:(=), sig_ex, Meta.parse(str)))
+        end
+    catch
+        print_comment && push!(node.exprs, get_comment_expr(tokens))
+    end
+
+    return dag
+end
+
+function macro_emit!(dag::ExprDAG, node::ExprNode{MacroDefault}, options::Dict)
+    mode = get(options, "macro_mode", "basic")
+    # the default must be a `Bool` because the value is used as an operand of `&&`
+    print_comment = get(options, "add_comment_for_skipped_macro", true)
+
+    tokens = tokenize(node.cursor)
+
+    if is_macro_pure_definition(tokens)
+        sym = make_symbol_safe(tokens[1].text)
+        push!(node.exprs, Expr(:const, Expr(:(=), sym, :nothing)))
+        return dag
+    end
+
+    if is_macro_constants(tokens)
+        # the literal is the 2nd token, or the 3rd when it is wrapped in punctuation
+        literal_tok = tokens.size == 2 ? tokens[2] : tokens[3]
+        sym = make_symbol_safe(tokens[1].text)
+        literal_sym = Meta.parse(literally(literal_tok))
+        push!(node.exprs, Expr(:const, Expr(:(=), sym, literal_sym)))
+        return dag
+    end
+
+    if is_macro_naive_alias(tokens)
+        lhs, rhs = tokens
+        lhs_sym = make_symbol_safe(lhs.text)
+        rhs_sym = make_symbol_safe(rhs.text)
+        push!(node.exprs, Expr(:const, Expr(:(=), lhs_sym, rhs_sym)))
+        return dag
+    end
+
+    if is_macro_keyword_alias(tokens)
+        lhs_sym = make_symbol_safe(tokens[1].text)
+        # qualifiers listed in `C_KEYWORDS_CVR` are dropped before the type lookup
+        keywords = [
+            tok.text for tok in collect(tokens)[2:end] if tok.text ∉ C_KEYWORDS_CVR
+        ]
+        str = join(keywords, " ")
+        if all(x -> x ∈ C_KEYWORDS_DATATYPE, keywords) &&
+           haskey(C_DATATYPE_TO_JULIA_DATATYPE, str)
+            rhs_sym = C_DATATYPE_TO_JULIA_DATATYPE[str]
+            push!(node.exprs, Expr(:const, Expr(:(=), lhs_sym, rhs_sym)))
+        else
+            print_comment && push!(node.exprs, get_comment_expr(tokens))
+        end
+        return dag
+    end
+
+    # for all the other cases, we just blindly use Julia's Meta.parse to parse the C code.
+    # both the correct spelling and the historical misspelling enable this mode
+    aggressive = mode == "aggressive" || mode == "aggresive"
+    if aggressive && tokens.size > 1 && tokens[1].kind == CXToken_Identifier
+        sym = make_symbol_safe(tokens[1].text)
+        try
+            body_toks = collect(tokens)[2:end]
+            txts = [t.kind == CXToken_Literal ? literally(t) : t.text for t in body_toks]
+            str = reduce(add_spaces_for_macros, txts)
+            push!(node.exprs, Expr(:const, Expr(:(=), sym, Meta.parse(str))))
+        catch
+            print_comment && push!(node.exprs, get_comment_expr(tokens))
+        end
+    else
+        print_comment && push!(node.exprs, get_comment_expr(tokens))
+    end
+
+    return dag
+end
diff --git a/src/generator/passes.jl b/src/generator/passes.jl
index d7cbdb4a..1093e312 100644
--- a/src/generator/passes.jl
+++ b/src/generator/passes.jl
@@ -589,9 +589,14 @@ function (x::CommonPrinter)(dag::ExprDAG, options::Dict)
     show_info && @info "[CommonPrinter]: print code to $(x.file)"
     open(x.file, "w") do io
         for node in dag.nodes
-            node.type isa AbstractFunctionNodeType && continue
+            (node.type isa AbstractMacroNodeType || node.type isa AbstractFunctionNodeType) && continue
             pretty_print(io, node, general_options)
         end
+        # print macros at the bottom of the file
+        for node in dag.nodes
+            node.type isa AbstractMacroNodeType || continue
+            pretty_print(io, node, options)
+        end
     end
     return dag
 end
@@ -614,8 +619,14 @@ function (x::GeneralPrinter)(dag::ExprDAG, options::Dict)
     show_info && @info "[GeneralPrinter]: print code to $(x.file)"
     open(x.file, "a") do io
         for node in dag.nodes
+            node.type isa AbstractMacroNodeType && continue
             pretty_print(io, node, general_options)
         end
+        # print macros at the bottom of the file
+        for node in dag.nodes
+            node.type isa AbstractMacroNodeType || continue
+            pretty_print(io, node, options)
+        end
     end
     return dag
 end
@@ -714,3 +725,31 @@ end"""
 end
 
 ## EXPERIMENTAL
+"""
+    CodegenMacro <: AbstractPass
+[`Codegen`](@ref) pass for macros.
+"""
+mutable struct CodegenMacro <: AbstractPass
+    show_info::Bool
+end
+CodegenMacro(; info=false) = CodegenMacro(info)
+
+function (x::CodegenMacro)(dag::ExprDAG, options::Dict)
+    general_options = get(options, "general", Dict())
+    log_options = get(general_options, "log", Dict())
+    show_info = get(log_options, "CodegenMacro_log", x.show_info)
+    codegen_options = get(options, "codegen", Dict())
+    macro_options = get(codegen_options, "macro", Dict())
+    macro_mode = get(macro_options, "macro_mode", "basic")
+
+    macro_mode in ("disable", "none") && return dag  # either value skips all macros
+
+    for node in dag.nodes
+        node.type isa AbstractMacroNodeType || continue
+        empty!(node.exprs)  # drop expressions left by earlier passes before re-emitting
+        macro_emit!(dag, node, macro_options)
+        show_info && @info "[CodegenMacro]: emit Julia expression for $(node.id)"
+    end
+
+    return dag
+end
diff --git a/src/generator/print.jl b/src/generator/print.jl
index 0d6b6c46..b2b91302 100644
--- a/src/generator/print.jl
+++ b/src/generator/print.jl
@@ -163,3 +163,23 @@ function pretty_print(io, node::ExprNode{<:OpaqueTags}, options::Dict)
 end
 
 pretty_print(io, node::ExprNode{UnknownDefaults}, options::Dict) = nothing
+
+## EXPERIMENTAL
+"""
+    pretty_print(io, node::ExprNode{<:AbstractMacroNodeType}, options::Dict)
+
+Print every expression in `node.exprs` to `io`, one per line, followed by a blank
+line. For a `:block` expression (the form `get_comment_expr` builds for a skipped
+macro) only the string form of its first argument is printed. `options` is not used.
+"""
+function pretty_print(io, node::ExprNode{<:AbstractMacroNodeType}, options::Dict)
+    for expr in node.exprs
+        if Meta.isexpr(expr, :block)
+            println(io, string(expr.args[1]))
+        else
+            println(io, expr)
+        end
+    end
+    println(io)
+    return nothing
+end