Skip to content

Commit

Permalink
PCRE: make PCRE into an exported submodule of Base.
Browse files (browse the repository at this point in the history)
Significantly reduces the amount of nonsense that Base exports.
  • Loading branch information
StefanKarpinski committed Jul 28, 2012
1 parent 949f5a7 commit fadd70d
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 123 deletions.
2 changes: 1 addition & 1 deletion base/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ PCRE_CONST = 0x[0-9a-fA-F]+|[-+]?\s*[0-9]+
all: pcre_h.jl errno_h.jl os_detect.jl

pcre_h.jl:
$(QUIET_PERL) ${CC} -E -dM $(shell $(PCRE_CONFIG) --prefix)/include/pcre.h | perl -nle '/^\s*#define\s+(PCRE\w*)\s*\(?($(PCRE_CONST))\)?\s*$$/ and print "const $$1 = int32($$2)"' | sort > $@
$(QUIET_PERL) ${CC} -E -dM $(shell $(PCRE_CONFIG) --prefix)/include/pcre.h | perl -nle '/^\s*#define\s+PCRE_(\w*)\s*\(?($(PCRE_CONST))\)?\s*$$/ and print "const $$1 = int32($$2)"' | sort > $@

errno_h.jl:
$(QUIET_PERL) echo '#include "errno.h"' | cpp -dM - | perl -nle 'print "const $$1 = int32($$2)" if /^#define\s+(E\w+)\s+(\d+)\s*$$/' | sort > $@
Expand Down
114 changes: 60 additions & 54 deletions base/pcre.jl
Original file line number Diff line number Diff line change
@@ -1,111 +1,117 @@
## low-level pcre interface ##

include("pcre_h.jl")
libpcre = dlopen("libpcre")

module PCRE
import Base.*
global info, compile, study, exec

_jl_libpcre = dlopen("libpcre")
include("pcre_h.jl")

const PCRE_VERSION = cstring(ccall(dlsym(_jl_libpcre, :pcre_version), Ptr{Uint8}, ()))
const VERSION = cstring(ccall(dlsym(Base.libpcre, :pcre_version), Ptr{Uint8}, ()))

# supported options for different use cases

const PCRE_COMPILE_MASK =
PCRE_ANCHORED |
PCRE_CASELESS |
PCRE_DOLLAR_ENDONLY |
PCRE_DOTALL |
PCRE_EXTENDED |
PCRE_FIRSTLINE |
PCRE_MULTILINE |
PCRE_NEWLINE_ANY |
PCRE_NEWLINE_ANYCRLF |
PCRE_NEWLINE_CR |
PCRE_NEWLINE_CRLF |
PCRE_NEWLINE_LF |
PCRE_NO_AUTO_CAPTURE |
PCRE_NO_START_OPTIMIZE |
PCRE_NO_UTF8_CHECK |
PCRE_UNGREEDY |
PCRE_UTF8
const COMPILE_MASK =
ANCHORED |
CASELESS |
DOLLAR_ENDONLY |
DOTALL |
EXTENDED |
FIRSTLINE |
MULTILINE |
NEWLINE_ANY |
NEWLINE_ANYCRLF |
NEWLINE_CR |
NEWLINE_CRLF |
NEWLINE_LF |
NO_AUTO_CAPTURE |
NO_START_OPTIMIZE |
NO_UTF8_CHECK |
UNGREEDY |
UTF8

const PCRE_EXECUTE_MASK =
PCRE_NEWLINE_ANY |
PCRE_NEWLINE_ANYCRLF |
PCRE_NEWLINE_CR |
PCRE_NEWLINE_CRLF |
PCRE_NEWLINE_LF |
PCRE_NOTBOL |
PCRE_NOTEMPTY |
PCRE_NOTEMPTY_ATSTART |
PCRE_NOTEOL |
PCRE_NO_START_OPTIMIZE |
PCRE_NO_UTF8_CHECK |
PCRE_PARTIAL_HARD |
PCRE_PARTIAL_SOFT
const EXECUTE_MASK =
NEWLINE_ANY |
NEWLINE_ANYCRLF |
NEWLINE_CR |
NEWLINE_CRLF |
NEWLINE_LF |
NOTBOL |
NOTEMPTY |
NOTEMPTY_ATSTART |
NOTEOL |
NO_START_OPTIMIZE |
NO_UTF8_CHECK |
PARTIAL_HARD |
PARTIAL_SOFT

const PCRE_OPTIONS_MASK = PCRE_COMPILE_MASK | PCRE_EXECUTE_MASK
const OPTIONS_MASK = COMPILE_MASK | EXECUTE_MASK

function pcre_info{T}(
function info{T}(
regex::Union(Ptr{Void},Vector{Uint8}),
extra::Ptr{Void}, what::Integer, ::Type{T}
)
buf = Array(Uint8,sizeof(T))
ret = ccall(dlsym(_jl_libpcre, :pcre_fullinfo), Int32,
ret = ccall(dlsym(Base.libpcre, :pcre_fullinfo), Int32,
(Ptr{Void}, Ptr{Void}, Int32, Ptr{Uint8}),
regex, extra, what, buf)
if ret != 0
error("pcre_info: ",
ret == PCRE_ERROR_NULL ? "NULL regex object" :
ret == PCRE_ERROR_BADMAGIC ? "invalid regex object" :
ret == PCRE_ERROR_BADOPTION ? "invalid option flags" :
error("info: ",
ret == ERROR_NULL ? "NULL regex object" :
ret == ERROR_BADMAGIC ? "invalid regex object" :
ret == ERROR_BADOPTION ? "invalid option flags" :
"unknown error")
end
reinterpret(T,buf)[1]
end

function pcre_compile(pattern::String, options::Integer)
function compile(pattern::String, options::Integer)
errstr = Array(Ptr{Uint8},1)
erroff = Array(Int32,1)
re_ptr = (()->ccall(dlsym(_jl_libpcre, :pcre_compile), Ptr{Void},
re_ptr = (()->ccall(dlsym(Base.libpcre, :pcre_compile), Ptr{Void},
(Ptr{Uint8}, Int32, Ptr{Ptr{Uint8}}, Ptr{Int32}, Ptr{Uint8}),
pattern, options, errstr, erroff, C_NULL))()
if re_ptr == C_NULL
error("pcre_compile: $(errstr[1])",
error("compile: $(errstr[1])",
" at position $(erroff[1]+1)",
" in $(quote_string(pattern))")
end
size = pcre_info(re_ptr, C_NULL, PCRE_INFO_SIZE, Int32)
size = info(re_ptr, C_NULL, INFO_SIZE, Int32)
regex = Array(Uint8,size)
ccall(:memcpy, Ptr{Void}, (Ptr{Void}, Ptr{Void}, Uint), regex, re_ptr, size)
regex
end

function pcre_study(regex::Array{Uint8}, options::Integer)
function study(regex::Array{Uint8}, options::Integer)
# NOTE: options should always be zero in current PCRE
errstr = Array(Ptr{Uint8},1)
extra = (()->ccall(dlsym(_jl_libpcre, :pcre_study), Ptr{Void},
extra = (()->ccall(dlsym(Base.libpcre, :pcre_study), Ptr{Void},
(Ptr{Void}, Int32, Ptr{Ptr{Uint8}}),
regex, options, errstr))()
if errstr[1] != C_NULL
error("pcre_study: $(errstr[1])")
error("study: $(errstr[1])")
end
extra
end
pcre_study(re::Array{Uint8}) = pcre_study(re, int32(0))
study(re::Array{Uint8}) = study(re, int32(0))

function pcre_exec(regex::Array{Uint8}, extra::Ptr{Void},
function exec(regex::Array{Uint8}, extra::Ptr{Void},
str::ByteString, offset::Integer, options::Integer, cap::Bool)
if offset < 0 || length(str) < offset
error("index out of range")
end
ncap = pcre_info(regex, extra, PCRE_INFO_CAPTURECOUNT, Int32)
ncap = info(regex, extra, INFO_CAPTURECOUNT, Int32)
ovec = Array(Int32, 3(ncap+1))
n = ccall(dlsym(_jl_libpcre, :pcre_exec), Int32,
n = ccall(dlsym(Base.libpcre, :pcre_exec), Int32,
(Ptr{Void}, Ptr{Void}, Ptr{Uint8}, Int32,
Int32, Int32, Ptr{Int32}, Int32),
regex, extra, str, length(str),
offset, options, ovec, length(ovec))
if n < -1
error("pcre_exec: error $n")
error("exec: error $n")
end
cap ? ((n > -1 ? ovec[1:2(ncap+1)] : Array(Int32,0)), ncap) : n > -1
end

end # module
44 changes: 22 additions & 22 deletions base/regex.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include("pcre.jl")

## object-oriented Regex interface ##

include("pcre.jl")

type Regex
pattern::ByteString
options::Int32
Expand All @@ -10,11 +10,11 @@ type Regex

function Regex(pat::String, opts::Integer, study::Bool)
pat = cstring(pat); opts = int32(opts)
if (opts & ~PCRE_OPTIONS_MASK) != 0
if (opts & ~PCRE.OPTIONS_MASK) != 0
error("invalid regex option(s)")
end
re = pcre_compile(pat, opts & PCRE_COMPILE_MASK)
ex = study ? pcre_study(re) : C_NULL
re = PCRE.compile(pat, opts & PCRE.COMPILE_MASK)
ex = study ? PCRE.study(re) : C_NULL
new(pat, opts, re, ex)
end
end
Expand All @@ -29,26 +29,26 @@ copy(r::Regex) = r
# constructs are correctly handled.

macro r_str(pattern, flags...)
options = PCRE_UTF8
options = PCRE.UTF8
for fx in flags, f in fx
options |= f=='i' ? PCRE_CASELESS :
f=='m' ? PCRE_MULTILINE :
f=='s' ? PCRE_DOTALL :
f=='x' ? PCRE_EXTENDED :
options |= f=='i' ? PCRE.CASELESS :
f=='m' ? PCRE.MULTILINE :
f=='s' ? PCRE.DOTALL :
f=='x' ? PCRE.EXTENDED :
error("unknown regex flag: $f")
end
Regex(pattern, options)
end

function show(io, re::Regex)
imsx = PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL|PCRE_EXTENDED
if (re.options & ~imsx) == PCRE_UTF8
imsx = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED
if (re.options & ~imsx) == PCRE.UTF8
print(io, 'r')
print_quoted_literal(io, re.pattern)
if (re.options & PCRE_CASELESS ) != 0; print(io, 'i'); end
if (re.options & PCRE_MULTILINE) != 0; print(io, 'm'); end
if (re.options & PCRE_DOTALL ) != 0; print(io, 's'); end
if (re.options & PCRE_EXTENDED ) != 0; print(io, 'x'); end
if (re.options & PCRE.CASELESS ) != 0; print(io, 'i'); end
if (re.options & PCRE.MULTILINE) != 0; print(io, 'm'); end
if (re.options & PCRE.DOTALL ) != 0; print(io, 's'); end
if (re.options & PCRE.EXTENDED ) != 0; print(io, 'x'); end
else
print(io, "Regex(")
show(io, re.pattern)
Expand Down Expand Up @@ -85,31 +85,31 @@ function show(io, m::RegexMatch)
end

matches(r::Regex, s::String, o::Integer) =
pcre_exec(r.regex, r.extra, cstring(s), 0, o, false)
matches(r::Regex, s::String) = matches(r, s, r.options & PCRE_EXECUTE_MASK)
PCRE.exec(r.regex, r.extra, cstring(s), 0, o, false)
matches(r::Regex, s::String) = matches(r, s, r.options & PCRE.EXECUTE_MASK)

contains(s::String, r::Regex, opts::Integer) = matches(r,s,opts)
contains(s::String, r::Regex) = matches(r,s)

function match(re::Regex, str::ByteString, idx::Integer, opts::Integer)
m, n = pcre_exec(re.regex, re.extra, str, idx-1, opts, true)
m, n = PCRE.exec(re.regex, re.extra, str, idx-1, opts, true)
if isempty(m); return nothing; end
mat = str[m[1]+1:m[2]]
cap = ntuple(n, i->(m[2i+1] < 0 ? nothing : str[m[2i+1]+1:m[2i+2]]))
off = map(i->m[2i+1]+1, [1:n])
RegexMatch(mat, cap, m[1]+1, off)
end
match(r::Regex, s::String, i::Integer, o::Integer) = match(r, cstring(s), i, o)
match(r::Regex, s::String, i::Integer) = match(r, s, i, r.options & PCRE_EXECUTE_MASK)
match(r::Regex, s::String, i::Integer) = match(r, s, i, r.options & PCRE.EXECUTE_MASK)
match(r::Regex, s::String) = match(r, s, start(s))

function search(str::ByteString, re::Regex, idx::Integer)
len = length(str)
if idx >= len+2
return idx == len+2 ? (0,0) : error("index out of range")
end
opts = re.options & PCRE_EXECUTE_MASK
m, n = pcre_exec(re.regex, re.extra, str, idx-1, opts, true)
opts = re.options & PCRE.EXECUTE_MASK
m, n = PCRE.exec(re.regex, re.extra, str, idx-1, opts, true)
isempty(m) ? (0,0) : (m[1]+1,m[2]+1)
end
search(s::ByteString, r::Regex) = search(s,r,start(s))
Expand Down
2 changes: 1 addition & 1 deletion base/start_image.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ _jl_lib = ccall(:jl_load_dynamic_library,Ptr{Void},(Ptr{None},),C_NULL)
@windows_only _jl_repl = ccall(:GetModuleHandleA,stdcall,Ptr{Void},(Ptr{Void},),C_NULL)

# Essential libraries
_jl_libpcre = dlopen("libpcre")
libpcre = dlopen("libpcre")
_jl_libgrisu = dlopen("libgrisu")
_jl_libm = dlopen("libm")
_jl_libfdm = dlopen("libfdm")
Expand Down
45 changes: 1 addition & 44 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module Base

export
# Module
Base,
Base, PCRE,
# Types
AbstractMatrix,AbstractVector,Array,Associative,CharString,Chars,Cmd,Cmds,
Colon,Complex,Complex128,Complex64,ComplexPair,DArray,Dict,Dims,EachLine,
Expand Down Expand Up @@ -40,49 +40,6 @@ export
EREMOTEIO,ERESTART,ERFKILL,EROFS,ESHUTDOWN,ESOCKTNOSUPPORT,ESPIPE,ESRCH,
ESRMNT,ESTALE,ESTRPIPE,ETIME,ETIMEDOUT,ETOOMANYREFS,ETXTBSY,EUCLEAN,EUNATCH,
EUSERS,EXDEV,EXFULL,
# PCRE constants
#PCRE_VERSION,PCRE_COMPILE_MASK,PCRE_EXECUTE_MASK,PCRE_OPTIONS_MASK,
PCRE_ANCHORED,PCRE_AUTO_CALLOUT,PCRE_BSR_ANYCRLF,PCRE_BSR_UNICODE,
PCRE_CASELESS,PCRE_CONFIG_BSR,PCRE_CONFIG_JIT,PCRE_CONFIG_JITTARGET,
PCRE_CONFIG_LINK_SIZE,PCRE_CONFIG_MATCH_LIMIT,
PCRE_CONFIG_MATCH_LIMIT_RECURSION,PCRE_CONFIG_NEWLINE,
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD,PCRE_CONFIG_STACKRECURSE,
PCRE_CONFIG_UNICODE_PROPERTIES,PCRE_CONFIG_UTF16,
PCRE_CONFIG_UTF8,PCRE_DFA_RESTART,PCRE_DFA_SHORTEST,PCRE_DOLLAR_ENDONLY,
PCRE_DOTALL,PCRE_DUPNAMES,PCRE_ERROR_BADCOUNT,PCRE_ERROR_BADENDIANNESS,
PCRE_ERROR_BADMAGIC,PCRE_ERROR_BADMODE,PCRE_ERROR_BADNEWLINE,
PCRE_ERROR_BADOFFSET,PCRE_ERROR_BADOPTION,PCRE_ERROR_BADPARTIAL,
PCRE_ERROR_BADUTF16,PCRE_ERROR_BADUTF16_OFFSET,PCRE_ERROR_BADUTF8,
PCRE_ERROR_BADUTF8_OFFSET,PCRE_ERROR_CALLOUT,PCRE_ERROR_DFA_RECURSE,
PCRE_ERROR_DFA_UCOND,PCRE_ERROR_DFA_UITEM,PCRE_ERROR_DFA_UMLIMIT,
PCRE_ERROR_DFA_WSSIZE,PCRE_ERROR_INTERNAL,PCRE_ERROR_JIT_STACKLIMIT,
PCRE_ERROR_MATCHLIMIT,PCRE_ERROR_NOMATCH,PCRE_ERROR_NOMEMORY,
PCRE_ERROR_NOSUBSTRING,PCRE_ERROR_NULL,PCRE_ERROR_NULLWSLIMIT,
PCRE_ERROR_PARTIAL,PCRE_ERROR_RECURSELOOP,PCRE_ERROR_RECURSIONLIMIT,
PCRE_ERROR_SHORTUTF16,PCRE_ERROR_SHORTUTF8,PCRE_ERROR_UNKNOWN_NODE,
PCRE_ERROR_UNKNOWN_OPCODE,PCRE_EXTENDED,PCRE_EXTRA,
PCRE_EXTRA_CALLOUT_DATA,PCRE_EXTRA_EXECUTABLE_JIT,PCRE_EXTRA_MARK,
PCRE_EXTRA_MATCH_LIMIT,PCRE_EXTRA_MATCH_LIMIT_RECURSION,
PCRE_EXTRA_STUDY_DATA,PCRE_EXTRA_TABLES,PCRE_FIRSTLINE,PCRE_INFO_BACKREFMAX,
PCRE_INFO_CAPTURECOUNT,PCRE_INFO_DEFAULT_TABLES,PCRE_INFO_FIRSTBYTE,
PCRE_INFO_FIRSTCHAR,PCRE_INFO_FIRSTTABLE,PCRE_INFO_HASCRORLF,
PCRE_INFO_JCHANGED,PCRE_INFO_JIT,PCRE_INFO_JITSIZE,PCRE_INFO_LASTLITERAL,
PCRE_INFO_MINLENGTH,PCRE_INFO_NAMECOUNT,PCRE_INFO_NAMEENTRYSIZE,
PCRE_INFO_NAMETABLE,PCRE_INFO_OKPARTIAL,PCRE_INFO_OPTIONS,PCRE_INFO_SIZE,
PCRE_INFO_STUDYSIZE,PCRE_JAVASCRIPT_COMPAT,PCRE_MAJOR,PCRE_MINOR,
PCRE_MULTILINE,PCRE_NEWLINE_ANY,PCRE_NEWLINE_ANYCRLF,PCRE_NEWLINE_CR,
PCRE_NEWLINE_CRLF,PCRE_NEWLINE_LF,PCRE_NOTBOL,PCRE_NOTEMPTY,
PCRE_NOTEMPTY_ATSTART,PCRE_NOTEOL,PCRE_NO_AUTO_CAPTURE,
PCRE_NO_START_OPTIMISE,PCRE_NO_START_OPTIMIZE,PCRE_NO_UTF16_CHECK,
PCRE_NO_UTF8_CHECK,PCRE_PARTIAL,PCRE_PARTIAL_HARD,
PCRE_PARTIAL_SOFT,PCRE_STUDY_JIT_COMPILE,PCRE_UCP,PCRE_UNGREEDY,PCRE_UTF16,
PCRE_UTF16_ERR0,PCRE_UTF16_ERR1,PCRE_UTF16_ERR2,PCRE_UTF16_ERR3,
PCRE_UTF16_ERR4,PCRE_UTF8,PCRE_UTF8_ERR0,PCRE_UTF8_ERR1,PCRE_UTF8_ERR10,
PCRE_UTF8_ERR11,PCRE_UTF8_ERR12,PCRE_UTF8_ERR13,PCRE_UTF8_ERR14,
PCRE_UTF8_ERR15,PCRE_UTF8_ERR16,PCRE_UTF8_ERR17,PCRE_UTF8_ERR18,
PCRE_UTF8_ERR19,PCRE_UTF8_ERR2,PCRE_UTF8_ERR20,PCRE_UTF8_ERR21,
PCRE_UTF8_ERR3,PCRE_UTF8_ERR4,PCRE_UTF8_ERR5,PCRE_UTF8_ERR6,PCRE_UTF8_ERR7,
PCRE_UTF8_ERR8,PCRE_UTF8_ERR9,
# Operators
!,!=,$,%,&,*,+,-,.!=,.*,./,.<,.<=,.==,.>,.>=,.\,.^,/,//,:,<,<:,<<,<=,==,
>,>=,>>,>>>,\,^,|,~,
Expand Down
2 changes: 1 addition & 1 deletion base/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ end
function apropos(txt::String)
_jl_init_help()
n = 0
r = Regex("\\Q$txt", PCRE_CASELESS)
r = Regex("\\Q$txt", PCRE.CASELESS)
first = true
for (cat, _) in _jl_help_category_dict
if matches(r, cat)
Expand Down

0 comments on commit fadd70d

Please sign in to comment.