Skip to content

Commit

Permalink
resolves asciidoctor#247 add support for the Rouge source highlighter
Browse files Browse the repository at this point in the history
- add integration with Rouge for highlighting source listings
- use no-break space for indent guard instead of zero-width space
- guard indentation within Rouge formatter
- organize the code to set up source highlighting
- enable line number support when highlighting with Rouge
- add pastie theme for Rouge
- patch Rouge style lookup (see rouge-ruby/rouge#280)
- cast lookup collections to set
- optimize code
  • Loading branch information
mojavelinux committed Jul 6, 2015
1 parent 68b4fb5 commit 24b3950
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 30 deletions.
83 changes: 57 additions & 26 deletions lib/asciidoctor-pdf/converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def self.unicode_char number
end

# NOTE require_library doesn't support require_relative and we don't modify the load path for this gem
CodeRayRequirePath = ::File.join((::File.dirname __FILE__), 'prawn_ext/coderay_encoder')
CodeRayRequirePath = ::File.join (::File.dirname __FILE__), 'prawn_ext/coderay_encoder'
RougeRequirePath = ::File.join (::File.dirname __FILE__), 'rouge_ext'

AsciidoctorVersion = ::Gem::Version.create ::Asciidoctor::VERSION
AdmonitionIcons = {
Expand All @@ -46,8 +47,10 @@ def self.unicode_char number
EOL = %(\n)
TAB = %(\t)
InnerIndent = %(\n )
IndentGuard = %(\u200b)
GuardedInnerIndent = %(\n\u200b )
# a no-break space is used to replace a leading space to prevent Prawn from trimming indentation
# a leading zero-width space can't be used as it gets dropped when calculating the line width
GuardedIndent = %(\u00a0)
GuardedInnerIndent = %(\n\u00a0)
TabRx = /\t/
TabIndentRx = /^\t+/
NoBreakSpace = unicode_char 0x00a0
Expand All @@ -68,14 +71,15 @@ def self.unicode_char number
checked: (unicode_char 0x2611),
unchecked: (unicode_char 0x2610)
}
IconSets = ['fa', 'fi', 'octicon', 'pf']
IconSets = ['fa', 'fi', 'octicon', 'pf'].to_set
MeasurementRxt = '\\d+(?:\\.\\d+)?(?:in|cm|mm|pt|)'
MeasurementPartsRx = /^(\d+(?:\.\d+)?)(in|mm|cm|pt|)$/
PageSizeRx = /^(?:\[(#{MeasurementRxt}), ?(#{MeasurementRxt})\]|(#{MeasurementRxt})(?: x |x)(#{MeasurementRxt})|\S+)$/
# CalloutExtractRx synced from /lib/asciidoctor.rb of Asciidoctor core
CalloutExtractRx = /(?:(?:\/\/|#|--|;;) ?)?(\\)?<!?(--|)(\d+)\2>(?=(?: ?\\?<!?\2\d+\2>)*$)/
ImageAttributeValueRx = /^image:{1,2}(.*?)\[(.*?)\]$/
LineScanRx = /\n|.+/
SourceHighlighters = ['coderay', 'pygments', 'rouge'].to_set

def initialize backend, opts
super
Expand Down Expand Up @@ -885,19 +889,40 @@ def convert_image node
# QUESTION can we avoid arranging fragments multiple times (conums & autofit) by eagerly preparing arranger?
def convert_listing_or_literal node
add_dest_for_block node if node.id

# HACK disable built-in syntax highlighter; must be done before calling node.content!
# NOTE the highlight sub is only set for coderay and pygments
if node.style == 'source' && !scratch? && ((subs = node.subs).include? :highlight)
highlighter = node.document.attr 'source-highlighter'
# NOTE the source highlighter logic below handles the callouts and highlight subs
prev_subs = subs.dup
subs.delete_all :highlight, :callouts
source_string = preserve_indentation node.content, (node.attr 'tabsize'), false
if node.style == 'source' && node.attributes['language'] &&
(highlighter = node.document.attributes['source-highlighter']) &&
(SourceHighlighters.include? highlighter)
prev_subs = (subs = node.subs).dup
# NOTE the highlight sub is only set for coderay and pygments atm
highlight_idx = subs.index :highlight
# NOTE scratch? here only applies if listing block is nested inside another block
if scratch?
highlighter = nil
if highlight_idx
# switch the :highlight sub back to :specialcharacters
subs[highlight_idx] = :specialcharacters
else
prev_subs = nil
end
source_string = preserve_indentation node.content, (node.attr 'tabsize')
else
# NOTE the source highlighter logic below handles the callouts and highlight subs
if highlight_idx
subs.delete_all :highlight, :callouts
else
subs.delete_all :specialcharacters, :callouts
end
# the indent guard will be added by the source highlighter logic
source_string = preserve_indentation node.content, (node.attr 'tabsize'), false
end
else
highlighter = nil
prev_subs = nil
source_string = preserve_indentation node.content, (node.attr 'tabsize')
end

source_chunks = case highlighter
when 'coderay'
Helpers.require_library CodeRayRequirePath, 'coderay' unless defined? ::Asciidoctor::Prawn::CodeRayEncoder
Expand All @@ -906,14 +931,22 @@ def convert_listing_or_literal node
conum_mapping ? (restore_conums fragments, conum_mapping) : fragments
when 'pygments'
Helpers.require_library 'pygments', 'pygments.rb' unless defined? ::Pygments
source_string, conum_mapping = extract_conums source_string
lexer = ::Pygments::Lexer[node.attr 'language', 'text', false] || ::Pygments::Lexer['text']
pygments_config = { nowrap: true, noclasses: true, style: (node.document.attr 'pygments-style') || 'pastie' }
source_string, conum_mapping = extract_conums source_string
result = lexer.highlight source_string, options: pygments_config
fragments = guard_indentation text_formatter.format result
conum_mapping ? (restore_conums fragments, conum_mapping) : fragments
when 'rouge'
Helpers.require_library RougeRequirePath, 'rouge' unless defined? ::Rouge::Formatters::Prawn
lexer = ::Rouge::Lexer.find(node.attr 'language', 'text', false) || ::Rouge::Lexers::PlainText
formatter = (@rouge_formatter ||= ::Rouge::Formatters::Prawn.new theme: (node.document.attr 'rouge-style'))
source_string, conum_mapping = extract_conums source_string
# NOTE trailing endline is added to address https://github.com/jneen/rouge/issues/279
fragments = formatter.format (lexer.lex %(#{source_string}#{EOL})), line_numbers: (node.attr? 'linenums')
conum_mapping ? (restore_conums fragments, conum_mapping) : fragments
else
# NOTE only format if we detect a need
# NOTE only format if we detect a need (callouts or inline formatting)
if source_string =~ BuiltInEntityCharOrTagRx
text_formatter.format source_string
else
Expand Down Expand Up @@ -1021,7 +1054,7 @@ def restore_conums fragments, conum_mapping
elsif text.include? EOL
text.split(EOL, -1).each_with_index do |line_in_fragment, idx|
line = (lines[line_num += 1] ||= []) unless idx == 0
line << fragment.merge(text: line_in_fragment) unless line_in_fragment.empty?
line << (fragment.merge text: line_in_fragment) unless line_in_fragment.empty?
end
else
line << fragment
Expand Down Expand Up @@ -1058,7 +1091,7 @@ def guard_indentation fragments
start_of_line = true
fragments.each do |fragment|
next if (text = fragment[:text]).empty?
text.prepend IndentGuard if start_of_line && (text.start_with? ' ')
text[0] = GuardedIndent if start_of_line && (text.start_with? ' ')
text.gsub! InnerIndent, GuardedInnerIndent if text.include? InnerIndent
start_of_line = text.end_with? EOL
end
Expand Down Expand Up @@ -2083,22 +2116,20 @@ def theme_font_size_autofit fragments, category
arranger = arrange_fragments_by_line fragments
adjusted_font_size = nil
theme_font category do
# NOTE finalizing the line here generates fragments using current font settings
# NOTE finalizing the line here generates fragments & calculates their widths using the current font settings
# CAUTION it also removes zero-width spaces
arranger.finalize_line
actual_width = width_of_fragments arranger.fragments
unless ::Array === (padding = @theme[%(#{category}_padding)])
padding = [padding] * 4
end
bounds.add_left_padding(p_left = padding[3] || 0)
bounds.add_right_padding(p_right = padding[1] || 0)
if actual_width > bounds.width
adjusted_font_size = ((bounds.width * font_size).to_f / actual_width).with_precision 4
available_width = bounds.width - (padding[3] || 0) - (padding[1] || 0)
if actual_width > available_width
adjusted_font_size = ((available_width * font_size).to_f / actual_width).with_precision 4
if (min = @theme[%(#{category}_font_size_min)] || @theme.base_font_size_min) && adjusted_font_size < min
adjusted_font_size = min
end
end
bounds.subtract_left_padding p_left
bounds.subtract_right_padding p_right
end
adjusted_font_size
end
Expand All @@ -2115,7 +2146,7 @@ def arrange_fragments_by_line fragments, opts = {}
by_line << fragment
elsif txt.include? EOL
txt.scan(LineScanRx) do |line|
by_line << fragment.merge(text: line)
by_line << (line == EOL ? { text: EOL } : (fragment.merge text: line))
end
else
by_line << fragment
Expand Down Expand Up @@ -2178,7 +2209,7 @@ def preserve_indentation string, tab_size = nil, guard_indent = true
if line.start_with? TAB
# NOTE '+' operator is faster than interpolation in this case
if guard_indent
line.sub!(TabIndentRx) {|tabs| IndentGuard + full_tab_space * tabs.length }
line.sub!(TabIndentRx) {|tabs| GuardedIndent + (full_tab_space * tabs.length).chop! }
else
line.sub!(TabIndentRx) {|tabs| full_tab_space * tabs.length }
end
Expand Down Expand Up @@ -2209,13 +2240,13 @@ def preserve_indentation string, tab_size = nil, guard_indent = true
end

# NOTE we save time by adding indent guard per line while performing tab expansion
result << IndentGuard if leading_space
line[0] = GuardedIndent if leading_space
result << line
end
result.join
else
if guard_indent
string.prepend IndentGuard if string.start_with? ' '
string[0] = GuardedIndent if string.start_with? ' '
string.gsub! InnerIndent, GuardedInnerIndent if string.include? InnerIndent
end
string
Expand Down
8 changes: 4 additions & 4 deletions lib/asciidoctor-pdf/prawn_ext/coderay_encoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ class CodeRayEncoder < ::CodeRay::Encoders::Encoder
}

EOL = %(\n)
NoBreakSpace = %(\u00a0)
InnerIndent = %(\n )
# \u200b is zero-width space
IndentGuard = %(\u200b)
GuardedInnerIndent = %(\n\u200b )
GuardedIndent = %(\u00a0)
GuardedInnerIndent = %(\n\u00a0)

def setup options
super
Expand All @@ -88,7 +88,7 @@ def text_token text, kind
@start_of_line = true
else
# NOTE add guard character to prevent Prawn from trimming indentation
text.prepend IndentGuard if @start_of_line && (text.start_with? ' ')
text[0] = GuardedIndent if @start_of_line && (text.start_with? ' ')
text.gsub! InnerIndent, GuardedInnerIndent if text.include? InnerIndent

# NOTE this optimization assumes we don't support/use background colors
Expand Down
4 changes: 4 additions & 0 deletions lib/asciidoctor-pdf/rouge_ext.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
require 'rouge'
require_relative 'rouge_ext/formatters/prawn'
require_relative 'rouge_ext/css_theme'
require_relative 'rouge_ext/themes/pastie'
14 changes: 14 additions & 0 deletions lib/asciidoctor-pdf/rouge_ext/css_theme.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module Rouge
  class CSSTheme
    # Patch style_for so the most specific style wins.
    #
    # Walks the token chain of the given token in reverse order (last entry
    # first) and returns the first style registered in +styles+ for an entry
    # of that chain. Returns nil when no entry of the chain has a style.
    #
    # See https://github.com/jneen/rouge/issues/280 (fix pending)
    def style_for token
      token.token_chain.reverse_each do |candidate|
        style = styles[candidate]
        return style if style
      end
      nil
    end
  end
end
121 changes: 121 additions & 0 deletions lib/asciidoctor-pdf/rouge_ext/formatters/prawn.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
module Rouge
  module Formatters
    # Transforms a token stream into an array of formatted text fragments
    # for use with Prawn.
    #
    # Each fragment is a Hash with a :text entry and, when the theme defines a
    # style for the token, optional :color and :styles entries. Fragments that
    # hold a line number additionally carry linenum: true.
    class Prawn < Formatter
      tag 'prawn'

      EOL = %(\n)
      NoBreakSpace = %(\u00a0)
      InnerIndent = %(\n )
      # NOTE a no-break space takes the place of the first space of an indented line
      GuardedIndent = %(\u00a0)
      GuardedInnerIndent = %(\n\u00a0)
      BoldStyle = [:bold].to_set
      ItalicStyle = [:italic].to_set
      BoldItalicStyle = [:bold, :italic].to_set

      # Creates the formatter.
      #
      # opts - Hash of options:
      #        :theme - a Rouge::Theme instance, or a value passed to
      #                 Rouge::Theme.find to look up the theme class. When
      #                 missing or not found, the Pastie theme is used.
      def initialize opts = {}
        unless ::Rouge::Theme === (theme = opts[:theme])
          unless theme && (theme = ::Rouge::Theme.find theme)
            theme = ::Rouge::Themes::Pastie
          end
          theme = theme.new
        end
        @theme = theme
        # cache of raw color value => normalized color value (see normalize_color)
        @normalized_colors = {}
        # template that is merged with the text of each line number fragment
        @linenum_fragment_base = (create_fragment Token['Generic.Lineno']).merge linenum: true
      end

      # Override format method so fragments don't get flattened to a string
      # and to add an options Hash.
      #
      # Returns the Array of fragments produced by #stream.
      def format tokens, opts = {}
        stream tokens, opts
      end

      # Converts the tokens into an Array of fragment Hashes.
      #
      # tokens - an enumerable that yields (token, value) pairs
      # opts   - Hash of options:
      #          :line_numbers - when truthy, a line number fragment is
      #                          inserted at the start of each line; otherwise
      #                          leading indentation is guarded instead.
      #
      # Returns an Array of fragment Hashes.
      def stream tokens, opts = {}
        if opts[:line_numbers]
          # TODO implement line number start (offset)
          linenum = 0
          fragments = []
          fragments << (create_linenum_fragment linenum += 1)
          tokens.each do |tok, val|
            fragment = create_fragment tok, val
            if val == EOL
              fragments << fragment
              fragments << (create_linenum_fragment linenum += 1)
            elsif val.include? EOL
              val.each_line do |line|
                fragments << (fragment.merge text: line)
                # NOTE append linenum fragment if there's a next line; only works if source doesn't have trailing endline
                if line.end_with? EOL
                  fragments << (create_linenum_fragment linenum += 1)
                end
              end
            else
              fragments << fragment
            end
          end
          # NOTE drop orphaned linenum fragment (due to trailing endline in source)
          # and roll the counter back so it matches the highest number actually shown
          if (last_fragment = fragments[-1]) && last_fragment[:linenum]
            fragments.pop
            linenum -= 1
          end
          # NOTE pad numbers with fewer digits than the highest line number
          # (the width is the number of digits in the highest line number)
          if (linenum_w = linenum.to_s.length) > 1
            # NOTE extra column is the trailing space after the line number
            linenum_w += 1
            fragments.each do |fragment|
              fragment[:text] = fragment[:text].rjust linenum_w, NoBreakSpace if fragment[:linenum]
            end
          end
          fragments
        else
          start_of_line = true
          tokens.map do |tok, val|
            # match one or more consecutive endlines
            if val == EOL || (val == (EOL * val.length))
              start_of_line = true
              { text: val }
            else
              # NOTE build new strings instead of modifying the token value in place
              # so the caller's (possibly frozen) strings are left untouched
              text = val
              text = GuardedIndent + text[1..-1] if start_of_line && (text.start_with? ' ')
              text = text.gsub InnerIndent, GuardedInnerIndent if text.include? InnerIndent
              start_of_line = text.end_with? EOL
              # NOTE this optimization assumes we don't support/use background colors
              text.rstrip.empty? ? { text: text } : (create_fragment tok, text)
            end
          end
          # QUESTION should we strip trailing newline?
        end
      end

      # Builds a fragment Hash for the token using the style the theme
      # provides for that token (if any).
      #
      # tok - the token to look up in the theme
      # val - the text of the fragment; when nil, no :text entry is added
      #
      # Returns a Hash with optional :text, :color and :styles entries.
      #
      # TODO method could still be optimized (for instance, check if val is EOL)
      def create_fragment tok, val = nil
        fragment = val ? { text: val } : {}
        if (style_rules = @theme.style_for tok)
          # TODO support background color
          if (fg = normalize_color style_rules.fg)
            fragment[:color] = fg
          end
          if style_rules[:bold]
            fragment[:styles] = style_rules[:italic] ? BoldItalicStyle : BoldStyle
          elsif style_rules[:italic]
            fragment[:styles] = ItalicStyle
          end
        end
        fragment
      end

      # Builds the fragment for the given line number; the text is the number
      # followed by a single space.
      def create_linenum_fragment linenum
        @linenum_fragment_base.merge text: %(#{linenum} )
      end

      # Normalizes a color value: removes a leading '#' and expands a
      # 3-character value to 6 characters by doubling each character.
      # Results are cached per raw value.
      #
      # Returns the normalized String, or nil if raw is nil or false.
      def normalize_color raw
        return unless raw
        if (normalized = @normalized_colors[raw])
          normalized
        else
          normalized = (raw.start_with? '#') ? raw[1..-1] : raw
          normalized = normalized.each_char.map {|c| c * 2 }.join if normalized.size == 3
          @normalized_colors[raw] = normalized
        end
      end
    end
  end
end
Loading

0 comments on commit 24b3950

Please sign in to comment.