# Patch summary: move Searchyll indexing from the generator to Jekyll hooks.
#
# lib/searchyll.rb
#   * Adds requires for "jekyll/hooks", "searchyll/configuration",
#     "searchyll/indexer" and "nokogiri"; drops require "searchyll/generator".
#   * Registers three Jekyll hooks that share a local `indexers` hash keyed by site:
#       - :site / :pre_render   builds a Searchyll::Configuration and a
#                               Searchyll::Indexer for the site and calls #start.
#       - :site / :post_render  calls #finish on that site's indexer.
#       - :posts / :post_render parses post.output with Nokogiri::HTML, takes the
#                               text nodes under //article, collapses whitespace
#                               runs to a single space, and pushes post.data merged
#                               with :id and :text onto the indexer.
#   * Wraps the registrations in begin/rescue that prints e.message.
#
# lib/searchyll/generator.rb
#   * Comments out the site.posts.each indexing loop and, in its place, registers
#     a :posts / :post_render hook that prints post.output.
#
# searchyll.gemspec
#   * Constrains the jekyll development dependency to ">=3.0" and adds a
#     runtime dependency on "nokogiri".
#
# NOTE(review): the begin/rescue in lib/searchyll.rb encloses only the
#   Jekyll::Hooks.register calls; the hook blocks run later, so errors raised
#   inside them would presumably not reach this rescue -- confirm intent.
# NOTE(review): the :site / :post_render hook calls indexers[site].finish with no
#   nil check; if the :pre_render hook did not populate the entry this would
#   raise NoMethodError -- confirm whether that can happen.
# NOTE(review): the hook added in generator.rb is registered inside
#   generate(site) and only prints output; looks like debugging residue, and
#   lib/searchyll.rb no longer requires this file -- confirm whether it is
#   still loaded anywhere.
# NOTE(review): "elasticsearch-ruby" (unchanged context line) may not be the
#   published gem name -- verify against rubygems.org.
# NOTE(review): the patch text below appears whitespace-collapsed (many diff
#   lines per physical line), so it presumably will not apply with `git apply`
#   as-is -- regenerate from the original commit if it must be applied.
diff --git a/lib/searchyll.rb b/lib/searchyll.rb index ae1db2c..d99c92c 100644 --- a/lib/searchyll.rb +++ b/lib/searchyll.rb @@ -1,6 +1,35 @@ require "searchyll/version" - +require "jekyll/hooks" require "jekyll/plugin" require "jekyll/generator" +require "searchyll/configuration" +require "searchyll/indexer" +require "nokogiri" + +begin + indexers = {} + + Jekyll::Hooks.register(:site, :pre_render) do |site| + config = Searchyll::Configuration.new(site) + indexers[site] = Searchyll::Indexer.new(config) + indexers[site].start + end + + Jekyll::Hooks.register :site, :post_render do |site| + indexers[site].finish + end + + Jekyll::Hooks.register :posts, :post_render do |post| + # strip html + nokogiri_doc = Nokogiri::HTML(post.output) + + indexer = indexers[post.site] + indexer << post.data.merge({ + id: post.id, + text: nokogiri_doc.xpath("//article//text()").to_s.gsub(/\s+/, " ") + }) + end -require "searchyll/generator" +rescue => e + puts e.message +end diff --git a/lib/searchyll/generator.rb b/lib/searchyll/generator.rb index 25f5d75..f08b69d 100644 --- a/lib/searchyll/generator.rb +++ b/lib/searchyll/generator.rb @@ -20,11 +20,15 @@ def generate(site) # Iterate through the site contents and send to indexer # TODO: what are we indexing? 
- site.posts.each do |doc| - indexer << doc.data.merge({ - id: doc.id, - content: doc.content - }) + # site.posts.each do |doc| + # indexer << doc.data.merge({ + # id: doc.id, + # content: doc.content + # }) + # end + + Jekyll::Hooks.register :posts, :post_render do |post| + puts post.output end # Signal to the indexer that we're done adding content diff --git a/searchyll.gemspec b/searchyll.gemspec index 69cbad5..70e8cbc 100644 --- a/searchyll.gemspec +++ b/searchyll.gemspec @@ -22,8 +22,9 @@ Gem::Specification.new do |spec| spec.add_development_dependency "rake", "~> 10.0" spec.add_development_dependency "rspec" spec.add_development_dependency "guard-rspec" - spec.add_development_dependency "jekyll" + spec.add_development_dependency "jekyll", ">=3.0" spec.add_dependency "elasticsearch-ruby" + spec.add_dependency "nokogiri" end