All Files
(91.62%
covered at
10.44
hits/line)
9 files in total.
191 relevant lines.
175 lines covered and
16 lines missed
-
6
Dir[File.dirname(__FILE__) + '/wikipedia/**/*.rb'].each { |f| require f }
-
-
1
require 'uri'
-
-
1
# Module-level convenience API that forwards to one lazily created, shared
# Wikipedia::Client.
#
# Examples:
#   page = Wikipedia.find('Rails')
#   # => #<Wikipedia:0x123102>
#   page.content
#   # => wiki content appears here
#
# This is just a wrapper for doing:
#   client = Wikipedia::Client.new
#   client.find('Rails')
module Wikipedia
  # Delegates to Client#find on the shared client.
  def self.find(page, options = {})
    client.find(page, options)
  end

  # Delegates to Client#find_image on the shared client.
  def self.find_image(title, options = {})
    client.find_image(title, options)
  end

  # Delegates to Client#find_random on the shared client.
  def self.find_random(options = {})
    client.find_random(options)
  end

  # Evaluates the block in the context of the Configuration singleton, so the
  # directive methods (protocol, domain, path, user_agent) can be called bare.
  def self.Configure(&block)
    Configuration.instance.instance_eval(&block)
  end

  # Defaults applied when the library is loaded.
  Configure {
    protocol 'https'
    domain 'en.wikipedia.org'
    path 'w/api.php'
    user_agent(
      'wikipedia-client/1.3 (https://github.com/kenpratt/wikipedia-client)'
    )
  }

  # Memoized client shared by the delegating methods above.
  def self.client
    @client ||= Wikipedia::Client.new
  end
  # A bare `private` does not apply to methods defined with `def self.`, so
  # the singleton method has to be hidden explicitly.
  private_class_method :client
end
-
1
module Wikipedia
  # Builds request URLs from Configuration plus per-call options, fetches them
  # with open-uri and wraps the responses in Wikipedia::Page objects.
  class Client
    # see http://en.wikipedia.org/w/api.php
    # Each ":name" placeholder is substituted by the option of the same name
    # in #url_for; options without a placeholder are appended as query params.
    BASE_URL = ':protocol://:domain/:path?action=:action&format=json'.freeze

    # When true (the default set in #initialize), #find keeps requesting the
    # redirect target while the fetched page is a redirect.
    attr_accessor :follow_redirects

    def initialize
      self.follow_redirects = true
    end

    # Fetches a page by title, or by URL (the title is extracted via
    # Wikipedia::Url when that succeeds). Returns a Wikipedia::Page.
    def find(title, options = {})
      page = Page.new(request_page(title_from(title), options))
      while follow_redirects && page.redirect?
        page = Page.new(request_page(page.redirect_title, options))
      end
      page
    end

    # Fetches image info for a file title (or URL). Returns a Wikipedia::Page.
    def find_image(title, options = {})
      Page.new(request_image(title_from(title), options))
    end

    # Asks the API for a random page, then fetches that page by its title.
    def find_random(options = {})
      require 'json'
      data = JSON.parse(request_random(options))
      title = data['query']['pages'].values[0]['title']
      find(title, options)
    end

    # http://en.wikipedia.org/w/api.php?action=query&format=json&prop=revisions%7Clinks%7Cimages%7Ccategories&rvprop=content&titles=Flower%20(video%20game)
    # Caller-supplied options override the defaults below.
    def request_page(title, options = {})
      request({
        action: 'query',
        prop: %w[ info revisions links extlinks images categories coordinates templates extracts ],
        rvprop: 'content',
        inprop: 'url',
        explaintext: '',
        titles: title
      }.merge(options))
    end

    # http://en.wikipedia.org/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url&titles=File:Flower.png
    def request_image(title, options = {})
      request({
        action: 'query',
        prop: 'imageinfo',
        iiprop: 'url',
        titles: title
      }.merge(options))
    end

    # http://en.wikipedia.org/w/api.php?action=query&generator=random&grnnamespace=0&prop=info
    def request_random(options = {})
      request({
        action: 'query',
        generator: 'random',
        grnnamespace: '0',
        prop: 'info'
      }.merge(options))
    end

    # Performs the HTTP GET for the given options and returns the response
    # body, sending the configured User-Agent header.
    def request(options)
      require 'open-uri'
      URI.parse(url_for(options)).read('User-Agent' => Configuration[:user_agent])
    end

    protected

    # Returns the title extracted from a URL-like argument, or the argument
    # itself when Wikipedia::Url cannot produce one (any StandardError).
    def title_from(title)
      Url.new(title).title
    rescue StandardError
      title
    end

    # Connection settings read from Configuration at call time.
    def configuration_options
      {
        protocol: Configuration[:protocol],
        domain: Configuration[:domain],
        path: Configuration[:path]
      }
    end

    # Expands BASE_URL: options matching a ":name" placeholder replace it,
    # every other option is appended as "&name=value".
    def url_for(options)
      url = BASE_URL.dup
      configuration_options.merge(options).each do |key, val|
        value = urlify_value(val)
        placeholder = ":#{key}"
        if url.include?(placeholder)
          # Block form + to_s: the value is inserted verbatim (no backslash
          # interpretation) and non-String values no longer raise TypeError.
          url.sub!(placeholder) { value.to_s }
        else
          url << "&#{key}=#{value}"
        end
      end
      url
    end

    # Arrays become a single "|"-joined value before encoding.
    def urlify_value(val)
      case val
      when Array
        encode(val.flatten.join('|'))
      else
        encode(val)
      end
    end

    # Percent-encodes String values; anything else is returned untouched.
    # URI.encode was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape performs
    # the same escaping. "&" is escaped in addition so it cannot split the
    # query string.
    def encode(val)
      return val unless val.is_a?(String)

      URI::DEFAULT_PARSER.escape(val).gsub('&', '%26')
    end
  end
end
-
1
require 'singleton'
-
-
1
module Wikipedia
  # Singleton holding the library settings. Each directive is a combined
  # reader/writer: called without arguments it returns the stored value,
  # called with an argument it stores (and returns) that argument.
  class Configuration
    include Singleton

    class << self
      # Defines one reader/writer instance method per given name, backed by
      # the instance variable of the same name.
      def directives(*directives)
        directives.each do |directive|
          ivar = "@#{directive}"
          define_method(directive) do |*args|
            if args.empty?
              instance_variable_get(ivar)
            else
              instance_variable_set(ivar, args.first)
            end
          end
        end
      end

      # Reads a directive from the singleton, e.g. Configuration[:domain].
      def [](directive)
        instance.send(directive)
      end
    end

    directives :protocol, :domain, :path, :user_agent
  end
end
-
1
module Wikipedia
  # Wraps the JSON text of an API response and exposes the first entry of
  # query.pages through convenience accessors. Accessors return nil when the
  # corresponding data is absent from the response.
  class Page
    # The raw JSON string this page was built from.
    attr_reader :json

    # json - the response body as a String; parsed eagerly with JSON.parse.
    def initialize(json)
      require 'json'
      @json = json
      @data = JSON.parse(json)
    end

    # The first entry of query.pages, or nil when the response has no
    # "query" or no "pages" key (e.g. an error payload).
    def page
      query = @data['query']
      pages = query && query['pages']
      pages.values.first if pages
    end

    # Wiki markup of the first revision, or nil when there is none.
    def content
      revision = first_entry('revisions')
      revision['*'] if revision
    end

    # #content run through Page.sanitize. The stored content is left intact
    # because sanitize works on a copy.
    def sanitized_content
      self.class.sanitize(content)
    end

    # Returns the MatchData for a "#REDIRECT [[Target]]" directive in the
    # content (truthy), or nil when the page is not a redirect.
    def redirect?
      text = content
      text && text.match(/\#REDIRECT\s*\[\[(.*?)\]\]/i)
    end

    # Target title of the redirect, or nil when the page is not a redirect.
    def redirect_title
      match = redirect?
      match[1] if match
    end

    def title
      page_value('title')
    end

    def fullurl
      page_value('fullurl')
    end

    def editurl
      page_value('editurl')
    end

    def text
      page_value('extract')
    end

    # The part of the extract before the first "==" (section heading marker),
    # stripped; nil when the extract is missing or empty.
    def summary
      extract = page_value('extract')
      return if extract.nil? || extract == ''

      extract.split('==').first.to_s.strip
    end

    def categories
      collect_field('categories', 'title')
    end

    def links
      collect_field('links', 'title')
    end

    def extlinks
      collect_field('extlinks', '*')
    end

    def images
      collect_field('images', 'title')
    end

    def image_url
      info = first_entry('imageinfo')
      info['url'] if info
    end

    def image_descriptionurl
      info = first_entry('imageinfo')
      info['descriptionurl'] if info
    end

    # URLs of every image found by #image_metadata (one request per image).
    def image_urls
      image_metadata.map(&:image_url)
    end

    def image_descriptionurls
      image_metadata.map(&:image_descriptionurl)
    end

    # Values of the first coordinates entry, or nil when there is none.
    def coordinates
      first = first_entry('coordinates')
      first.values if first
    end

    # The parsed response as returned by JSON.parse.
    def raw_data
      @data
    end

    # One Page per image title with a jpg/jpeg/png/gif/svg extension
    # (titles containing "LinkFA-star" are skipped), fetched through
    # Wikipedia.find_image and memoized. Always returns an Array so that
    # #image_urls and #image_descriptionurls work on pages without images.
    def image_metadata
      return @cached_image_metadata if @cached_image_metadata

      titles = images
      return [] if titles.nil?

      filtered = titles.select { |i| i =~ /:.+\.(jpg|jpeg|png|gif|svg)$/i && !i.include?('LinkFA-star') }
      @cached_image_metadata = filtered.map { |title| Wikipedia.find_image(title) }
    end

    def templates
      collect_field('templates', 'title')
    end

    # rubocop:disable Metrics/MethodLength
    # rubocop:disable Metrics/AbcSize
    # Converts wiki markup to lightly formatted HTML. Returns nil for nil
    # input. The argument itself is never modified.
    def self.sanitize( s )
      return unless s

      # Work on a copy: the gsub!/sub! calls below would otherwise rewrite the
      # caller's string (and raise on frozen input).
      s = s.dup

      # strip anything inside curly braces! (repeat until nested ones are gone)
      s.gsub!(/\{\{[^\{\}]+?\}\}/, '') while s =~ /\{\{[^\{\}]+?\}\}/

      # strip info box
      s.sub!(/^\{\|[^\{\}]+?\n\|\}\n/, '')

      # strip internal links, keeping the label (or the target when unlabeled)
      s.gsub!(/\[\[([^\]\|]+?)\|([^\]\|]+?)\]\]/, '\2')
      s.gsub!(/\[\[([^\]\|]+?)\]\]/, '\1')

      # strip images and file links
      s.gsub!(/\[\[Image:[^\[\]]+?\]\]/, '')
      s.gsub!(/\[\[File:[^\[\]]+?\]\]/, '')

      # convert bold/italic to html
      s.gsub!(/'''''(.+?)'''''/, '<b><i>\1</i></b>')
      s.gsub!(/'''(.+?)'''/, '<b>\1</b>')
      s.gsub!(/''(.+?)''/, '<i>\1</i>')

      # misc: references, HTML comments, non-breaking spaces
      s.gsub!(/<ref[^<>]*>[\s\S]*?<\/ref>/, '')
      s.gsub!(/<!--[^>]+?-->/, '')
      # NOTE(review): handles both the &nbsp; entity and a literal U+00A0;
      # confirm which form the fixtures rely on.
      s.gsub!(/&nbsp;|\u00A0/, ' ')
      s.strip!

      # create paragraphs (only when there is more than one)
      sections = s.split("\n\n")
      if sections.size > 1
        s = sections.map { |paragraph| "<p>#{paragraph.strip}</p>" }.join("\n")
      end

      s
    end

    private

    # Value stored under +key+ in the page hash, or nil without a page.
    def page_value(key)
      current = page
      current[key] if current
    end

    # First element of the list stored under +key+, or nil.
    def first_entry(key)
      entries = page_value(key)
      entries.first if entries
    end

    # +field+ of every element of the list stored under +key+, or nil when
    # the list is absent.
    def collect_field(key, field)
      entries = page_value(key)
      entries.map { |entry| entry[field] } if entries
    end
  end
end
-
1
module Wikipedia
  # Extracts a page title from a wiki URL.
  class Url
    # wiki_url - the URL (or any string URI.parse accepts) to read from.
    def initialize(wiki_url)
      @wiki_url = wiki_url
    end

    # The percent-decoded last path segment of the URL, memoized.
    # Raises whatever URI.parse raises for an unparsable string; a URL with
    # no path segment also raises (callers in this library rescue and fall
    # back to the original string).
    def title
      return @title if @title

      uri = URI.parse(@wiki_url)
      # URI.decode was removed in Ruby 3.0; the default parser's unescape
      # performs the same percent-decoding.
      @title = URI::DEFAULT_PARSER.unescape(uri.path.split('/').last)
    end
  end
end
-
1
module Wikipedia
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '1.6.3'.freeze
end
-
1
require File.dirname(__FILE__) + '/../spec_helper'
-
1
require 'json'
-
-
1
# Generates one example per "<name>-raw.txt" fixture and compares the
# sanitized output with the matching "<name>-sanitized.txt" file.
describe Wikipedia::Page, ".sanitize wiki markup" do
  Dir[File.dirname(__FILE__) + '/../fixtures/sanitization_samples/*-raw.txt'].each do |raw_filename|
    # Use only the fixture's base name in the example description; the old
    # regex matched from the first "/" and kept almost the whole path.
    name = File.basename(raw_filename, '-raw.txt')
    # Anchor on the suffix so a "-raw" elsewhere in the path is left alone.
    sanitized_filename = raw_filename.sub(/-raw\.txt\z/, '-sanitized.txt')

    it "should sanitize #{name} properly" do
      @raw = File.read(raw_filename)
      @sanitized = File.read(sanitized_filename).strip
      Wikipedia::Page.sanitize(@raw).strip.should == @sanitized
    end
  end
end
-
1
require File.dirname(__FILE__) + '/../spec_helper'
-
-
1
# Checks that the title is taken from the last path segment of a wiki URL.
describe Wikipedia::Url, "like http://en.wikipedia.org/wiki/Getting_Things_Done" do
  let(:url) { Wikipedia::Url.new('http://en.wikipedia.org/wiki/Getting_Things_Done') }

  it "should have a title of Getting_Things_Done" do
    url.title.should == 'Getting_Things_Done'
  end
end
-
1
require File.dirname(__FILE__) + '/../spec_helper'
-
-
1
# Exercises the module-level Wikipedia.find entry point, by title and by URL.
describe Wikipedia, ".find" do
  let(:page) { Wikipedia.find('Getting_Things_Done') }

  it "should return a Wikipedia::Page instance" do
    page.should be_an_instance_of(Wikipedia::Page)
  end

  it "should return a Page with a title" do
    page.title.should_not be_nil
  end

  it "should return a Page given a URL" do
    by_url = Wikipedia.find('http://en.wikipedia.org/wiki/Getting_Things_Done')
    page.title.should == by_url.title
  end
end