# lib/jekyll/generators/gather_webmentions.rb
module Jekyll
  module WebmentionIO
    # Generator that queries the "mentions" API endpoint for every document
    # returned by WebmentionIO.gather_documents and merges anything new into
    # the "incoming" webmention cache.
    class GatherWebmentions < Generator
      safe true
      priority :high

      # Entry point called by Jekyll.
      #
      # Does nothing when the configured site URL contains "localhost" or when
      # `webmentions.pause_lookups` is set in the site config.
      #
      # @param site the Jekyll site being generated
      def generate(site)
        @site = site
        @site_url = site.config["url"].to_s

        if @site_url.include? "localhost"
          WebmentionIO.log "msg", "Webmentions won’t be gathered on localhost."
          return
        end

        # Truthiness check (not `== true`) so this generator interprets the
        # setting exactly like QueueWebmentions does.
        if @site.config.dig("webmentions", "pause_lookups")
          WebmentionIO.log "msg", "Webmention gathering is currently paused."
          return
        end

        WebmentionIO.log "msg", "Beginning to gather webmentions of your posts. This may take a while."

        WebmentionIO.api_path = "mentions"
        # add an arbitrarily high perPage to trump pagination
        WebmentionIO.api_suffix = "&perPage=9999"

        @cached_webmentions = WebmentionIO.read_cached_webmentions "incoming"
        @lookups = WebmentionIO.read_lookup_dates

        WebmentionIO.gather_documents(@site).each do |post|
          check_for_webmentions(post)
        end

        WebmentionIO.cache_lookup_dates @lookups
        WebmentionIO.cache_webmentions "incoming", @cached_webmentions
      end

      private

      # Looks up new webmentions for a single document, unless the lookup is
      # throttled, then records today's date as its last lookup.
      #
      # @param post a document responding to `url`, `data` and (optionally) `date`
      def check_for_webmentions(post)
        WebmentionIO.log "info", "Checking for webmentions of #{post.url}."

        # Most recently cached webmention for this URL (nil when none cached).
        last_webmention = @cached_webmentions[post.url]&.values&.last

        last_lookup = @lookups[post.url] || last_verified_date(last_webmention)

        # should we throttle? (Some docs have no date)
        if last_lookup && post.respond_to?(:date) &&
           WebmentionIO.post_should_be_throttled?(post, post.date, last_lookup)
          WebmentionIO.log "info", "Throttling this post."
          return
        end

        # Get the last id we have in the hash
        since_id = last_webmention&.dig("raw", "id")

        targets = get_webmention_target_urls(post)

        # execute the API
        response = WebmentionIO.get_response assemble_api_params(targets, since_id)

        # Same guard cache_new_webmentions applies: an empty or missing
        # response is treated as "no links" instead of raising.
        links = response["links"] if response
        if links && !links.empty?
          WebmentionIO.log "info", "Here’s what we got back:\n\n#{response.inspect}\n\n"
        else
          WebmentionIO.log "info", "No webmentions found."
        end

        @lookups[post.url] = Date.today
        cache_new_webmentions(post.url, response)
      end

      # Parses the "verified_date" stored under "raw" in a cached webmention.
      #
      # @param webmention [Hash, nil] cached webmention hash
      # @return [Date, nil] nil when there is no webmention or no date on it
      def last_verified_date(webmention)
        verified = webmention&.dig("raw", "verified_date").to_s
        Date.parse(verified) unless verified.empty?
      end

      # Builds the list of URLs to query for a document: its current URL,
      # any `redirect_from` URLs, and the same URL on each legacy domain.
      #
      # @return [Array<String>]
      def get_webmention_target_urls(post)
        uri = File.join(@site_url, post.url)
        targets = [uri]

        # Redirection?
        gather_redirected_targets(post, uri, targets)

        # Domain changed?
        gather_legacy_targets(uri, targets)

        targets
      end

      # Appends one URL per `redirect_from` entry (a String or an Array in the
      # document's front matter) to +targets+.
      #
      # Each URL is joined onto the site URL directly. Substituting into +uri+
      # replaces the first occurrence of `post.url` anywhere in the string,
      # which corrupts the scheme/host when the document URL is "/" or also
      # appears in the domain. +uri+ is kept only for signature compatibility.
      def gather_redirected_targets(post, uri, targets)
        redirects = post.data["redirect_from"]
        return unless redirects.is_a?(String) || redirects.is_a?(Array)

        Array(redirects).each do |redirect|
          targets.push(File.join(@site_url, redirect))
        end
      end

      # Appends +uri+ rewritten onto each configured `legacy_domains` entry.
      # `Array()` lets the setting be empty or a single domain string.
      def gather_legacy_targets(uri, targets)
        return unless WebmentionIO.config.key? "legacy_domains"

        WebmentionIO.log "info", "adding legacy URIs"
        Array(WebmentionIO.config["legacy_domains"]).each do |domain|
          legacy = uri.sub(@site_url, domain)
          WebmentionIO.log "info", "adding URI #{legacy}"
          targets.push(legacy)
        end
      end

      # Assembles the query string for the API call.
      # NOTE(review): target URLs are interpolated unescaped — confirm whether
      # WebmentionIO.get_response encodes them.
      #
      # @param targets [Array<String>] URLs to look up
      # @param since_id id of the newest cached webmention, or nil/false
      # @return [String]
      def assemble_api_params(targets, since_id)
        api_params = targets.map { |target| "target[]=#{target}" }.join("&")
        api_params << "&since_id=#{since_id}" if since_id
        api_params << "&sort-by=published"
        api_params
      end

      # Merges the links of an API response into the cached webmentions for
      # +post_uri+, skipping ids that are already cached.
      #
      # @param post_uri [String] cache key (the document URL)
      # @param response API response; nil or one without "links" adds nothing
      def cache_new_webmentions(post_uri, response)
        webmentions = @cached_webmentions.fetch(post_uri) { {} }

        links = (response && response["links"]) || []
        links.reverse_each do |link|
          webmention = WebmentionIO::Webmention.new(link, @site)

          # Do we already have it?
          next if webmentions.key? webmention.id

          # Add it to the list
          entry = webmention.to_hash
          WebmentionIO.log "info", entry.inspect
          webmentions[webmention.id] = entry
        end

        @cached_webmentions[post_uri] = webmentions
      end
    end
  end
end

# lib/jekyll/generators/queue_webmentions.rb
module Jekyll
  module WebmentionIO
    # Generator that scans every document returned by
    # WebmentionIO.gather_documents for URLs it mentions and records them in
    # the "outgoing" webmention cache.
    class QueueWebmentions < Generator
      safe true
      priority :low

      # Entry point called by Jekyll.
      #
      # Does nothing when the configured site URL contains "localhost" or when
      # `webmentions.pause_lookups` is set in the site config.
      #
      # @param site the Jekyll site being generated
      def generate(site)
        @site = site
        @site_url = site.config["url"].to_s

        if @site_url.include? "localhost"
          WebmentionIO.log "msg", "Webmentions lookups are not run on localhost."
          return
        end

        if @site.config.dig("webmentions", "pause_lookups")
          WebmentionIO.log "info", "Webmention lookups are currently paused."
          return
        end

        WebmentionIO.log "msg", "Beginning to gather webmentions you’ve made. This may take a while."

        upgrade_outgoing_webmention_cache

        gather_webmentions(WebmentionIO.gather_documents(@site))
      end

      private

      # Adds each document's mentioned URLs to the "outgoing" cache. Entries
      # already cached for a document keep their stored value.
      def gather_webmentions(posts)
        webmentions = WebmentionIO.read_cached_webmentions "outgoing"

        posts.each do |post|
          uri = File.join(@site_url, post.url)
          mentions = get_mentioned_uris(post)
          cached = webmentions[uri]
          if cached
            # Only add targets we have not seen; the block keeps the cached value.
            cached.merge!(mentions) { |_target, known, _fresh| known }
          else
            webmentions[uri] = mentions
          end
        end

        WebmentionIO.cache_webmentions "outgoing", webmentions
      end

      # Collects the `in_reply_to` URL (if any) and every URL matched in the
      # document content.
      #
      # @return [Hash{String => false}] each mentioned URL mapped to false
      def get_mentioned_uris(post)
        uris = {}
        uris[post.data["in_reply_to"]] = false if post.data["in_reply_to"]
        post.content.scan(/(?:https?:)?\/\/[^\s)#"]+/) do |match|
          uris[match] = false unless uris.key? match
        end
        uris
      end

      # One-time migration of the old "sent.yml" / "queued.yml" cache files
      # into the "outgoing" cache. Runs only while "sent.yml" still exists.
      # Queued targets found in the sent cache are stored as "", others as false.
      def upgrade_outgoing_webmention_cache
        old_sent_file = WebmentionIO.cache_file("sent.yml")
        old_outgoing_file = WebmentionIO.cache_file("queued.yml")
        return unless File.exist? old_sent_file

        sent_webmentions = WebmentionIO.load_yaml(old_sent_file) || {}
        # "queued.yml" may be absent even though "sent.yml" exists.
        outgoing_webmentions =
          if File.exist? old_outgoing_file
            WebmentionIO.load_yaml(old_outgoing_file) || {}
          else
            {}
          end

        merged = {}
        outgoing_webmentions.each do |source_url, webmentions|
          collection = {}
          Array(webmentions).each do |target_url|
            collection[target_url] = previously_sent?(sent_webmentions, source_url, target_url) ? "" : false
          end
          merged[source_url] = collection
        end

        WebmentionIO.cache_webmentions "outgoing", merged
        # File.delete raises Errno::ENOENT for a missing path, so only remove
        # the files that are actually there.
        [old_sent_file, old_outgoing_file].each do |file|
          File.delete(file) if File.exist? file
        end
        WebmentionIO.log "msg", "Upgraded your sent webmentions cache."
      end

      # Whether +target_url+ is recorded under +source_url+ in the old sent
      # cache.
      # NOTE(review): the legacy file layout is not visible here; both a hash
      # of targets and a plain list of targets per source are accepted
      # (Hash#dig with a string key raises TypeError on a list).
      def previously_sent?(sent_webmentions, source_url, target_url)
        targets = sent_webmentions[source_url]
        case targets
        when Hash then targets[target_url] ? true : false
        when Array then targets.include?(target_url)
        else false
        end
      end
    end
  end
end