-
Notifications
You must be signed in to change notification settings - Fork 18
/
ebook.rb
144 lines (110 loc) · 3.81 KB
/
ebook.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# encoding: UTF-8
require 'rubygems'
require 'twitter'
require 'punkt-segmenter'
require 'twitter_init'
require 'markov'
require 'htmlentities'
# Accumulator for the cleaned tweet texts collected further down the script.
source_tweets = []

# Global tuning knobs; each default applies only when the global is still unset.
$rand_limit ||= 10
$markov_index ||= 2

# Echo the supplied parameters when there are any.
puts "PARAMS: #{params}" if params.any?

# Default the "tweet" param to true when the caller did not supply it.
params["tweet"] = true unless params.key?("tweet")

# Random draw bounded by $rand_limit.
rand_key = rand($rand_limit)
# Punctuation marks that may terminate a generated sentence. Frozen so the
# shared constant cannot be mutated by accident.
CLOSING_PUNCTUATION = ['.', ';', ':', '?', '!'].freeze

# Picks one closing punctuation mark at random.
#
# @return [String] one element of CLOSING_PUNCTUATION
def random_closing_punctuation
  CLOSING_PUNCTUATION.sample
end
# Turns raw timeline tweets into cleaned-up text.
#
# Steps, in order:
#   1. HTML-decode each tweet's text and cut everything from an "RT "/"MT "
#      marker onwards.
#   2. Drop tweets containing a URL unless $include_urls or
#      params["include_urls"] is set.
#   3. Drop tweets starting with "@" unless $include_replies or
#      params["include_replies"] is set.
#   4. Remove hashtags, "h/t" credits, URLs and leading @mentions, trim
#      surrounding whitespace, and make sure non-empty text ends in closing
#      punctuation.
#
# @param tweets [Enumerable] objects responding to #text
# @return [Array<String>] cleaned texts; a tweet that is emptied by the
#   cleanup stays in the list as an empty string
def filtered_tweets(tweets)
  html_decoder = HTMLEntities.new
  include_urls = $include_urls || params["include_urls"]
  include_replies = $include_replies || params["include_replies"]

  texts = tweets.map { |t| html_decoder.decode(t.text).gsub(/\b(RT|MT) .+/, '') }
  texts = texts.reject { |t| t =~ /(https?:\/\/)/ } unless include_urls
  texts = texts.reject { |t| t =~ /^@/ } unless include_replies

  # Build the result with map: reassigning the block variable inside `each`
  # (t += ".") never reaches the array, so the period used to be lost.
  texts.map do |t|
    cleaned = t.gsub(/(\#|(h\/t)|(http))\S+/, '')
    cleaned = cleaned.gsub(/^(@[\d\w_]+\s?)+/, '')
    # Removing hashtags/URLs leaves stray whitespace; trim it so the
    # punctuation check looks at the real last character.
    cleaned = cleaned.strip
    # Leave emptied tweets empty so callers can still skip them.
    cleaned += "." unless cleaned.empty? || cleaned =~ /[.?;:!]\z/
    cleaned
  end
end
# Main run: fetch the source account's timeline, train a Markov chain on it,
# generate one tweet, then post it (or only print it when params["tweet"] is
# falsy).
#
# Randomly runs only about 1 in $rand_limit times; params["force"] bypasses
# that random gate.
if rand_key != 0 && !params["force"]
  puts "Not running this time (key: #{rand_key})"
else
  # Fetch the timeline in pages of 200: one initial request plus up to 17 more.
  begin
    user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :include_rts => false)
    # An empty first page means there is nothing to page through; skipping the
    # paging avoids calling #id on nil.
    if user_tweets.last
      max_id = user_tweets.last.id
      source_tweets += filtered_tweets(user_tweets)
      # Twitter only returns up to 3200 of a user timeline, includes retweets.
      17.times do
        user_tweets = Twitter.user_timeline($source_account, :count => 200, :trim_user => true, :max_id => max_id - 1, :include_rts => false)
        puts "MAX_ID #{max_id} TWEETS: #{user_tweets.length}"
        break if user_tweets.last.nil?
        max_id = user_tweets.last.id
        source_tweets += filtered_tweets(user_tweets)
      end
    end
  rescue => ex
    # Best effort: report the failure and carry on with whatever was fetched.
    puts ex.message
  end

  puts "#{source_tweets.length} tweets found"
  raise "Error fetching tweets from Twitter. Aborting." if source_tweets.length == 0

  # Train the chain sentence by sentence.
  markov = MarkovChainer.new($markov_index)
  tokenizer = Punkt::SentenceTokenizer.new(source_tweets.join(" ")) # init with corpus of all sentences
  source_tweets.each do |twt|
    next if twt.nil? || twt == ''
    sentences = tokenizer.sentences_from_text(twt, :output => :sentences_text)
    # Disabled alternative to the tokenizer:
    # sentences = text.split(/[.:;?!]/)
    # sentences.each do |sentence|
    #   next if sentence =~ /@/
    #   if sentence !~ /\p{Punct}$/
    #     sentence += "."
    #   end
    sentences.each do |sentence|
      next if sentence =~ /@/
      markov.add_sentence(sentence)
    end
  end

  # Letters-only copies of the corpus, computed once. A generated sentence
  # whose letters appear verbatim inside a source tweet is treated as a copy.
  source_letters = source_tweets.map { |t| t.gsub(/\P{Word}/, '') }
  copied_from_source = lambda do |sentence|
    letters = sentence.gsub(/\P{Word}/, '')
    # `letters` holds only word characters, so a substring test is equivalent
    # to matching it as a regular expression.
    source_letters.any? { |t| t.include?(letters) }
  end

  # `tweet` only ever holds a candidate that passed the copy check, so a
  # rejected candidate can no longer be posted when the attempts run out.
  tweet = nil
  10.times do
    candidate = markov.generate_sentence
    # generate_sentence's contract is not visible here; treat nil as a miss.
    next if candidate.nil? || copied_from_source.call(candidate)

    # Disabled: randomly losing the last word sometimes like horse_ebooks
    # if rand(3) == 0 && tweet =~ /(in|to|from|for|with|by|our|of|your|around|under|beyond)\p{Space}\w+$/
    #   puts "Losing last word randomly"
    #   tweet.gsub(/\p{Space}\p{Word}+.$/, '')
    # end

    if candidate.length < 40 && rand(10) == 0
      puts "Short tweet. Adding another sentence randomly"
      next_sentence = markov.generate_sentence
      next if next_sentence.nil? || copied_from_source.call(next_sentence)
      candidate += random_closing_punctuation if candidate !~ /[.;:?!]$/
      # Append the sentence that was just vetted, not a fresh unvetted one.
      candidate += " #{next_sentence}"
    end

    puts "MARKOV: #{candidate}" unless params["tweet"]

    tweet = candidate
    break if tweet.length < 110
  end

  if params["tweet"]
    raise "ERROR: EMPTY TWEET" if tweet.nil? || tweet == ''
    puts "TWEET: #{tweet}"
    Twitter.update(tweet)
  else
    puts "DEBUG: #{tweet}"
  end
end