-
Notifications
You must be signed in to change notification settings - Fork 0
/
es_to_csv.rb
96 lines (94 loc) · 2.32 KB
/
es_to_csv.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env ruby
require 'csv'
require 'json'

require 'elasticsearch'
#######################################
#
# Main
#
# Runs a scrolled Elasticsearch search over all indices for documents that
# match SEARCH_QUERY and whose @timestamp lies within a single day, then
# prints one CSV line per hit to stdout with the columns:
#   time_stamp,group_id,response_code,client_ip,bytes_overall,domain
#
#######################################

# Host passed to the Elasticsearch client.
ES_HOST = 'gsr-elastic.geosurf.io'
# Lucene query string selecting the documents to export.
SEARCH_QUERY = 'bytes_read: [0 TO *] AND group_id: 6895'
# Keep-alive requested for the scroll context on every request.
SCROLL_KEEP_ALIVE = '5m'
# Number of hits requested per page.
PAGE_SIZE = 1000
# Length of the exported window, in milliseconds.
MILLIS_PER_DAY = 86_400_000

# Builds the search request body for the given inclusive time window.
#
# @param gte_millis [Integer] window start, epoch milliseconds (inclusive)
# @param lte_millis [Integer] window end, epoch milliseconds (inclusive)
# @return [Hash] request body handed to `client.search`
def build_search_body(gte_millis, lte_millis)
  {
    "version": true,
    "size": PAGE_SIZE,
    "sort": [
      {
        "@timestamp": {
          "order": "desc",
          "unmapped_type": "boolean"
        }
      }
    ],
    "_source": {
      "excludes": []
    },
    "aggs": {
      "2": {
        "date_histogram": {
          "field": "@timestamp",
          "interval": "30m",
          "time_zone": "Asia/Jerusalem",
          "min_doc_count": 1
        }
      }
    },
    "stored_fields": [
      "*"
    ],
    "script_fields": {
      # Computed per document; read back below as fields.bytes_overall[0].
      "bytes_overall": {
        "script": {
          "inline": "doc['bytes_read'].value + doc['bytes_uploaded'].value",
          "lang": "painless"
        }
      }
    },
    "docvalue_fields": [
      "@timestamp",
      "received_at"
    ],
    "query": {
      "bool": {
        "must": [
          {
            "query_string": {
              "query": SEARCH_QUERY,
              "analyze_wildcard": true,
              "default_field": "*"
            }
          },
          {
            "range": {
              "@timestamp": {
                "gte": gte_millis,
                "lte": lte_millis,
                "format": "epoch_millis"
              }
            }
          }
        ],
        "filter": [],
        "should": [],
        "must_not": []
      }
    },
    "highlight": {
      "pre_tags": [
        "@kibana-highlighted-field@"
      ],
      "post_tags": [
        "@/kibana-highlighted-field@"
      ],
      "fields": {
        "*": {}
      },
      "fragment_size": 2147483647
    }
  }
end

# Formats one search hit as a single CSV line (terminated by a newline).
#
# Fields that are missing from the hit are emitted as empty columns instead
# of raising, and values containing commas, quotes or newlines are quoted so
# the column count stays stable.
#
# @param hit [Hash] one element of response['hits']['hits']
# @return [String] the CSV-encoded row
def csv_row(hit)
  source = hit['_source'] || {}
  CSV.generate_line(
    [
      source['time_stamp'],
      source['group_id'],
      source['response_code'],
      source['client_ip'],
      hit.dig('fields', 'bytes_overall', 0),
      source['domain']
    ]
  )
end

# Time.new without an offset uses the machine's local time zone, so the
# exported window is the local-time day starting at this midnight.
day_start = Time.new(2018, 7, 22)
start_time = (day_start.to_f * 1000).to_i
# Inclusive upper bound: the last millisecond of the same day.
end_time = start_time + MILLIS_PER_DAY - 1

client = Elasticsearch::Client.new host: ES_HOST
response = client.search(
  index: '_all',
  scroll: SCROLL_KEEP_ALIVE,
  size: PAGE_SIZE,
  body: build_search_body(start_time, end_time)
)

# Page through the scroll until a page comes back with no hits.
# NOTE(review): the scroll context is left to expire after SCROLL_KEEP_ALIVE;
# consider releasing it explicitly with clear_scroll - confirm the call
# signature against the installed client version.
until response['hits']['hits'].empty?
  response['hits']['hits'].each { |hit| puts csv_row(hit) }
  response = client.scroll(scroll_id: response['_scroll_id'], scroll: SCROLL_KEEP_ALIVE)
end