Skip to content

Commit 71242a8

Browse files
committed
[7.x] Support 'calendar_interval' and 'fixed_interval' in DateHistogramFacet
1 parent 10e03f5 commit 71242a8

File tree

5 files changed

+177
-64
lines changed

5 files changed

+177
-64
lines changed

docs/faceted_search.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ There are several different facets available:
4949
provides an option to split documents into groups based on a value of a field, for example ``TermsFacet(field='category')``
5050

5151
``DateHistogramFacet``
52-
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", interval="day")``
52+
split documents into time intervals, example: ``DateHistogramFacet(field="published_date", calendar_interval="day")``
5353

5454
``HistogramFacet``
5555
similar to ``DateHistogramFacet`` but for numerical values: ``HistogramFacet(field="rating", interval=2)``

elasticsearch_dsl/faceted_search.py

+33-5
Original file line numberDiff line numberDiff line change
@@ -168,14 +168,34 @@ def get_value_filter(self, filter_value):
168168
)
169169

170170

171+
def _date_interval_month(d):
172+
return (d + timedelta(days=32)).replace(day=1)
173+
174+
175+
def _date_interval_week(d):
176+
return d + timedelta(days=7)
177+
178+
179+
def _date_interval_day(d):
180+
return d + timedelta(days=1)
181+
182+
183+
def _date_interval_hour(d):
184+
return d + timedelta(hours=1)
185+
186+
171187
class DateHistogramFacet(Facet):
172188
agg_type = "date_histogram"
173189

174190
DATE_INTERVALS = {
175-
"month": lambda d: (d + timedelta(days=32)).replace(day=1),
176-
"week": lambda d: d + timedelta(days=7),
177-
"day": lambda d: d + timedelta(days=1),
178-
"hour": lambda d: d + timedelta(hours=1),
191+
"month": _date_interval_month,
192+
"1M": _date_interval_month,
193+
"week": _date_interval_week,
194+
"1w": _date_interval_week,
195+
"day": _date_interval_day,
196+
"1d": _date_interval_day,
197+
"hour": _date_interval_hour,
198+
"1h": _date_interval_hour,
179199
}
180200

181201
def __init__(self, **kwargs):
@@ -194,12 +214,20 @@ def get_value(self, bucket):
194214
return bucket["key"]
195215

196216
def get_value_filter(self, filter_value):
    """Build the ``Range`` query matching the bucket that starts at ``filter_value``.

    The range is ``[filter_value, next_boundary)`` on the facet's field,
    where ``next_boundary`` is computed by the ``DATE_INTERVALS`` entry for
    the configured interval.

    :arg filter_value: start of the selected bucket (a date/datetime).
    :raises KeyError: when no interval parameter was configured (the
        missing key is ``"interval"``) or when the configured interval has
        no entry in ``DATE_INTERVALS``.
    """
    # Prefer the explicit interval kinds; the first one present wins.
    for interval_type in ("calendar_interval", "fixed_interval"):
        if interval_type in self._params:
            break
    else:
        # Neither was supplied: fall back to the plain ``interval`` parameter.
        interval_type = "interval"

    return Range(
        _expand__to_dot=False,
        **{
            self._params["field"]: {
                "gte": filter_value,
                "lt": self.DATE_INTERVALS[self._params[interval_type]](
                    filter_value
                ),
            }
        }
    )

tests/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919

2020
import os
21+
import re
2122
from datetime import datetime
2223

2324
from elasticsearch.helpers import bulk
@@ -47,6 +48,16 @@ def client():
4748
skip()
4849

4950

51+
@fixture(scope="session")
def es_version(client):
    """Yield the connected server's version as a tuple of ints.

    The leading run of digits and dots in ``info["version"]["number"]`` is
    split on ``"."``, so any suffix after it (for example a ``-SNAPSHOT``
    tag) is ignored.

    :raises ValueError: if the reported version does not start with a digit
        or dot, instead of failing later with an opaque ``AttributeError``.
    """
    info = client.info()
    print(info)
    version_number = info["version"]["number"]
    match = re.match(r"^([0-9.]+)", version_number)
    if match is None:
        raise ValueError(
            "Unable to parse Elasticsearch version: %r" % (version_number,)
        )
    yield tuple(int(x) for x in match.group(1).split("."))
59+
60+
5061
@fixture
5162
def write_client(client):
5263
yield client

tests/test_faceted_search.py

+44
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
from datetime import datetime
1919

20+
import pytest
21+
2022
from elasticsearch_dsl.faceted_search import (
2123
DateHistogramFacet,
2224
FacetedSearch,
@@ -144,3 +146,45 @@ def test_date_histogram_facet_with_1970_01_01_date():
144146
dhf = DateHistogramFacet()
145147
assert dhf.get_value({"key": None}) == datetime(1970, 1, 1, 0, 0)
146148
assert dhf.get_value({"key": 0}) == datetime(1970, 1, 1, 0, 0)
149+
150+
151+
@pytest.mark.parametrize(
    ["interval_type", "interval"],
    [
        ("interval", "month"),
        ("calendar_interval", "month"),
        ("interval", "week"),
        ("calendar_interval", "week"),
        ("interval", "day"),
        ("calendar_interval", "day"),
        ("fixed_interval", "day"),
        ("interval", "hour"),
        ("fixed_interval", "hour"),
        ("interval", "1M"),
        ("calendar_interval", "1M"),
        ("interval", "1w"),
        ("calendar_interval", "1w"),
        ("interval", "1d"),
        ("calendar_interval", "1d"),
        ("fixed_interval", "1d"),
        ("interval", "1h"),
        ("fixed_interval", "1h"),
    ],
)
def test_date_histogram_interval_types(interval_type, interval):
    """Each interval keyword is passed through to the aggregation unchanged.

    Also checks that ``get_value_filter`` can resolve every listed interval
    value (long and short spellings) without raising.
    """
    dhf = DateHistogramFacet(field="@timestamp", **{interval_type: interval})
    assert dhf.get_aggregation().to_dict() == {
        "date_histogram": {
            "field": "@timestamp",
            interval_type: interval,
            "min_doc_count": 0,
        }
    }
    # Only exercised for "does not raise"; the returned query is not inspected.
    dhf.get_value_filter(datetime.now())
184+
185+
186+
def test_date_histogram_no_interval_keyerror():
    """Without any interval parameter the lookup fails on the ``interval`` key."""
    dhf = DateHistogramFacet(field="@timestamp")
    with pytest.raises(KeyError) as e:
        dhf.get_value_filter(datetime.now())
    # Checked after the ``with`` block: ``e.value`` is only set once it exits.
    assert str(e.value) == "'interval'"

tests/test_integration/test_faceted_search.py

+88-58
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
from datetime import datetime
1919

20+
import pytest
21+
2022
from elasticsearch_dsl import A, Boolean, Date, Document, Keyword
2123
from elasticsearch_dsl.faceted_search import (
2224
DateHistogramFacet,
@@ -29,25 +31,6 @@
2931
from .test_document import PullRequest
3032

3133

32-
class CommitSearch(FacetedSearch):
33-
index = "flat-git"
34-
fields = (
35-
"description",
36-
"files",
37-
)
38-
39-
facets = {
40-
"files": TermsFacet(field="files"),
41-
"frequency": DateHistogramFacet(
42-
field="authored_date", interval="day", min_doc_count=1
43-
),
44-
"deletions": RangeFacet(
45-
field="stats.deletions",
46-
ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
47-
),
48-
}
49-
50-
5134
class Repos(Document):
5235
is_public = Boolean()
5336
created_at = Date()
@@ -64,19 +47,6 @@ class Index:
6447
name = "git"
6548

6649

67-
class RepoSearch(FacetedSearch):
68-
index = "git"
69-
doc_types = [Repos]
70-
facets = {
71-
"public": TermsFacet(field="is_public"),
72-
"created": DateHistogramFacet(field="created_at", interval="month"),
73-
}
74-
75-
def search(self):
76-
s = super(RepoSearch, self).search()
77-
return s.filter("term", commit_repo="repo")
78-
79-
8050
class MetricSearch(FacetedSearch):
8151
index = "git"
8252
doc_types = [Commit]
@@ -86,15 +56,72 @@ class MetricSearch(FacetedSearch):
8656
}
8757

8858

89-
class PRSearch(FacetedSearch):
90-
index = "test-prs"
91-
doc_types = [PullRequest]
92-
facets = {
93-
"comments": NestedFacet(
94-
"comments",
95-
DateHistogramFacet(field="comments.created_at", interval="month"),
59+
@pytest.fixture(scope="session")
def commit_search_cls(es_version):
    """Return a ``CommitSearch`` class suited to the server version under test.

    On servers at or above 7.2 the date histogram facet is configured with
    ``fixed_interval="1d"``; older servers get ``interval="day"``.
    """
    if es_version >= (7, 2):
        interval_kwargs = {"fixed_interval": "1d"}
    else:
        interval_kwargs = {"interval": "day"}

    class CommitSearch(FacetedSearch):
        index = "flat-git"
        fields = (
            "description",
            "files",
        )

        facets = {
            "files": TermsFacet(field="files"),
            "frequency": DateHistogramFacet(
                field="authored_date", min_doc_count=1, **interval_kwargs
            ),
            "deletions": RangeFacet(
                field="stats.deletions",
                ranges=[("ok", (None, 1)), ("good", (1, 5)), ("better", (5, None))],
            ),
        }

    return CommitSearch
85+
86+
87+
@pytest.fixture(scope="session")
def repo_search_cls(es_version):
    """Return a ``RepoSearch`` class suited to the server version under test.

    The monthly date histogram uses ``calendar_interval`` on servers at or
    above 7.2 and ``interval`` otherwise.
    """
    interval_type = "calendar_interval" if es_version >= (7, 2) else "interval"

    class RepoSearch(FacetedSearch):
        index = "git"
        doc_types = [Repos]
        facets = {
            "public": TermsFacet(field="is_public"),
            "created": DateHistogramFacet(
                field="created_at", **{interval_type: "month"}
            ),
        }

        def search(self):
            # Narrow the base search with a term filter on ``commit_repo``.
            s = super(RepoSearch, self).search()
            return s.filter("term", commit_repo="repo")

    return RepoSearch
106+
107+
108+
@pytest.fixture(scope="session")
def pr_search_cls(es_version):
    """Return a ``PRSearch`` class suited to the server version under test.

    The nested monthly date histogram uses ``calendar_interval`` on servers
    at or above 7.2 and ``interval`` otherwise.
    """
    interval_type = "calendar_interval" if es_version >= (7, 2) else "interval"

    class PRSearch(FacetedSearch):
        index = "test-prs"
        doc_types = [PullRequest]
        facets = {
            "comments": NestedFacet(
                "comments",
                DateHistogramFacet(
                    field="comments.created_at", **{interval_type: "month"}
                ),
            )
        }

    return PRSearch
98125

99126

100127
def test_facet_with_custom_metric(data_client):
@@ -106,36 +133,36 @@ def test_facet_with_custom_metric(data_client):
106133
assert dates[0] == 1399038439000
107134

108135

109-
def test_nested_facet(pull_request, pr_search_cls):
    """An unfiltered search returns the single PR and one unselected bucket."""
    prs = pr_search_cls()
    r = prs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, False)] == r.facets.comments
115142

116143

117-
def test_nested_facet_with_filter(pull_request, pr_search_cls):
    """Filtering on the nested facet marks the bucket selected and narrows hits."""
    prs = pr_search_cls(filters={"comments": datetime(2018, 1, 1, 0, 0)})
    r = prs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2018, 1, 1, 0, 0), 1, True)] == r.facets.comments

    # A filter for the following month must match nothing.
    prs = pr_search_cls(filters={"comments": datetime(2018, 2, 1, 0, 0)})
    r = prs.execute()
    assert not r.hits
127154

128155

129-
def test_datehistogram_facet(data_client, repo_search_cls):
    """The monthly ``created`` facet yields one unselected March 2014 bucket."""
    rs = repo_search_cls()
    r = rs.execute()

    assert r.hits.total.value == 1
    assert [(datetime(2014, 3, 1, 0, 0), 1, False)] == r.facets.created
135162

136163

137-
def test_boolean_facet(data_client):
138-
rs = RepoSearch()
164+
def test_boolean_facet(data_client, repo_search_cls):
165+
rs = repo_search_cls()
139166
r = rs.execute()
140167

141168
assert r.hits.total.value == 1
@@ -144,9 +171,8 @@ def test_boolean_facet(data_client):
144171
assert value is True
145172

146173

147-
def test_empty_search_finds_everything(data_client):
148-
cs = CommitSearch()
149-
174+
def test_empty_search_finds_everything(data_client, es_version, commit_search_cls):
175+
cs = commit_search_cls()
150176
r = cs.execute()
151177

152178
assert r.hits.total.value == 52
@@ -190,8 +216,10 @@ def test_empty_search_finds_everything(data_client):
190216
] == r.facets.deletions
191217

192218

193-
def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
194-
cs = CommitSearch(filters={"files": "test_elasticsearch_dsl"})
219+
def test_term_filters_are_shown_as_selected_and_data_is_filtered(
220+
data_client, commit_search_cls
221+
):
222+
cs = commit_search_cls(filters={"files": "test_elasticsearch_dsl"})
195223

196224
r = cs.execute()
197225

@@ -234,16 +262,18 @@ def test_term_filters_are_shown_as_selected_and_data_is_filtered(data_client):
234262
] == r.facets.deletions
235263

236264

237-
def test_range_filters_are_shown_as_selected_and_data_is_filtered(data_client):
238-
cs = CommitSearch(filters={"deletions": "better"})
265+
def test_range_filters_are_shown_as_selected_and_data_is_filtered(
266+
data_client, commit_search_cls
267+
):
268+
cs = commit_search_cls(filters={"deletions": "better"})
239269

240270
r = cs.execute()
241271

242272
assert 19 == r.hits.total.value
243273

244274

245-
def test_pagination(data_client):
246-
cs = CommitSearch()
275+
def test_pagination(data_client, commit_search_cls):
276+
cs = commit_search_cls()
247277
cs = cs[0:20]
248278

249279
assert 52 == cs.count()

0 commit comments

Comments
 (0)