diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R
index b9831c232f31..c3bc5e2eef92 100644
--- a/R-package/R/lgb.Dataset.R
+++ b/R-package/R/lgb.Dataset.R
@@ -998,7 +998,11 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) {
 #' \itemize{
 #'     \item \code{label}: label lightgbm learn from ;
 #'     \item \code{weight}: to do a weight rescale ;
-#'     \item \code{group}: group size ;
+#'     \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
+#'         group rows together as ordered results from the same set of candidate results to be ranked.
+#'         For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)},
+#'         that means that you have 6 groups, where the first 10 records are in the first group,
+#'         records 11-30 are in the second group, etc.}
 #'     \item \code{init_score}: initial score is the base prediction lightgbm will boost from.
 #' }
 #'
@@ -1052,8 +1056,9 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) {
 #'     \item{\code{init_score}: initial score is the base prediction lightgbm will boost from}
 #'     \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
 #'         group rows together as ordered results from the same set of candidate results to be ranked.
-#'         For example, if you have a 1000-row dataset that contains 250 4-document query results,
-#'         set this to \code{rep(4L, 250L)}}
+#'         For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)},
+#'         that means that you have 6 groups, where the first 10 records are in the first group,
+#'         records 11-30 are in the second group, etc.}
 #' }
 #'
 #' @examples
diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd
index 11a489112d1d..e223d45dcaf4 100644
--- a/R-package/man/getinfo.Rd
+++ b/R-package/man/getinfo.Rd
@@ -30,7 +30,11 @@ The \code{name} field can be one of the following:
 \itemize{
     \item \code{label}: label lightgbm learn from ;
     \item \code{weight}: to do a weight rescale ;
-    \item \code{group}: group size ;
+    \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
+        group rows together as ordered results from the same set of candidate results to be ranked.
+        For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)},
+        that means that you have 6 groups, where the first 10 records are in the first group,
+        records 11-30 are in the second group, etc.}
     \item \code{init_score}: initial score is the base prediction lightgbm will boost from.
 }
 }
diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd
index b96cd9c4e83b..1481b621a6c0 100644
--- a/R-package/man/setinfo.Rd
+++ b/R-package/man/setinfo.Rd
@@ -35,8 +35,9 @@ The \code{name} field can be one of the following:
     \item{\code{init_score}: initial score is the base prediction lightgbm will boost from}
     \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to
         group rows together as ordered results from the same set of candidate results to be ranked.
-        For example, if you have a 1000-row dataset that contains 250 4-document query results,
-        set this to \code{rep(4L, 250L)}}
+        For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)},
+        that means that you have 6 groups, where the first 10 records are in the first group,
+        records 11-30 are in the second group, etc.}
 }
 }
 \examples{
diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 235dbb9040a2..d9c71d35472c 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -762,7 +762,7 @@ Dataset Parameters
 
    -  **Note**: works only in case of loading data directly from file
 
-   -  **Note**: data should be grouped by query\_id
+   -  **Note**: data should be grouped by query\_id; for more information, see `Query Data <#query-data>`__
 
    -  **Note**: index starts from ``0`` and it doesn't count the label column when passing type is ``int``, e.g. when label is column\_0 and query\_id is column\_1, the correct parameter is ``query=0``
 
@@ -1231,6 +1231,7 @@ Query Data
 ~~~~~~~~~~
 
 For learning to rank, it needs query information for training data.
+
 LightGBM uses an additional file to store query data, like the following:
 
 ::
@@ -1240,7 +1241,13 @@ LightGBM uses an additional file to store query data, like the following:
     67
     ...
 
-It means first ``27`` lines samples belong to one query and next ``18`` lines belong to another, and so on.
+For wrapper libraries such as the Python and R packages, this information can also be provided as an array-like via the Dataset parameter ``group``.
+
+::
+
+   [27, 18, 67, ...]
+
+For example, if you have a 112-document dataset with ``group = [27, 18, 67]``, that means that you have 3 groups, where the first 27 records are in the first group, records 28-45 are in the second group, and records 46-112 are in the third group.
 
 **Note**: data should be ordered by the query.
 
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index b8dd3817407c..aff6e49377ff 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -671,7 +671,7 @@ struct Config {
   // desc = use number for index, e.g. ``query=0`` means column\_0 is the query id
   // desc = add a prefix ``name:`` for column name, e.g. ``query=name:query_id``
   // desc = **Note**: works only in case of loading data directly from file
-  // desc = **Note**: data should be grouped by query\_id
+  // desc = **Note**: data should be grouped by query\_id; for more information, see `Query Data <#query-data>`__
   // desc = **Note**: index starts from ``0`` and it doesn't count the label column when passing type is ``int``, e.g. when label is column\_0 and query\_id is column\_1, the correct parameter is ``query=0``
   std::string group_column = "";
 
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index a48350d9e280..5ab1f7128b08 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -941,7 +941,10 @@ def __init__(self, data, label=None, reference=None,
         weight : list, numpy 1-D array, pandas Series or None, optional (default=None)
             Weight for each instance.
         group : list, numpy 1-D array, pandas Series or None, optional (default=None)
-            Group/query size for Dataset.
+            Group/query data.
+            Only used in the learning-to-rank task.
+            sum(group) = n_samples.
+            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
         init_score : list, numpy 1-D array, pandas Series or None, optional (default=None)
             Init score for Dataset.
         silent : bool, optional (default=False)
@@ -1356,7 +1359,10 @@ def create_valid(self, data, label=None, weight=None, group=None,
         weight : list, numpy 1-D array, pandas Series or None, optional (default=None)
             Weight for each instance.
         group : list, numpy 1-D array, pandas Series or None, optional (default=None)
-            Group/query size for Dataset.
+            Group/query data.
+            Only used in the learning-to-rank task.
+            sum(group) = n_samples.
+            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
         init_score : list, numpy 1-D array, pandas Series or None, optional (default=None)
             Init score for Dataset.
         silent : bool, optional (default=False)
@@ -1715,7 +1721,10 @@ def set_group(self, group):
         Parameters
         ----------
         group : list, numpy 1-D array, pandas Series or None
-            Group size of each group.
+            Group/query data.
+            Only used in the learning-to-rank task.
+            sum(group) = n_samples.
+            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
 
         Returns
         -------
@@ -1830,7 +1839,10 @@ def get_group(self):
         Returns
         -------
         group : numpy array or None
-            Group size of each group.
+            Group/query data.
+            Only used in the learning-to-rank task.
+            sum(group) = n_samples.
+            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
         """
         if self.group is None:
             self.group = self.get_field('group')
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 45c3d04e77f6..4c9d7fd021ad 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -36,7 +36,10 @@ def __init__(self, func):
                 y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                     The predicted values.
                 group : array-like
-                    Group/query data, used for ranking task.
+                    Group/query data.
+                    Only used in the learning-to-rank task.
+                    sum(group) = n_samples.
+                    For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
                 grad : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                     The value of the first order derivative (gradient) for each sample point.
                 hess : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
@@ -122,7 +125,10 @@ def __init__(self, func):
                 weight : array-like of shape = [n_samples]
                     The weight of samples.
                 group : array-like
-                    Group/query data, used for ranking task.
+                    Group/query data.
+                    Only used in the learning-to-rank task.
+                    sum(group) = n_samples.
+                    For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
                 eval_name : string
                     The name of evaluation function (without whitespaces).
                 eval_result : float
@@ -266,7 +272,10 @@ def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
             y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                 The predicted values.
             group : array-like
-                Group/query data, used for ranking task.
+                Group/query data.
+                Only used in the learning-to-rank task.
+                sum(group) = n_samples.
+                For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
             grad : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                 The value of the first order derivative (gradient) for each sample point.
             hess : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
@@ -384,7 +393,10 @@ def fit(self, X, y,
         init_score : array-like of shape = [n_samples] or None, optional (default=None)
             Init score of training data.
         group : array-like or None, optional (default=None)
-            Group data of training data.
+            Group/query data.
+            Only used in the learning-to-rank task.
+            sum(group) = n_samples.
+            For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
         eval_set : list or None, optional (default=None)
             A list of (X, y) tuple pairs to use as validation sets.
         eval_names : list of strings or None, optional (default=None)
@@ -460,7 +472,10 @@ def fit(self, X, y,
             weight : array-like of shape = [n_samples]
                 The weight of samples.
             group : array-like
-                Group/query data, used for ranking task.
+                Group/query data.
+                Only used in the learning-to-rank task.
+                sum(group) = n_samples.
+                For example, if you have a 100-document dataset with ``group = [10, 20, 40, 10, 10, 10]``, that means that you have 6 groups, where the first 10 records are in the first group, records 11-30 are in the second group, etc.
             eval_name : string
                 The name of evaluation function (without whitespaces).
             eval_result : float