Skip to content

Commit f925eac

Browse files
authored
Merge pull request #69 from dipinarora9/master - ADDHASH
feat: Added support for ADDHASH command
2 parents c54dd56 + 9106c12 commit f925eac

File tree

5 files changed

+214
-22
lines changed

5 files changed

+214
-22
lines changed

API.md

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,25 @@ Add a single document to the index.
195195
NOTE: Geo points should be encoded as strings of "lon,lat"
196196

197197

198+
### add\_document\_hash
199+
```py
200+
201+
def add_document_hash(self, doc_id, score=1.0, language=None, replace=False)
202+
203+
```
204+
205+
206+
207+
Add a hash document to the index.
208+
209+
### Parameters
210+
211+
- **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs.
212+
- **score**: the document ranking, between 0.0 and 1.0
213+
- **replace**: if True, and the document is already in the index, we perform an update and reindex the document
214+
- **language**: Specify the language used for document tokenization.
215+
216+
198217
### aggregate
199218
```py
200219

@@ -264,7 +283,7 @@ Create the search index. The index must not already exist.
264283
### delete\_document
265284
```py
266285

267-
def delete_document(self, doc_id, conn=None)
286+
def delete_document(self, doc_id, conn=None, delete_actual_document=False)
268287

269288
```
270289

@@ -273,6 +292,9 @@ def delete_document(self, doc_id, conn=None)
273292
Delete a document from index
274293
Returns 1 if the document was deleted, 0 if not
275294

295+
### Parameters
296+
297+
- **delete_actual_document**: if set to True, RediSearch also deletes the actual document if it is in the index
276298

277299
### drop\_index
278300
```py
@@ -361,6 +383,18 @@ def add_document(self, doc_id, nosave=False, score=1.0, payload=None, replace=Fa
361383
Add a document to the batch query
362384

363385

386+
### add\_document\_hash
387+
```py
388+
389+
def add_document_hash(self, doc_id, score=1.0, language=None, replace=False)
390+
391+
```
392+
393+
394+
395+
Add a hash document to the batch query
396+
397+
364398
### commit
365399
```py
366400

gendoc.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,4 @@ def generatedocs(module):
7171
print("Error while trying to import " + module)
7272

7373
if __name__ == '__main__':
74-
75-
print generatedocs(sys.argv[1])
74+
print(generatedocs(sys.argv[1]))

redisearch/client.py

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class Field(object):
2626
def __init__(self, name, *args):
2727
self.name = name
2828
self.args = args
29-
29+
3030
def redis_args(self):
3131
return [self.name] + list(self.args)
3232

@@ -87,14 +87,15 @@ class TagField(Field):
8787
TagField is a tag-indexing field with simpler compression and tokenization.
8888
See http://redisearch.io/Tags/
8989
"""
90-
def __init__(self, name, separator = ',', no_index=False):
90+
91+
def __init__(self, name, separator=',', no_index=False):
9192
args = [Field.TAG, Field.SEPARATOR, separator]
9293

9394
if no_index:
9495
args.append(Field.NOINDEX)
9596

9697
Field.__init__(self, name, *args)
97-
98+
9899

99100
class Client(object):
100101
"""
@@ -108,6 +109,7 @@ class Client(object):
108109
ALTER_CMD = 'FT.ALTER'
109110
SEARCH_CMD = 'FT.SEARCH'
110111
ADD_CMD = 'FT.ADD'
112+
ADDHASH_CMD = "FT.ADDHASH"
111113
DROP_CMD = 'FT.DROP'
112114
EXPLAIN_CMD = 'FT.EXPLAIN'
113115
DEL_CMD = 'FT.DEL'
@@ -120,7 +122,6 @@ class Client(object):
120122
GET_CMD = 'FT.GET'
121123
MGET_CMD = 'FT.MGET'
122124

123-
124125
NOOFFSETS = 'NOOFFSETS'
125126
NOFIELDS = 'NOFIELDS'
126127
STOPWORDS = 'STOPWORDS'
@@ -156,6 +157,20 @@ def add_document(self, doc_id, nosave=False, score=1.0, payload=None,
156157
if self.current_chunk >= self.chunk_size:
157158
self.commit()
158159

160+
def add_document_hash(
161+
self, doc_id, score=1.0, replace=False,
162+
):
163+
"""
164+
Add a hash to the batch query
165+
"""
166+
self.client._add_document_hash(
167+
doc_id, conn=self.pipeline, score=score, replace=replace,
168+
)
169+
self.current_chunk += 1
170+
self.total += 1
171+
if self.current_chunk >= self.chunk_size:
172+
self.commit()
173+
159174
def commit(self):
160175
"""
161176
Manually commit and flush the batch indexing query
@@ -182,7 +197,7 @@ def batch_indexer(self, chunk_size=100):
182197
return Client.BatchIndexer(self, chunk_size=chunk_size)
183198

184199
def create_index(self, fields, no_term_offsets=False,
185-
no_field_flags=False, stopwords = None):
200+
no_field_flags=False, stopwords=None):
186201
"""
187202
Create the search index. The index must not already exist.
188203
@@ -203,7 +218,7 @@ def create_index(self, fields, no_term_offsets=False,
203218
args += [self.STOPWORDS, len(stopwords)]
204219
if len(stopwords) > 0:
205220
args += list(stopwords)
206-
221+
207222
args.append('SCHEMA')
208223

209224
args += list(itertools.chain(*(f.redis_args() for f in fields)))
@@ -230,7 +245,7 @@ def drop_index(self):
230245
Drop the index if it exists
231246
"""
232247
return self.redis.execute_command(self.DROP_CMD, self.index_name)
233-
248+
234249
def _add_document(self, doc_id, conn=None, nosave=False, score=1.0, payload=None,
235250
replace=False, partial=False, language=None, no_create=False, **fields):
236251
"""
@@ -260,6 +275,25 @@ def _add_document(self, doc_id, conn=None, nosave=False, score=1.0, payload=None
260275
args += list(itertools.chain(*fields.items()))
261276
return conn.execute_command(*args)
262277

278+
def _add_document_hash(
279+
self, doc_id, conn=None, score=1.0, language=None, replace=False,
280+
):
281+
"""
282+
Internal add_document_hash used for both batch and single doc indexing
283+
"""
284+
if conn is None:
285+
conn = self.redis
286+
287+
args = [self.ADDHASH_CMD, self.index_name, doc_id, score]
288+
289+
if replace:
290+
args.append("REPLACE")
291+
292+
if language:
293+
args += ["LANGUAGE", language]
294+
295+
return conn.execute_command(*args)
296+
263297
def add_document(self, doc_id, nosave=False, score=1.0, payload=None,
264298
replace=False, partial=False, language=None, no_create=False, **fields):
265299
"""
@@ -281,20 +315,44 @@ def add_document(self, doc_id, nosave=False, score=1.0, payload=None,
281315
- **fields** kwargs dictionary of the document fields to be saved and/or indexed.
282316
NOTE: Geo points should be encoded as strings of "lon,lat"
283317
"""
284-
return self._add_document(doc_id, conn=None, nosave=nosave, score=score,
318+
return self._add_document(doc_id, conn=None, nosave=nosave, score=score,
285319
payload=payload, replace=replace,
286-
partial=partial, language=language,
287-
no_create=no_create,**fields)
320+
partial=partial, language=language,
321+
no_create=no_create, **fields)
322+
323+
def add_document_hash(
324+
self, doc_id, score=1.0, language=None, replace=False,
325+
):
326+
"""
327+
Add a hash document to the index.
328+
329+
### Parameters
330+
331+
- **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs.
332+
- **score**: the document ranking, between 0.0 and 1.0
333+
- **replace**: if True, and the document is already in the index, we perform an update and reindex the document
334+
- **language**: Specify the language used for document tokenization.
335+
"""
336+
return self._add_document_hash(
337+
doc_id, conn=None, score=score, language=language, replace=replace,
338+
)
288339

289-
def delete_document(self, doc_id, conn=None):
340+
def delete_document(self, doc_id, conn=None, delete_actual_document=False):
290341
"""
291342
Delete a document from index
292343
Returns 1 if the document was deleted, 0 if not
344+
345+
### Parameters
346+
347+
- **delete_actual_document**: if set to True, RediSearch also deletes the actual document if it is in the index
293348
"""
349+
args = [self.DEL_CMD, self.index_name, doc_id]
294350
if conn is None:
295351
conn = self.redis
352+
if delete_actual_document:
353+
args.append('DD')
296354

297-
return conn.execute_command(self.DEL_CMD, self.index_name, doc_id)
355+
return conn.execute_command(*args)
298356

299357
def load_document(self, id):
300358
"""
@@ -315,12 +373,12 @@ def load_document(self, id):
315373
def get(self, *ids):
316374
"""
317375
Returns the full contents of multiple documents.
318-
376+
319377
### Parameters
320-
378+
321379
- **ids**: the ids of the saved documents.
322380
"""
323-
381+
324382
return self.redis.execute_command('FT.MGET', self.index_name, *ids)
325383

326384
def info(self):
@@ -386,7 +444,8 @@ def aggregate(self, query):
386444
elif isinstance(query, Cursor):
387445
has_schema = False
388446
has_cursor = True
389-
cmd = [self.CURSOR_CMD, 'READ', self.index_name] + query.build_args()
447+
cmd = [self.CURSOR_CMD, 'READ',
448+
self.index_name] + query.build_args()
390449
else:
391450
raise ValueError('Bad query', query)
392451

@@ -401,7 +460,7 @@ def aggregate(self, query):
401460
else:
402461
cursor = None
403462

404-
if query._with_schema:
463+
if isinstance(query, AggregateRequest) and query._with_schema:
405464
schema = raw[0]
406465
rows = raw[2:]
407466
else:

redisearch/reducers.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class avg(FieldOnlyReducer):
5656
def __init__(self, field):
5757
super(avg, self).__init__(field)
5858

59+
5960
class tolist(FieldOnlyReducer):
6061
"""
6162
Returns all the matched properties in a list
@@ -65,6 +66,7 @@ class tolist(FieldOnlyReducer):
6566
def __init__(self, field):
6667
super(tolist, self).__init__(field)
6768

69+
6870
class count_distinct(FieldOnlyReducer):
6971
"""
7072
Calculate the number of distinct values contained in all the results in
@@ -82,7 +84,7 @@ class count_distinctish(FieldOnlyReducer):
8284
group for the given field. This uses a faster algorithm than
8385
`count_distinct` but is less accurate
8486
"""
85-
name = 'COUNT_DISTINCTISH'
87+
NAME = 'COUNT_DISTINCTISH'
8688

8789

8890
class quantile(Reducer):
@@ -101,7 +103,7 @@ class stddev(FieldOnlyReducer):
101103
"""
102104
Return the standard deviation for the values within the group
103105
"""
104-
name = 'STDDEV'
106+
NAME = 'STDDEV'
105107

106108
def __init__(self, field):
107109
super(stddev, self).__init__(field)

0 commit comments

Comments
 (0)