This package now supports computing closeness among Python dicts.
This project is used to find relationships between MongoDB documents. You can evaluate the closeness using the rank field of each result; its value lies between 0 and 100.
This is the initial version of the project.
How to use:
Install package with pip
pip install closeness
See the example below:
from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# Connect with MongoClient's default settings and pick the collection the
# example documents are stored in.
client = MongoClient()
db = client["test_database"]
user_collection = db["user_collection"]
# Reference document: every other user is compared against this one.
user1 = {
    "name": "User 1",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

# Same age, gender, tags and friends as user1; only the name differs.
user2 = {
    "name": "User 2",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

# Shares only one tag and one friend with user1; age and gender differ.
user3 = {
    "name": "User 3",
    "age": 30,
    "gender": "female",
    "tags": ["tag1"],
    "friends": [
        {"user_id": "friend3", "name": "name3"},
    ],
}
# `Collection.insert` was deprecated in PyMongo 3.0 and removed in 4.0;
# `insert_many` is the supported call for adding several documents at once.
# (On PyMongo 2.x, which predates `insert_many`, use `insert` instead.)
user_collection.insert_many([user1, user2, user3])

# Match only documents whose name differs from user1's, so the reference
# document is not compared with itself.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}
# Comparison settings, grouped by field type. Each list is passed to the
# closeness classes through the keyword argument of the same name.

# Plain array fields.
ARRAY_CMP_FIELDS = [
    {"field": "tags", "weight": 3},
]

# Arrays of sub-documents. NOTE(review): 'unique' presumably names the key
# that identifies an element within the array -- confirm against the package.
ARRAY_DICT_CMP_FIELDS = [
    {"field": "friends", "unique": "user_id", "weight": 0.5},
]

# String fields.
STRING_CMP_FIELDS = [
    {"field": "gender", "weight": 0.5},
]

# Numeric fields. NOTE(review): 'from'/'to' look like an offset window around
# the reference value -- confirm against the package.
NUM_CMP_FIELDS = [
    {"field": "age", "from": -1, "to": 1, "weight": 0.3},
]

# Fields handed to ClosenessAggregation as its output field list.
OUT_PUT_FIELDS = ["name", "age"]
# Build the aggregation helper around the reference document.
test = ClosenessAggregation(
    user1,           # the document every candidate is compared with
    query_stage,     # $match stage built earlier
    OUT_PUT_FIELDS,  # output field list
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

# Request the FUZZY-mode pipeline and run it against the collection.
aggregation_query = test.get_aggregation_pipeline(mode=ClosenessAggregation.FUZZY)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
# u'result': [{u'age': 25,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
# u'weights': [{u'gender': 11.627906976744187,
# u'age': 6.9767441860465125,
# u'friends': 11.626615417599819,
# u'tags': 69.75969250559892}],
# u'name': u'User 2',
# u'rank': 99.99095908598945},
# {u'age': 30,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
# u'weights': [{u'gender': 0,
# u'age': 0,
# u'friends': 6.456076223518085,
# u'tags': 38.73645734110851}],
# u'name': u'User 3',
# u'rank': 45.1925335646266}]}
# Same ClosenessAggregation instance as before (it is bound to `test`), this
# time asking for the SIMPLE-mode pipeline.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.SIMPLE
)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
# u'result': [{u'age': 25,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
# u'weights': [{u'gender': 11.627906976744187,
# u'age': 6.9767441860465125,
# u'friends': 11.627906976744187,
# u'tags': 69.76744186046513}],
# u'name': u'User 2',
# u'rank': 100.00000000000001},
# {u'age': 30,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
# u'weights': [{u'gender': 0,
# u'age': 0,
# u'friends': 3.8759689922480622,
# u'tags': 23.255813953488374}],
# u'name': u'User 3',
# u'rank': 27.131782945736436}]}
# By using python dicts
# NOTE(review): `ClosenessDict` is not imported anywhere in this example.
# Import it from the closeness package before running this part; the module
# path is not shown here -- confirm against the package.
users = [user2, user3]
closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)
result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)

# Plain asserts: at script level there is no TestCase instance, so
# `self.assertEqual` would raise NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436
# [{'name': 'User 2',
# 'tags': ['tag1',
# 'tag2',
# 'tag3'],
# 'gender': 'male',
# 'age': 25,
# 'closeness': {'weightages': {'gender': 11.627906976744187,
# 'age': 6.9767441860465125,
# 'friends': 11.627906976744187,
# 'tags': 69.76744186046513},
# 'rank': 100.00000000000001},
# 'friends': ['friend1',
# 'friend2',
# 'friend3']},
# {'name': 'User 3',
# 'tags': ['tag1'],
# 'gender': 'female',
# 'age': 30,
# 'closeness': {'weightages': {'gender': 0.0,
# 'age': 0.0,
# 'friends': 3.8759689922480622,
# 'tags': 23.255813953488374},
# 'rank': 27.131782945736436},
# 'friends': ['friend3']}]
# Run the dict-based comparison again, this time in FUZZY mode.
result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)

# Plain asserts: at script level there is no TestCase instance, so
# `self.assertEqual` would raise NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406
# [{'name': 'User 2',
# 'tags': ['tag1',
# 'tag2',
# 'tag3'],
# 'gender': 'male',
# 'age': 25,
# 'closeness': {'weightages': {'gender': 11.627906976744187,
# 'age': 6.9767441860465125,
# 'friends': 11.627906976744187,
# 'tags': 69.76744186046513},
# 'rank': 100.00000000000001},
# 'friends': ['friend1',
# 'friend2',
# 'friend3']},
# {'name': 'User 3',
# 'tags': ['tag1'],
# 'gender': 'female',
# 'age': 30,
# 'closeness': {'weightages': {'gender': 0.0,
# 'age': 0.0,
# 'friends': 6.459948320413436,
# 'tags': 38.75968992248062},
# 'rank': 45.21963824289406},
# 'friends': ['friend3']}]