Skip to content

Commit

Permalink
fix: some bugs related to connection between ranker and engine
Browse files Browse the repository at this point in the history
  • Loading branch information
AhmedHamed3699 committed May 14, 2024
1 parent 1995506 commit df72e57
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ mistermeow/.project
mistermeow/.settings/org.eclipse.buildship.core.prefs
mistermeow/app/.settings
mistermeow/app/src/meowapp/node_modules
.vscode/*
3 changes: 0 additions & 3 deletions .vscode/settings.json

This file was deleted.

19 changes: 8 additions & 11 deletions mistermeow/app/src/main/java/meowEngine/QueryEngineController.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,43 @@
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import meowdbmanager.DBManager;
import org.bson.Document;
import org.bson.types.ObjectId;
import org.jsoup.Jsoup;
import org.springframework.web.bind.annotation.*;

import meowdbmanager.DBManager;
import meowindexer.Tokenizer;
import meowranker.PhraseRanker;
import meowranker.*;

//TODO: normal queries with ranking
//TODO: bold in snippets
@CrossOrigin(origins = "*", allowedHeaders = "*")
@RestController
@RequestMapping("/")
public class QueryEngineController {
private DBManager dbManager;
private Tokenizer tokenizer;
private PhraseRanker phraseRanker;
private Ranker ranker, phraseRanker;
private List<ObjectId> docs;
private String currentQuery;
private boolean isPhraseMatching, isFirstTime;
private String[] phrases;
private int[] operators; // 0: None, 1: AND, 2: OR, 3: NOT
private List<String> tokens, tags, suggestions;
private List<String> tokens, suggestions;
private int resultCount;
private final int numOfDocsInPage = 20, windowCharSize = 100;

public QueryEngineController() {
dbManager = new DBManager();
tokenizer = new Tokenizer();
phraseRanker = new PhraseRanker();
ranker = new QueryRanker();
docs = new ArrayList<>();
currentQuery = "";
isPhraseMatching = false;
isFirstTime = true;
phrases = new String[3];
operators = new int[2];
tags = new ArrayList<>();
tokens = new ArrayList<>();
suggestions = new ArrayList<>();
resultCount = 0;
Expand Down Expand Up @@ -72,7 +70,6 @@ public Document searchQuery(@RequestParam("query") String query,
parse(currentQuery);
dbManager.insertSuggestion(currentQuery);
tokens = tokenizer.tokenizeString(currentQuery, false);
tags = tokenizer.tokenizeString(currentQuery, false);
docs = rankDocs();
isFirstTime = false;
resultCount = docs.size();
Expand Down Expand Up @@ -129,11 +126,10 @@ private Document getResults(List<ObjectId> docs) {
availableCount--;
}

System.out.println("Results: " + results);
Document data = new Document("results", results)
.append("count", resultCount)
.append("availableCount", availableCount)
.append("tags", tags)
.append("tags", tokens)
.append("suggestions", suggestions);

return data;
Expand All @@ -147,6 +143,8 @@ public String getSnippet(String doc) {
Matcher stringMatch = Pattern.compile("\\b" + string + "\\b").matcher(textContent);
if (stringMatch.find()) {
int index = stringMatch.start();
textContent = textContent.substring(0, index) + "*" + string + "*"
+ textContent.substring(index + string.length());
int start = Math.max(0, index - windowCharSize);
int end = Math.min(textContent.length(), index + windowCharSize);
return textContent.substring(start, end);
Expand All @@ -163,10 +161,9 @@ private List<ObjectId> rankDocs() {
useOperator(docIDs, operators[0], 1);
if (phrases[2] != null)
useOperator(docIDs, operators[1], 2);
System.out.println("DocIDs: " + docIDs);
return docIDs;
}
return dbManager.getDocIDs(tags);
return ranker.rank(currentQuery);
}

private void useOperator(List<ObjectId> docIDs, int operator, int phraseIndex) {
Expand Down
11 changes: 5 additions & 6 deletions mistermeow/app/src/main/java/meowdbmanager/DBManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -414,14 +414,13 @@ public Document getDocument(String docID) {
public List<Document> getDocuments(List<ObjectId> docIDs) {
try {
List<Document> pipeline = new ArrayList<>();
pipeline.add(new Document(
"$match", new Document("_id", new Document("$in", docIDs))));
pipeline.add(new Document("$match", new Document("_id", new Document("$in", docIDs))));
pipeline.add(new Document("$project", new Document()
.append("host", 1)
.append("URL", 1)
.append("title", 1)
.append("content", 1))
.append("ranker_id" , 1));
.append("content", 1)
.append("ranker_id", 1)));

List<Document> results = docCollection.aggregate(pipeline).into(new ArrayList<>());

Expand Down Expand Up @@ -473,12 +472,12 @@ public double getDocumentFromInverted(String token, ObjectId docID) {
}
}

public String getPoisitionFromInverted(String token, ObjectId docID) {
public String getPositionFromInverted(String token, ObjectId docID) {
try {
Document query = new Document("token", token).append("docs._id", docID);

Document result = invertedCollection.find(query)
.projection(new Document("docs.$", 1))
.projection(new Document("docs", 1))
.first();

if (result != null)
Expand Down
33 changes: 15 additions & 18 deletions mistermeow/app/src/main/java/meowranker/Main.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,25 @@
package meowranker;

import java.util.*;
import org.bson.types.ObjectId;

public class Main {
public static void main(String[] argv) {

Ranker ranker = new PhraseRanker();

String query = "The dfl;akjf;asd Free Encyclopedia"; // tests searching for unfound token
ranker.rank(query);

query = "The Free Encyclopedia";
ranker.rank(query);

// query = "Wikipedia";
// Ranker ranker = new PhraseRanker();
//
// String query = "The dfl;akjf;asd Free Encyclopedia"; // tests searching for
// unfound token
// ranker.rank(query);
//
// query = "The Free Encyclopedia";
// ranker.rank(query);
//
// // query = "Wikipedia";
// // ranker.rank(query);
//
// query = "I love you";
// ranker.rank(query);

query = "I love you";
ranker.rank(query);


ranker = new QueryRanker();
ranker.rank("The Free Encyclopedia");
Ranker ranker = new QueryRanker();
ranker.rank("cats");

// QueryRanker Qr = new QueryRanker();
// Qr.rank("Wkiipedia the free encyclopedia");
Expand Down
26 changes: 13 additions & 13 deletions mistermeow/app/src/main/java/meowranker/Ranker.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@

package meowranker;

import com.google.common.collect.Table;
import java.lang.Math;
import java.util.*;
import meowdbmanager.DBManager;
import meowindexer.Tokenizer;
import org.bson.Document;
import org.bson.types.ObjectId;
import org.springframework.data.mongodb.core.query.Query;

public abstract class Ranker {

Expand Down Expand Up @@ -49,19 +47,21 @@ public List<ObjectId> rank(String query) {

finalRank.sort((e1, e2) -> e2.getValue().compareTo(e1.getValue()));

System.out.println("======================================");
System.out.println("=========== Final Result =============");
System.out.println("======================================");
// System.out.println("======================================");
// System.out.println("=========== Final Result =============");
// System.out.println("======================================");

List<ObjectId> SortedList = new ArrayList<>();
for (Map.Entry<ObjectId, Double> e : finalRank) {
SortedList.add(e.getKey());
System.out
.println("URL: " + db.getDocument(e.getKey().toString()).getString("URL") + " || Rank = " + e.getValue());
// System.out
// .println("URL: " + db.getDocument(e.getKey().toString()).getString("URL") + "
// || Rank = " + e.getValue());
// The previous printing is time-costly; comment it out if you're not testing or
// debugging
}

System.out.println("Ranking finished!");
return SortedList;
}

Expand Down Expand Up @@ -151,7 +151,7 @@ public static double[][] constructUrlsGraph() {
}

public double[] getPopularityArr() {
int numberOfUrls = db.getUrlsCount();
// int numberOfUrls = db.getUrlsCount();
// double[] popularityArr = new double[numberOfUrls];

// for (int i = 0; i < numberOfUrls; i++) {
Expand Down Expand Up @@ -239,10 +239,10 @@ public List<Double> calculateRelevance(List<ObjectId> docIds,
double val = 0;
for (String token : searchTokens) {
// summation(tf-idf)
String position = db.getPoisitionFromInverted(token, docIds.get(i));
String position = db.getPositionFromInverted(token, docIds.get(i));

val += db.getDocumentFromInverted(token, docIds.get(i)) * getIDF(token);
if (!position.equals("other"))
if (position != null && !position.equals("other"))
val += boost;
// NOTE: uncomment when testing
// System.out.println(
Expand All @@ -255,8 +255,8 @@ public List<Double> calculateRelevance(List<ObjectId> docIds,
relevance.add(val);
}

if(this instanceof QueryRanker){
QueryRanker ranker = (QueryRanker)this;
if (this instanceof QueryRanker) {
QueryRanker ranker = (QueryRanker) this;
relevance = ranker.addQueryDocRel(relevance);
}

Expand All @@ -267,7 +267,7 @@ public double getIDF(String token) {
double df;
Document invertedInd = db.getInvertedIndex(token);

if (invertedInd == null) // Handling tokens that are not in any documnets
if (invertedInd == null) // Handling tokens that are not in any documents
return 0;

df = (double) db.getInvertedIndex(token).getInteger("DF");
Expand Down

0 comments on commit df72e57

Please sign in to comment.