Skip to content

remove dups when reindexing #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 27 additions & 20 deletions src/org/opensolaris/opengrok/index/IndexDatabase.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
* one index database per project.
*
* @author Trond Norbye
* @author Lubos Kosco , update for lucene 4.0.0
* @author Lubos Kosco , update for lucene 4.2.0
*/
public class IndexDatabase {

Expand Down Expand Up @@ -345,7 +345,7 @@ public void update() throws IOException, HistoryException {
directories.add(project.getPath());
}
}

for (String dir : directories) {
File sourceRoot;
if ("".equals(dir)) {
Expand All @@ -367,9 +367,11 @@ public void update() throws IOException, HistoryException {

try {
if (numDocs > 0) {
uidIter = terms.iterator(null);
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid), true); //init uid
if (stat==TermsEnum.SeekStatus.END || stat==TermsEnum.SeekStatus.NOT_FOUND) { uidIter=null; }
uidIter = terms.iterator(uidIter);
TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
if (stat==TermsEnum.SeekStatus.END) { uidIter=null;
log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
}
}
//TODO below should be optional, since it traverses the tree once more to get total count! :(
int file_cnt = 0;
Expand All @@ -385,7 +387,8 @@ public void update() throws IOException, HistoryException {

while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) {
removeFile();
uidIter.next();
BytesRef next = uidIter.next();
if (next==null) {uidIter=null;}
}
} finally {
reader.close();
Expand Down Expand Up @@ -802,7 +805,7 @@ private boolean isLocal(String path) {
*
*/
private int indexDown(File dir, String parent, boolean count_only, int cur_count, int est_total) throws IOException {
int lcur_count = cur_count;
int lcur_count = cur_count;
if (isInterrupted()) {
return lcur_count;
}
Expand Down Expand Up @@ -842,16 +845,18 @@ public int compare(File p1, File p2) {
if (uidIter != null) {
String uid = Util.path2uid(path, DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND)); // construct uid for doc
BytesRef buid = new BytesRef(uid);
while (uidIter.term() != null
while (uidIter != null && uidIter.term() != null
&& uidIter.term().compareTo(emptyBR) !=0
&& uidIter.term().compareTo(buid) < 0) {
removeFile();
uidIter.next();
BytesRef next = uidIter.next();
if (next==null) {uidIter=null;}
}

if (uidIter.term() != null
if (uidIter != null && uidIter.term() != null
&& uidIter.term().bytesEquals(buid)) {
uidIter.next(); // keep matching docs
BytesRef next = uidIter.next(); // keep matching docs
if (next==null) {uidIter=null;}
continue;
}
}
Expand Down Expand Up @@ -953,8 +958,8 @@ public static void listAllFiles(List<String> subFiles) throws IOException {
*/
public void listFiles() throws IOException {
IndexReader ireader = null;
TermsEnum iter;
Terms terms = null;
TermsEnum iter=null;
Terms terms = null;

try {
ireader = DirectoryReader.open(indexDirectory); // open existing index
Expand All @@ -963,10 +968,11 @@ public void listFiles() throws IOException {
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
terms = uFields.terms(QueryBuilder.U);
}
iter = terms.iterator(null); // init uid iterator
while (iter.term() != null) {
iter = terms.iterator(iter); // init uid iterator
while (iter != null && iter.term() != null) {
log.fine(Util.uid2url(iter.term().utf8ToString()));
iter.next();
BytesRef next=iter.next();
if (next==null) {iter=null;}
}
} finally {

Expand Down Expand Up @@ -1014,7 +1020,7 @@ static void listFrequentTokens(List<String> subFiles) throws IOException {
public void listTokens(int freq) throws IOException {
IndexReader ireader = null;
TermsEnum iter = null;
Terms terms = null;
Terms terms = null;

try {
ireader = DirectoryReader.open(indexDirectory);
Expand All @@ -1023,13 +1029,14 @@ public void listTokens(int freq) throws IOException {
Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0);
terms = uFields.terms(QueryBuilder.DEFS);
}
iter = terms.iterator(null); // init uid iterator
while (iter.term() != null) {
iter = terms.iterator(iter); // init uid iterator
while (iter != null && iter.term() != null) {
//if (iter.term().field().startsWith("f")) {
if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
log.warning(iter.term().utf8ToString());
}
iter.next();
BytesRef next = iter.next();
if (next==null) {iter=null;}
/*} else {
break;
}*/
Expand Down
12 changes: 7 additions & 5 deletions test/org/opensolaris/opengrok/index/IndexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,8 @@ public void testMain() throws IOException {

private class MyIndexChangeListener implements org.opensolaris.opengrok.index.IndexChangedListener {

List<String> files = new ArrayList<String>();
List<String> files = new ArrayList<>();
List<String> removedFiles = new ArrayList<>();

@Override
public void fileAdd(String path, String analyzer) {
Expand All @@ -203,10 +204,11 @@ public void fileUpdate(String path) {
@Override
public void fileRemoved(String path) {
files.remove(path);
removedFiles.add(path);
}

public void reset() {
this.files = new ArrayList<String>();
this.files = new ArrayList<>();
}
}

Expand Down Expand Up @@ -311,11 +313,11 @@ public void testIncrementalIndexAddRemoveFile() throws Exception {
listener.reset();
repository.addDummyFile(ppath);
idb.update();
assertEquals("No new file added",2, listener.files.size());
listener.reset();
assertEquals("No new file added",1, listener.files.size());
repository.removeDummyFile(ppath);
idb.update();
assertEquals("Didn't remove the dummy file",1, listener.files.size());
assertEquals("Didn't remove the dummy file",0, listener.files.size());
assertEquals("Didn't remove the dummy file",1, listener.removedFiles.size());
} else {
System.out.println("Skipping test. Could not find a ctags I could use in path.");
}
Expand Down