|
import java.io.File; |
|
import java.io.IOException; |
|
import java.lang.management.ManagementFactory; |
|
import java.util.Random; |
|
import java.util.UUID; |
|
|
|
import org.apache.lucene.analysis.Analyzer; |
|
import org.apache.lucene.analysis.WhitespaceAnalyzer; |
|
import org.apache.lucene.document.Document; |
|
import org.apache.lucene.document.Field; |
|
import org.apache.lucene.document.Field.Index; |
|
import org.apache.lucene.document.Field.Store; |
|
import org.apache.lucene.index.CorruptIndexException; |
|
import org.apache.lucene.index.IndexReader; |
|
import org.apache.lucene.index.IndexWriter; |
|
import org.apache.lucene.queryParser.QueryParser; |
|
import org.apache.lucene.search.IndexSearcher; |
|
import org.apache.lucene.search.Query; |
|
import org.apache.lucene.search.ScoreDoc; |
|
import org.apache.lucene.store.FSDirectory; |
|
import org.apache.lucene.store.LockObtainFailedException; |
|
import org.apache.lucene.util.Version; |
|
|
|
public class FieldsIndexingMemTest { |
|
|
|
private static IndexReader ireader; |
|
private static IndexSearcher isearcher; |
|
private static FSDirectory directory; |
|
private static Analyzer analyzer; |
|
private static QueryParser parser; |
|
private static IndexWriter iwriter; |
|
private static Random random = new Random(); |
|
|
|
private enum Mode { |
|
fewFields, manyFields |
|
}; |
|
|
|
private static Mode mode = Mode.fewFields; |
|
// private static Mode mode = Mode.manyFields; |
|
|
|
/** |
|
* @param args |
|
*/ |
|
@SuppressWarnings("deprecation") |
|
public static void main(String[] args) throws Exception { |
|
System.out.println(ManagementFactory.getRuntimeMXBean().getName()); |
|
System.out.println("mode=" + mode); |
|
long before = System.currentTimeMillis(); |
|
printOutMemory("Before starting"); |
|
|
|
File indexFolder = new File("C:\\temp\\index\\" + mode.toString()); |
|
directory = FSDirectory.open(new File("C:\\temp\\index\\" + mode.toString())); |
|
analyzer = new WhitespaceAnalyzer(); |
|
parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); |
|
|
|
if (indexFolder.exists()) { |
|
openWriterOverExistingIndex(); |
|
} else { |
|
createNewIndex(); |
|
} |
|
|
|
printOutMemory("Before opening reader+searcher+running dummy query"); |
|
ireader = IndexReader.open(directory); |
|
isearcher = new IndexSearcher(ireader); |
|
Query query = parser.parse("name:a*"); |
|
System.out.println(query.rewrite(ireader)); |
|
ScoreDoc[] hits = isearcher.search(query, null, 1000000).scoreDocs; |
|
printOutMemory("After opening reader+searcher+running dummy query"); |
|
|
|
printOutMemory("Before closing Lucene objects"); |
|
System.out.println("Hit enter key to continue..."); |
|
System.in.read(); |
|
|
|
ireader.close(); |
|
iwriter.close(); |
|
directory.close(); |
|
|
|
System.out.println(); |
|
System.out.println("Done. Runtime duration=" + (System.currentTimeMillis() - before) + "ms"); |
|
printOutMemory("After closing Lucene objects"); |
|
} |
|
|
|
private static void printOutMemory(String prefixMessage) { |
|
Runtime runtime = Runtime.getRuntime(); |
|
long beforeUsedMemory = runtime.totalMemory() - runtime.freeMemory(); |
|
for (int i = 0; i < 10; i++) { |
|
System.gc(); |
|
} |
|
try { |
|
Thread.sleep(500); |
|
} catch (InterruptedException e) { |
|
e.printStackTrace(); |
|
} |
|
for (int i = 0; i < 10; i++) { |
|
System.gc(); |
|
} |
|
long afterUsedMemory = runtime.totalMemory() - runtime.freeMemory(); |
|
System.out.println(prefixMessage + " - Used memory=" + (afterUsedMemory / 1024 / 1024) + "MB (beforeUsedMemory=" + beforeUsedMemory / 1024 / 1024 |
|
+ "MB)"); |
|
} |
|
|
|
private static void openWriterOverExistingIndex() throws CorruptIndexException, LockObtainFailedException, IOException { |
|
System.out.println("Opening existing index"); |
|
long before = System.currentTimeMillis(); |
|
iwriter = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); |
|
System.out.println("open existing index duration=" + (System.currentTimeMillis() - before) + "ms"); |
|
} |
|
|
|
private static void createNewIndex() throws CorruptIndexException, LockObtainFailedException, IOException { |
|
System.out.println("Creating index from scratch"); |
|
iwriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); |
|
|
|
int numUniqueTerms = 100 * 1000; |
|
String[] uniqueTerms = new String[numUniqueTerms]; |
|
for (int i = 0; i < uniqueTerms.length; i++) { |
|
uniqueTerms[i] = String.valueOf(i); |
|
} |
|
|
|
int numUniqueFieldNames = 1 * 1000 * 1000; |
|
String[] uniqueFieldNames = new String[numUniqueFieldNames]; |
|
for (int i = 0; i < uniqueFieldNames.length; i++) { |
|
uniqueFieldNames[i] = "community_tag_" + UUID.randomUUID().toString(); |
|
} |
|
|
|
int numOfDocs = 100 * 1000; |
|
for (int i = 0; i < numOfDocs; i++) { |
|
addNewDocument(numUniqueTerms, uniqueTerms, numUniqueFieldNames, uniqueFieldNames); |
|
if (i % 1000 == 0) { |
|
System.out.println("Progress: " + (100 * i / numOfDocs) + "% (wrote " + i + " documents)"); |
|
} |
|
} |
|
// release mem |
|
uniqueTerms = null; |
|
uniqueFieldNames = null; |
|
|
|
printOutMemory("before commit()"); |
|
iwriter.commit(); |
|
printOutMemory("after commit()"); |
|
} |
|
|
|
private static void addNewDocument(int numUniqueTerms, String[] uniqueTerms, int numUniqueFieldNames, String[] uniqueFieldNames) |
|
throws CorruptIndexException, IOException { |
|
Document doc = new Document(); |
|
for (int j = 0; j < 10; j++) { |
|
String fieldName = (mode == Mode.fewFields) ? ("community_tag_" + j) : uniqueFieldNames[random.nextInt(numUniqueFieldNames)]; |
|
String fieldValue = getFieldValue(numUniqueTerms, uniqueTerms); |
|
doc.add(new Field(fieldName, fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); |
|
} |
|
iwriter.addDocument(doc); |
|
} |
|
|
|
private static String getFieldValue(int numUniqueTerms, String[] uniqueTerms) { |
|
int termsInField = random.nextInt(10); |
|
StringBuilder sb = new StringBuilder(); |
|
for (int w = 0; w < termsInField; w++) { |
|
sb.append(uniqueTerms[random.nextInt(numUniqueTerms)]).append(" "); |
|
} |
|
String fieldValue = sb.toString(); |
|
return fieldValue; |
|
} |
|
} |