1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * See LICENSE.txt included in this distribution for the specific 9 * language governing permissions and limitations under the License. 10 * 11 * When distributing Covered Code, include this CDDL HEADER in each 12 * file and include the License file at LICENSE.txt. 13 * If applicable, add the following below this CDDL HEADER, with the 14 * fields enclosed by brackets "[]" replaced with your own identifying 15 * information: Portions Copyright [yyyy] [name of copyright owner] 16 * 17 * CDDL HEADER END 18 */ 19 20 /* 21 * Copyright (c) 2020, Chris Fraire <cfraire@me.com>. 22 */ 23 package org.opengrok.indexer.index; 24 25 import org.apache.commons.lang3.math.NumberUtils; 26 import org.apache.lucene.document.Document; 27 import org.apache.lucene.document.Field; 28 import org.apache.lucene.document.StoredField; 29 import org.apache.lucene.document.StringField; 30 import org.apache.lucene.index.IndexReader; 31 import org.apache.lucene.index.IndexWriter; 32 import org.apache.lucene.index.Term; 33 import org.apache.lucene.queryparser.classic.ParseException; 34 import org.apache.lucene.queryparser.classic.QueryParser; 35 import org.apache.lucene.search.IndexSearcher; 36 import org.apache.lucene.search.Query; 37 import org.apache.lucene.search.ScoreDoc; 38 import org.apache.lucene.search.TermQuery; 39 import org.apache.lucene.search.TopDocs; 40 import org.opengrok.indexer.analysis.CompatibleAnalyser; 41 import org.opengrok.indexer.analysis.AccumulatedNumLinesLOC; 42 import org.opengrok.indexer.analysis.NullableNumLinesLOC; 43 import org.opengrok.indexer.analysis.NumLinesLOC; 44 import org.opengrok.indexer.search.QueryBuilder; 45 46 import java.io.File; 47 import java.io.IOException; 48 import java.util.ArrayList; 49 import java.util.HashMap; 50 import java.util.List; 51 52 /** 53 * Represents a data-access object for Lucene documents containing directory 54 * number-of-lines and lines-of-code data. 55 */ 56 class NumLinesLOCAccessor { 57 private static final int BULK_READ_THRESHOLD = 100; 58 59 /** 60 * Determines whether there is stored number-of-lines and lines-of-code 61 * in the index associated to the specified {@code reader}. 62 */ hasStored(IndexReader reader)63 public boolean hasStored(IndexReader reader) throws IOException { 64 DSearchResult searchResult = newDSearch(reader, 1); 65 return searchResult.hits.totalHits.value > 0; 66 } 67 68 /** 69 * Stores the net deltas to the index through the specified {@code writer}. 70 */ store(IndexWriter writer, IndexReader reader, NumLinesLOCAggregator countsAggregator, boolean isAggregatingDeltas)71 public void store(IndexWriter writer, IndexReader reader, 72 NumLinesLOCAggregator countsAggregator, boolean isAggregatingDeltas) 73 throws IOException { 74 75 List<AccumulatedNumLinesLOC> counts = new ArrayList<>(); 76 countsAggregator.iterator().forEachRemaining(counts::add); 77 if (counts.size() >= BULK_READ_THRESHOLD) { 78 storeBulk(writer, reader, counts, isAggregatingDeltas); 79 } else if (counts.size() > 0) { 80 storeIterative(writer, reader, counts, isAggregatingDeltas); 81 } 82 } 83 84 /** 85 * Queries the stored counts from the specified reader to register them to 86 * the specified aggregator. 87 * @return a value indicating whether any defined number-of-lines and 88 * lines-of-code were found 89 */ register(NumLinesLOCAggregator countsAggregator, IndexReader reader)90 public boolean register(NumLinesLOCAggregator countsAggregator, IndexReader reader) 91 throws IOException { 92 93 /* 94 * Search for existing documents with any value of PATH. Those are 95 * documents representing source code files, as opposed to source code 96 * directories or other object data (e.g. IndexAnalysisSettings3), which 97 * have no stored PATH. 98 */ 99 IndexSearcher searcher = new IndexSearcher(reader); 100 101 Query query; 102 try { 103 QueryParser parser = new QueryParser(QueryBuilder.PATH, new CompatibleAnalyser()); 104 parser.setAllowLeadingWildcard(true); 105 query = parser.parse("*"); 106 } catch (ParseException ex) { 107 // This is not expected, so translate to RuntimeException. 108 throw new RuntimeException(ex); 109 } 110 111 TopDocs hits = searcher.search(query, Integer.MAX_VALUE); 112 return processFileCounts(countsAggregator, searcher, hits); 113 } 114 storeBulk(IndexWriter writer, IndexReader reader, List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas)115 private void storeBulk(IndexWriter writer, IndexReader reader, 116 List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas) throws IOException { 117 118 DSearchResult searchResult = newDSearch(reader, Integer.MAX_VALUE); 119 120 // Index the existing document IDs by QueryBuilder.D. 121 HashMap<String, Integer> byDir = new HashMap<>(); 122 int intMaximum = Integer.MAX_VALUE < searchResult.hits.totalHits.value ? 123 Integer.MAX_VALUE : (int) searchResult.hits.totalHits.value; 124 for (int i = 0; i < intMaximum; ++i) { 125 int docID = searchResult.hits.scoreDocs[i].doc; 126 Document doc = searchResult.searcher.doc(docID); 127 String dirPath = doc.get(QueryBuilder.D); 128 byDir.put(dirPath, docID); 129 } 130 131 for (AccumulatedNumLinesLOC entry : counts) { 132 Integer docID = byDir.get(entry.getPath()); 133 updateDocumentData(writer, searchResult.searcher, entry, docID, isAggregatingDeltas); 134 } 135 } 136 137 private void storeIterative(IndexWriter writer, IndexReader reader, 138 List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas) throws IOException { 139 140 // Search for existing documents with QueryBuilder.D. 141 IndexSearcher searcher = new IndexSearcher(reader); 142 143 for (AccumulatedNumLinesLOC entry : counts) { 144 Query query = new TermQuery(new Term(QueryBuilder.D, entry.getPath())); 145 TopDocs hits = searcher.search(query, 1); 146 147 Integer docID = null; 148 if (hits.totalHits.value > 0) { 149 docID = hits.scoreDocs[0].doc; 150 } 151 updateDocumentData(writer, searcher, entry, docID, isAggregatingDeltas); 152 } 153 } 154 155 private void updateDocumentData(IndexWriter writer, IndexSearcher searcher, 156 AccumulatedNumLinesLOC aggregate, Integer docID, boolean isAggregatingDeltas) 157 throws IOException { 158 159 File pathFile = new File(aggregate.getPath()); 160 String parent = pathFile.getParent(); 161 if (parent == null) { 162 parent = ""; 163 } 164 165 String normalizedPath = QueryBuilder.normalizeDirPath(parent); 166 long extantLOC = 0; 167 long extantLines = 0; 168 169 if (docID != null) { 170 Document doc = searcher.doc(docID); 171 if (isAggregatingDeltas) { 172 extantLines = NumberUtils.toLong(doc.get(QueryBuilder.NUML)); 173 extantLOC = NumberUtils.toLong(doc.get(QueryBuilder.LOC)); 174 } 175 writer.deleteDocuments(new Term(QueryBuilder.D, aggregate.getPath())); 176 } 177 178 long newNumLines = extantLines + aggregate.getNumLines(); 179 long newLOC = extantLOC + aggregate.getLOC(); 180 181 Document doc = new Document(); 182 doc.add(new StringField(QueryBuilder.D, aggregate.getPath(), Field.Store.YES)); 183 doc.add(new StringField(QueryBuilder.DIRPATH, normalizedPath, Field.Store.NO)); 184 doc.add(new StoredField(QueryBuilder.NUML, newNumLines)); 185 doc.add(new StoredField(QueryBuilder.LOC, newLOC)); 186 writer.addDocument(doc); 187 } 188 189 private boolean processFileCounts(NumLinesLOCAggregator countsAggregator, 190 IndexSearcher searcher, TopDocs hits) throws IOException { 191 192 boolean hasDefinedNumLines = false; 193 for (ScoreDoc sd : hits.scoreDocs) { 194 Document d = searcher.doc(sd.doc); 195 NullableNumLinesLOC counts = NumLinesLOCUtil.read(d); 196 if (counts.getNumLines() != null && counts.getLOC() != null) { 197 NumLinesLOC defCounts = new NumLinesLOC(counts.getPath(), 198 counts.getNumLines(), counts.getLOC()); 199 countsAggregator.register(defCounts); 200 hasDefinedNumLines = true; 201 } 202 } 203 return hasDefinedNumLines; 204 } 205 206 private DSearchResult newDSearch(IndexReader reader, int n) throws IOException { 207 // Search for existing documents with QueryBuilder.D. 208 IndexSearcher searcher = new IndexSearcher(reader); 209 Query query; 210 try { 211 QueryParser parser = new QueryParser(QueryBuilder.D, new CompatibleAnalyser()); 212 parser.setAllowLeadingWildcard(true); 213 query = parser.parse("*"); 214 } catch (ParseException ex) { 215 // This is not expected, so translate to RuntimeException. 216 throw new RuntimeException(ex); 217 } 218 219 TopDocs topDocs = searcher.search(query, n); 220 return new DSearchResult(searcher, topDocs); 221 } 222 223 private static class DSearchResult { 224 private final IndexSearcher searcher; 225 private final TopDocs hits; 226 227 DSearchResult(IndexSearcher searcher, TopDocs hits) { 228 this.searcher = searcher; 229 this.hits = hits; 230 } 231 } 232 } 233