xref: /OpenGrok/opengrok-indexer/src/main/java/org/opengrok/indexer/index/NumLinesLOCAccessor.java (revision 6c62ede99bd45f84e663cda017732f3bcc28db30)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * See LICENSE.txt included in this distribution for the specific
9  * language governing permissions and limitations under the License.
10  *
11  * When distributing Covered Code, include this CDDL HEADER in each
12  * file and include the License file at LICENSE.txt.
13  * If applicable, add the following below this CDDL HEADER, with the
14  * fields enclosed by brackets "[]" replaced with your own identifying
15  * information: Portions Copyright [yyyy] [name of copyright owner]
16  *
17  * CDDL HEADER END
18  */
19 
20 /*
21  * Copyright (c) 2020, Chris Fraire <cfraire@me.com>.
22  */
23 package org.opengrok.indexer.index;
24 
25 import org.apache.commons.lang3.math.NumberUtils;
26 import org.apache.lucene.document.Document;
27 import org.apache.lucene.document.Field;
28 import org.apache.lucene.document.StoredField;
29 import org.apache.lucene.document.StringField;
30 import org.apache.lucene.index.IndexReader;
31 import org.apache.lucene.index.IndexWriter;
32 import org.apache.lucene.index.Term;
33 import org.apache.lucene.queryparser.classic.ParseException;
34 import org.apache.lucene.queryparser.classic.QueryParser;
35 import org.apache.lucene.search.IndexSearcher;
36 import org.apache.lucene.search.Query;
37 import org.apache.lucene.search.ScoreDoc;
38 import org.apache.lucene.search.TermQuery;
39 import org.apache.lucene.search.TopDocs;
40 import org.opengrok.indexer.analysis.CompatibleAnalyser;
41 import org.opengrok.indexer.analysis.AccumulatedNumLinesLOC;
42 import org.opengrok.indexer.analysis.NullableNumLinesLOC;
43 import org.opengrok.indexer.analysis.NumLinesLOC;
44 import org.opengrok.indexer.search.QueryBuilder;
45 
46 import java.io.File;
47 import java.io.IOException;
48 import java.util.ArrayList;
49 import java.util.HashMap;
50 import java.util.List;
51 
52 /**
53  * Represents a data-access object for Lucene documents containing directory
54  * number-of-lines and lines-of-code data.
55  */
56 class NumLinesLOCAccessor {
57     private static final int BULK_READ_THRESHOLD = 100;
58 
59     /**
60      * Determines whether there is stored number-of-lines and lines-of-code
61      * in the index associated to the specified {@code reader}.
62      */
hasStored(IndexReader reader)63     public boolean hasStored(IndexReader reader) throws IOException {
64         DSearchResult searchResult = newDSearch(reader, 1);
65         return searchResult.hits.totalHits.value > 0;
66     }
67 
68     /**
69      * Stores the net deltas to the index through the specified {@code writer}.
70      */
store(IndexWriter writer, IndexReader reader, NumLinesLOCAggregator countsAggregator, boolean isAggregatingDeltas)71     public void store(IndexWriter writer, IndexReader reader,
72             NumLinesLOCAggregator countsAggregator, boolean isAggregatingDeltas)
73             throws IOException {
74 
75         List<AccumulatedNumLinesLOC> counts = new ArrayList<>();
76         countsAggregator.iterator().forEachRemaining(counts::add);
77         if (counts.size() >= BULK_READ_THRESHOLD) {
78             storeBulk(writer, reader, counts, isAggregatingDeltas);
79         } else if (counts.size() > 0) {
80             storeIterative(writer, reader, counts, isAggregatingDeltas);
81         }
82     }
83 
84     /**
85      * Queries the stored counts from the specified reader to register them to
86      * the specified aggregator.
87      * @return a value indicating whether any defined number-of-lines and
88      * lines-of-code were found
89      */
register(NumLinesLOCAggregator countsAggregator, IndexReader reader)90     public boolean register(NumLinesLOCAggregator countsAggregator, IndexReader reader)
91             throws IOException {
92 
93         /*
94          * Search for existing documents with any value of PATH. Those are
95          * documents representing source code files, as opposed to source code
96          * directories or other object data (e.g. IndexAnalysisSettings3), which
97          * have no stored PATH.
98          */
99         IndexSearcher searcher = new IndexSearcher(reader);
100 
101         Query query;
102         try {
103             QueryParser parser = new QueryParser(QueryBuilder.PATH, new CompatibleAnalyser());
104             parser.setAllowLeadingWildcard(true);
105             query = parser.parse("*");
106         } catch (ParseException ex) {
107             // This is not expected, so translate to RuntimeException.
108             throw new RuntimeException(ex);
109         }
110 
111         TopDocs hits = searcher.search(query, Integer.MAX_VALUE);
112         return processFileCounts(countsAggregator, searcher, hits);
113     }
114 
storeBulk(IndexWriter writer, IndexReader reader, List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas)115     private void storeBulk(IndexWriter writer, IndexReader reader,
116             List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas) throws IOException {
117 
118         DSearchResult searchResult = newDSearch(reader, Integer.MAX_VALUE);
119 
120         // Index the existing document IDs by QueryBuilder.D.
121         HashMap<String, Integer> byDir = new HashMap<>();
122         int intMaximum = Integer.MAX_VALUE < searchResult.hits.totalHits.value ?
123                 Integer.MAX_VALUE : (int) searchResult.hits.totalHits.value;
124         for (int i = 0; i < intMaximum; ++i) {
125             int docID = searchResult.hits.scoreDocs[i].doc;
126             Document doc = searchResult.searcher.doc(docID);
127             String dirPath = doc.get(QueryBuilder.D);
128             byDir.put(dirPath, docID);
129         }
130 
131         for (AccumulatedNumLinesLOC entry : counts) {
132             Integer docID = byDir.get(entry.getPath());
133             updateDocumentData(writer, searchResult.searcher, entry, docID, isAggregatingDeltas);
134         }
135     }
136 
137     private void storeIterative(IndexWriter writer, IndexReader reader,
138             List<AccumulatedNumLinesLOC> counts, boolean isAggregatingDeltas) throws IOException {
139 
140         // Search for existing documents with QueryBuilder.D.
141         IndexSearcher searcher = new IndexSearcher(reader);
142 
143         for (AccumulatedNumLinesLOC entry : counts) {
144             Query query = new TermQuery(new Term(QueryBuilder.D, entry.getPath()));
145             TopDocs hits = searcher.search(query, 1);
146 
147             Integer docID = null;
148             if (hits.totalHits.value > 0) {
149                 docID = hits.scoreDocs[0].doc;
150             }
151             updateDocumentData(writer, searcher, entry, docID, isAggregatingDeltas);
152         }
153     }
154 
155     private void updateDocumentData(IndexWriter writer, IndexSearcher searcher,
156             AccumulatedNumLinesLOC aggregate, Integer docID, boolean isAggregatingDeltas)
157             throws IOException {
158 
159         File pathFile = new File(aggregate.getPath());
160         String parent = pathFile.getParent();
161         if (parent == null) {
162             parent = "";
163         }
164 
165         String normalizedPath = QueryBuilder.normalizeDirPath(parent);
166         long extantLOC = 0;
167         long extantLines = 0;
168 
169         if (docID != null) {
170             Document doc = searcher.doc(docID);
171             if (isAggregatingDeltas) {
172                 extantLines = NumberUtils.toLong(doc.get(QueryBuilder.NUML));
173                 extantLOC = NumberUtils.toLong(doc.get(QueryBuilder.LOC));
174             }
175             writer.deleteDocuments(new Term(QueryBuilder.D, aggregate.getPath()));
176         }
177 
178         long newNumLines = extantLines + aggregate.getNumLines();
179         long newLOC = extantLOC + aggregate.getLOC();
180 
181         Document doc = new Document();
182         doc.add(new StringField(QueryBuilder.D, aggregate.getPath(), Field.Store.YES));
183         doc.add(new StringField(QueryBuilder.DIRPATH, normalizedPath, Field.Store.NO));
184         doc.add(new StoredField(QueryBuilder.NUML, newNumLines));
185         doc.add(new StoredField(QueryBuilder.LOC, newLOC));
186         writer.addDocument(doc);
187     }
188 
189     private boolean processFileCounts(NumLinesLOCAggregator countsAggregator,
190             IndexSearcher searcher, TopDocs hits) throws IOException {
191 
192         boolean hasDefinedNumLines = false;
193         for (ScoreDoc sd : hits.scoreDocs) {
194             Document d = searcher.doc(sd.doc);
195             NullableNumLinesLOC counts = NumLinesLOCUtil.read(d);
196             if (counts.getNumLines() != null && counts.getLOC() != null) {
197                 NumLinesLOC defCounts = new NumLinesLOC(counts.getPath(),
198                         counts.getNumLines(), counts.getLOC());
199                 countsAggregator.register(defCounts);
200                 hasDefinedNumLines = true;
201             }
202         }
203         return hasDefinedNumLines;
204     }
205 
206     private DSearchResult newDSearch(IndexReader reader, int n) throws IOException {
207         // Search for existing documents with QueryBuilder.D.
208         IndexSearcher searcher = new IndexSearcher(reader);
209         Query query;
210         try {
211             QueryParser parser = new QueryParser(QueryBuilder.D, new CompatibleAnalyser());
212             parser.setAllowLeadingWildcard(true);
213             query = parser.parse("*");
214         } catch (ParseException ex) {
215             // This is not expected, so translate to RuntimeException.
216             throw new RuntimeException(ex);
217         }
218 
219         TopDocs topDocs = searcher.search(query, n);
220         return new DSearchResult(searcher, topDocs);
221     }
222 
223     private static class DSearchResult {
224         private final IndexSearcher searcher;
225         private final TopDocs hits;
226 
227         DSearchResult(IndexSearcher searcher, TopDocs hits) {
228             this.searcher = searcher;
229             this.hits = hits;
230         }
231     }
232 }
233