Skip to content

Commit e94351d

Browse files
committed
ctags performance fix
1 parent 2a335dc commit e94351d

File tree

14 files changed

+547
-195
lines changed

14 files changed

+547
-195
lines changed

.classpath

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<classpathentry kind="src" path="src"/>
66
<classpathentry kind="src" path="generatedsrc"/>
77
<classpathentry kind="src" path="build/src/jsp"/>
8-
<classpathentry kind="lib" path="lib/bcel-5.2.jar"/>
8+
<classpathentry kind="lib" path="lib/bcel-6.0-20150726.222224-123.jar"/>
99
<classpathentry kind="con" path="org.eclipse.jdt.USER_LIBRARY/Jasper GF3"/>
1010
<classpathentry kind="lib" path="build/jrcs"/>
1111
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>

platform/solaris/ips/create.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ PKG pkgsend add file doc/ctags.config mode=0444 owner=root group=sys path=/usr/o
210210

211211
# install libs
212212
LV=6.0.1
213-
for file in ant.jar bcel-5.2.jar \
213+
for file in ant.jar bcel-6.0-20150726.222224-123.jar \
214214
lucene-analyzers-common-${LV}.jar lucene-core-${LV}.jar lucene-queryparser-${LV}.jar lucene-suggest-${LV}.jar \
215215
jrcs.jar \
216216
swing-layout-0.9.jar \

platform/solaris/pkgdef/prototype

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ f none usr/opengrok/doc/EXAMPLE.txt=doc/EXAMPLE.txt 0444 root sys
5959
f none usr/opengrok/doc/opengrok-agent.properties=platform/solaris/default/opengrok-agent.properties 0444 root sys
6060
f none usr/opengrok/lib/opengrok.jar=dist/opengrok.jar 0444 root bin
6161
f none usr/opengrok/lib/ant.jar=dist/lib/ant.jar 0444 root bin
62-
f none usr/opengrok/lib/bcel-5.2.jar=lib/bcel-5.2.jar 0444 root bin
62+
f none usr/opengrok/lib/bcel-6.0-20150726.222224-123.jar=lib/bcel-6.0-20150726.222224-123.jar 0444 root bin
6363
f none usr/opengrok/lib/lucene-core-6.0.1.jar=lib/lucene-core-6.0.1.jar 0444 root bin
6464
f none usr/opengrok/lib/lucene-analyzers-common-6.0.1.jar=lib/lucene-analyzers-common-6.0.1.jar 0444 root bin
6565
f none usr/opengrok/lib/lucene-queryparser-6.0.1.jar=lib/lucene-queryparser-6.0.1.jar 0444 root bin

pom.xml

+7-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
7474
<dependency>
7575
<groupId>org.apache.bcel</groupId>
7676
<artifactId>bcel</artifactId>
77-
<version>5.2</version>
77+
<version>6.0-20150726.222224-123</version>
7878
</dependency>
7979
<dependency>
8080
<groupId>org.apache.lucene</groupId>
@@ -255,6 +255,12 @@ Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
255255
<url>http://repo.maven.apache.org/maven2/</url>
256256
<layout>default</layout>
257257
</repository>
258+
<repository>
259+
<id>apache.snapshots</id>
260+
<name>apache.snapshots</name>
261+
<url>http://repository.apache.org/snapshots/</url>
262+
<layout>default</layout>
263+
</repository>
258264
</repositories>
259265

260266
<pluginRepositories>

src/org/opensolaris/opengrok/analysis/Ctags.java

+157-34
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,19 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
21+
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
2222
*/
2323
package org.opensolaris.opengrok.analysis;
2424

2525
import java.io.BufferedReader;
2626
import java.io.IOException;
27+
import java.io.InputStream;
2728
import java.io.InputStreamReader;
29+
import java.io.OutputStream;
2830
import java.io.OutputStreamWriter;
31+
import java.io.StringReader;
2932
import java.util.ArrayList;
33+
import java.util.EnumMap;
3034
import java.util.List;
3135
import java.util.logging.Level;
3236
import java.util.logging.Logger;
@@ -57,6 +61,8 @@ public class Ctags {
5761
private final int MAX_METHOD_LINE_LENGTH = 1030; //96 is used by universal ctags for some lines, but it's too low, OpenGrok can theoretically handle 50000 with 8G heap
5862
// also this might break scopes functionality, if set too low
5963

64+
private boolean junit_testing = false;
65+
6066
public void setBinary(String binary) {
6167
this.binary = binary;
6268
}
@@ -79,15 +85,15 @@ private void initialize() throws IOException {
7985

8086
command.add(binary);
8187
command.add("--c-kinds=+l");
82-
83-
if (env.isUniversalCtags()) {
88+
89+
if (env.isUniversalCtags()) {
8490
command.add("--langmap=clojure:+.cljs");
8591
command.add("--langmap=clojure:+.cljx");
86-
87-
// Workaround for bug #14924: Don't get local variables in Java
88-
// code since that creates many false positives.
89-
// CtagsTest : bug14924 "too many methods" guards for this
90-
// universal ctags are however safe, so enabling for them
92+
93+
// Workaround for bug #14924: Don't get local variables in Java
94+
// code since that creates many false positives.
95+
// CtagsTest : bug14924 "too many methods" guards for this
96+
// universal ctags are however safe, so enabling for them
9197
command.add("--java-kinds=+l");
9298
}
9399
command.add("--sql-kinds=+l");
@@ -231,16 +237,125 @@ public Definitions doCtags(String file) throws IOException {
231237
return ret;
232238
}
233239

240+
/**
241+
* produce definitions for the text in the buffer String ctags process is
242+
* mocked, not real mostly used for junit testing
243+
*
244+
* @param bufferTags tags file output
245+
* @return definitions parsed from buffer
246+
*/
247+
public Definitions testCtagsParser(String bufferTags) {
248+
junit_testing = true;
249+
ctagsOut = new BufferedReader(new StringReader(bufferTags));
250+
ctags = new Process() {
251+
@Override
252+
public OutputStream getOutputStream() {
253+
return null;
254+
}
255+
256+
@Override
257+
public InputStream getInputStream() {
258+
return null;
259+
}
260+
261+
@Override
262+
public InputStream getErrorStream() {
263+
return null;
264+
}
265+
266+
@Override
267+
public int waitFor() throws InterruptedException {
268+
return 0;
269+
}
270+
271+
@Override
272+
public int exitValue() {
273+
return 0;
274+
}
275+
276+
@Override
277+
public void destroy() {
278+
}
279+
};
280+
281+
Definitions ret;
282+
ret = new Definitions();
283+
readTags(ret);
284+
return ret;
285+
}
286+
287+
// Field names follow the tags file format described at
// https://github.com/universal-ctags/ctags/blob/master/docs/format.rst
// and http://ctags.sourceforge.net/FORMAT (kept for backwards compatibility).
// Only the fields that are actually consumed are enabled; uncomment others
// when they become needed (this avoids populating the map for every record).
/**
 * Extension fields ({@code name:value}) of a ctags record that the parser
 * consumes.
 */
public enum tagFields {
    // ARITY("arity"),
    CLASS("class"),
    // INHERIT("inherit"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // INTERFACE("interface"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // ENUM("enum"),
    // FILE("file"),
    // FUNCTION("function"),
    // KIND("kind"),
    LINE("line"),
    // NAMESPACE("namespace"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    // PROGRAM("program"), //this is not defined in above format docs, but both universal and exuberant ctags use it
    SIGNATURE("signature");
    // STRUCT("struct"),
    // TYPEREF("typeref"),
    // UNION("union");

    // values() clones its array on every call; quickValueOf() runs once per
    // field of every tag line, so the constants are cached here.
    private static final tagFields[] CONSUMED = values();

    /**
     * Retained for source compatibility only; it is no longer consulted.
     * Lookups used to compare just the first {@code charCmpEndOffset + 1}
     * characters, which made unrelated fields such as {@code language:},
     * {@code struct:} or {@code scope:} overwrite LINE and SIGNATURE.
     */
    public static int charCmpEndOffset = 0;

    // field name as it appears in the tags file, without the ':' separator
    private final String name;

    tagFields(String name) {
        this.name = name;
    }

    /**
     * Quickly decide whether a field belongs to the consumed ones.
     * <p>
     * The whole field name is compared, so only an exact name matches. The
     * check is a single prefix comparison per enabled constant and never
     * reads past the end of the input.
     *
     * @param fullName either a complete field ({@code "line:12"}) or a bare
     *                 field name ({@code "line"}); may be {@code null}
     * @return the matching constant, or {@code null} if the field is not
     *         consumed (this includes {@code null} and empty input)
     */
    public static tagFields quickValueOf(String fullName) {
        if (fullName == null) {
            return null;
        }
        final int inputLength = fullName.length();
        for (tagFields candidate : CONSUMED) {
            final int nameLength = candidate.name.length();
            if (inputLength < nameLength || !fullName.startsWith(candidate.name)) {
                continue;
            }
            // the name must be followed by the separator or be the whole input,
            // otherwise e.g. "lineage:x" would be taken for "line"
            if (inputLength == nameLength || fullName.charAt(nameLength) == ':') {
                return candidate;
            }
        }
        return null;
    }
}
343+
234344
private void readTags(Definitions defs) {
345+
EnumMap<tagFields, String> fields = new EnumMap<>(tagFields.class);
235346
try {
236347
do {
237348
String tagLine = ctagsOut.readLine();
238349
//log.fine("Tagline:-->" + tagLine+"<----ONELINE");
239350
if (tagLine == null) {
240-
LOGGER.warning("Unexpected end of file!");
351+
if (!junit_testing) {
352+
LOGGER.warning("Unexpected end of file!");
353+
}
241354
try {
242355
int val = ctags.exitValue();
243-
LOGGER.log(Level.WARNING, "ctags exited with code: {0}", val);
356+
if (!junit_testing) {
357+
LOGGER.log(Level.WARNING, "ctags exited with code: {0}", val);
358+
}
244359
} catch (Exception e) {
245360
LOGGER.log(Level.WARNING, "Ctags problem: ", e);
246361
}
@@ -265,43 +380,50 @@ private void readTags(Definitions defs) {
265380
}
266381
String def = tagLine.substring(0, p);
267382
int mstart = tagLine.indexOf('\t', p + 1);
268-
String lnum = "-1";
269-
String signature = null;
383+
270384
String kind = null;
271-
String inher = null;
272385

273386
int lp = tagLine.length();
274387
while ((p = tagLine.lastIndexOf('\t', lp - 1)) > 0) {
275388
//log.fine(" p = " + p + " lp = " + lp);
276389
String fld = tagLine.substring(p + 1, lp);
277-
//log.fine("FIELD===" + fld);
390+
//log.fine("FIELD===" + fld);
278391
lp = p;
279-
if (fld.startsWith("line:")) {
280-
int sep = fld.indexOf(':');
281-
lnum = fld.substring(sep + 1);
282-
} else if (fld.startsWith("signature:")) {
283-
int sep = fld.indexOf(':');
284-
signature = fld.substring(sep + 1);
285-
} else if (fld.indexOf(':') < 0) {
392+
393+
int sep = fld.indexOf(':');
394+
if (sep != -1) {
395+
tagFields pos = tagFields.quickValueOf(fld);
396+
if (pos != null) {
397+
String val = fld.substring(sep + 1);
398+
fields.put(pos, val);
399+
} else {
400+
//unknown field name
401+
//don't log on purpose, since we don't consume all possible fields, so just ignore this error for now
402+
// LOGGER.log(Level.WARNING, "Unknown field name found: {0}", fld.substring(0, sep - 1));
403+
}
404+
} else {
405+
//TODO no separator, assume this is the kind
286406
kind = fld;
287407
break;
288-
} else {
289-
inher = fld;
290408
}
291409
}
292410

411+
String lnum = fields.get(tagFields.LINE);
412+
String signature = fields.get(tagFields.SIGNATURE);
413+
String classInher = fields.get(tagFields.CLASS);
414+
293415
final String match;
294416
int mlength = p - mstart;
295417
if ((p > 0) && (mlength > MIN_METHOD_LINE_LENGTH)) {
296418
if (mlength < MAX_METHOD_LINE_LENGTH) {
297419
match = tagLine.substring(mstart + 3, p - 4).
298-
replace("\\/", "/").replaceAll("[ \t]+", " ");
420+
replace("\\/", "/").replaceAll("[ \t]+", " "); //TODO per format we should also recognize \r and \n and \\
299421
} else {
300-
LOGGER.log(Level.FINEST, "Ctags: stripping method body for def {0} line {1}(scopes might break)", new Object[]{def, lnum});
422+
LOGGER.log(Level.FINEST, "Ctags: stripping method body for def {0} line {1}(scopes/highlight might break)", new Object[]{def, lnum});
301423
match = tagLine.substring(mstart + 3, mstart + MAX_METHOD_LINE_LENGTH - 1). // +3 - 4 = -1
302424
replace("\\/", "/").replaceAll("[ \t]+", " ");
303425
}
304-
} else {
426+
} else { //tag is in wrong format, cannot extract tagaddress from it, skip
305427
continue;
306428
}
307429

@@ -310,8 +432,8 @@ private void readTags(Definitions defs) {
310432
final Interner<String> seenSymbols = new Interner<>();
311433

312434
final String type
313-
= inher == null ? kind : kind + " in " + inher;
314-
addTag(defs, seenSymbols, lnum, def, type, match, inher, signature);
435+
= classInher == null ? kind : kind + " in " + classInher;
436+
addTag(defs, seenSymbols, lnum, def, type, match, classInher, signature);
315437
if (signature != null) {
316438
//TODO if some languages use different character for separating arguments, below needs to be adjusted
317439
String[] args = signature.split(",");
@@ -323,17 +445,18 @@ private void readTags(Definitions defs) {
323445
//FIXME this will not work for typeless languages such as python or assignments inside signature ... but since ctags doesn't provide signatures for python yet and assigning stuff in signature is not the case for c or java, we don't care ...
324446
String[] names = afters.split("[\\W]"); //this should just parse out variables, we assume first non empty text is the argument name
325447
for (String name : names) {
326-
if (name.length()>0) {
327-
//log.fine("Param Def = "+ string);
328-
addTag(defs, seenSymbols, lnum, name, "argument",
448+
if (name.length() > 0) {
449+
//log.fine("Param Def = "+ string);
450+
addTag(defs, seenSymbols, lnum, name, "argument",
329451
def.trim() + signature.trim(), null, signature);
330-
break;
452+
break;
331453
}
332454
}
333455
}
334456
}
335457
}
336458
//log.fine("Read = " + def + " : " + lnum + " = " + kind + " IS " + inher + " M " + match);
459+
fields.clear();
337460
} while (true);
338461
} catch (Exception e) {
339462
LOGGER.log(Level.WARNING, "CTags parsing problem: ", e);
@@ -345,13 +468,13 @@ private void readTags(Definitions defs) {
345468
* Add a tag to a {@code Definitions} instance.
346469
*/
347470
private void addTag(Definitions defs, Interner<String> seenSymbols,
348-
String lnum, String symbol, String type, String text, String scope, String signature) {
471+
String lnum, String symbol, String type, String text, String namespace, String signature) {
349472
// The strings are frequently repeated (a symbol can be used in
350473
// multiple definitions, multiple definitions can have the same type,
351474
// one line can contain multiple definitions). Intern them to minimize
352475
// the space consumed by them (see bug #809).
353476
defs.addTag(Integer.parseInt(lnum), seenSymbols.intern(symbol.trim()),
354477
seenSymbols.intern(type.trim()), seenSymbols.intern(text.trim()),
355-
scope == null ? null : seenSymbols.intern(scope.trim()), signature);
478+
namespace == null ? null : seenSymbols.intern(namespace.trim()), signature);
356479
}
357480
}

0 commit comments

Comments
 (0)