18
18
*/
19
19
20
20
/*
21
- * Copyright (c) 2005, 2015 , Oracle and/or its affiliates. All rights reserved.
21
+ * Copyright (c) 2005, 2016 , Oracle and/or its affiliates. All rights reserved.
22
22
*/
23
23
package org .opensolaris .opengrok .analysis ;
24
24
25
25
import java .io .BufferedReader ;
26
26
import java .io .IOException ;
27
+ import java .io .InputStream ;
27
28
import java .io .InputStreamReader ;
29
+ import java .io .OutputStream ;
28
30
import java .io .OutputStreamWriter ;
31
+ import java .io .StringReader ;
29
32
import java .util .ArrayList ;
33
+ import java .util .EnumMap ;
30
34
import java .util .List ;
31
35
import java .util .logging .Level ;
32
36
import java .util .logging .Logger ;
@@ -57,6 +61,8 @@ public class Ctags {
57
61
private final int MAX_METHOD_LINE_LENGTH = 1030 ; //96 is used by universal ctags for some lines, but it's too low, OpenGrok can theoretically handle 50000 with 8G heap
58
62
// also this might break scopes functionality, if set too low
59
63
64
+ private boolean junit_testing = false ;
65
+
60
66
/**
 * Sets the ctags executable to run; the stored value is later added to the
 * ctags command list when the process is initialized.
 *
 * @param binary name or path of the ctags binary
 */
public void setBinary (String binary ) {
61
67
this .binary = binary ;
62
68
}
@@ -79,15 +85,15 @@ private void initialize() throws IOException {
79
85
80
86
command .add (binary );
81
87
command .add ("--c-kinds=+l" );
82
-
83
- if (env .isUniversalCtags ()) {
88
+
89
+ if (env .isUniversalCtags ()) {
84
90
command .add ("--langmap=clojure:+.cljs" );
85
91
command .add ("--langmap=clojure:+.cljx" );
86
-
87
- // Workaround for bug #14924: Don't get local variables in Java
88
- // code since that creates many false positives.
89
- // CtagsTest : bug14924 "too many methods" guards for this
90
- // universal ctags are however safe, so enabling for them
92
+
93
+ // Workaround for bug #14924: Don't get local variables in Java
94
+ // code since that creates many false positives.
95
+ // CtagsTest : bug14924 "too many methods" guards for this
96
+ // universal ctags are however safe, so enabling for them
91
97
command .add ("--java-kinds=+l" );
92
98
}
93
99
command .add ("--sql-kinds=+l" );
@@ -231,16 +237,125 @@ public Definitions doCtags(String file) throws IOException {
231
237
return ret ;
232
238
}
233
239
240
+ /**
241
+ * produce definitions for the text in the buffer String ctags process is
242
+ * mocked, not real mostly used for junit testing
243
+ *
244
+ * @param bufferTags tags file output
245
+ * @return definitions parsed from buffer
246
+ */
247
+ public Definitions testCtagsParser (String bufferTags ) {
248
+ junit_testing = true ;
249
+ ctagsOut = new BufferedReader (new StringReader (bufferTags ));
250
+ ctags = new Process () {
251
+ @ Override
252
+ public OutputStream getOutputStream () {
253
+ return null ;
254
+ }
255
+
256
+ @ Override
257
+ public InputStream getInputStream () {
258
+ return null ;
259
+ }
260
+
261
+ @ Override
262
+ public InputStream getErrorStream () {
263
+ return null ;
264
+ }
265
+
266
+ @ Override
267
+ public int waitFor () throws InterruptedException {
268
+ return 0 ;
269
+ }
270
+
271
+ @ Override
272
+ public int exitValue () {
273
+ return 0 ;
274
+ }
275
+
276
+ @ Override
277
+ public void destroy () {
278
+ }
279
+ };
280
+
281
+ Definitions ret ;
282
+ ret = new Definitions ();
283
+ readTags (ret );
284
+ return ret ;
285
+ }
286
+
287
/**
 * Extension fields of a ctags tag line that the parser consumes.
 * <p>
 * The names follow the tags file format described at
 * https://github.com/universal-ctags/ctags/blob/master/docs/format.rst
 * and http://ctags.sourceforge.net/FORMAT (for backwards compatibility).
 * <p>
 * Field names that are known but deliberately not consumed: arity, enum,
 * file, function, kind, struct, typeref, union, plus inherit, interface,
 * namespace and program (the last four are not in the format documents but
 * are emitted by both universal and exuberant ctags). Add a constant only
 * when its value is actually used, because every constant is checked
 * against every field of every record.
 */
public enum tagFields {
    CLASS("class"),
    LINE("line"),
    SIGNATURE("signature");

    /** Cached constants; {@code values()} allocates a new array per call. */
    private static final tagFields[] VALUES = values();

    /**
     * Formerly the index of the last character compared by
     * {@link #quickValueOf(String)}. Prefix-only comparison misclassified
     * unrelated fields (e.g. {@code scope:} as {@code signature:}), so the
     * lookup now compares complete names and ignores this value.
     *
     * @deprecated kept only so existing references keep compiling
     */
    @Deprecated
    public static int charCmpEndOffset = 0;

    /** Field name exactly as it appears before the ':' in ctags output. */
    private final String name;

    tagFields(String name) {
        this.name = name;
    }

    /**
     * Finds the consumed field that a raw ctags field refers to.
     * <p>
     * The input may be a bare field name ({@code "line"}) or a complete
     * {@code name:value} field ({@code "line:42"}). The whole name must
     * match; a shared prefix is not enough.
     *
     * @param fullName raw field text, may be {@code null} or empty
     * @return the matching constant, or {@code null} if the field is not
     * one of the consumed ones
     */
    public static tagFields quickValueOf(String fullName) {
        if (fullName == null || fullName.isEmpty()) {
            return null;
        }
        final int inputLength = fullName.length();
        for (tagFields candidate : VALUES) {
            final int nameLength = candidate.name.length();
            if (inputLength < nameLength || !fullName.startsWith(candidate.name)) {
                continue;
            }
            // The name must end here or be followed by the value separator,
            // otherwise e.g. "lineage:" would be taken for "line".
            if (inputLength == nameLength || fullName.charAt(nameLength) == ':') {
                return candidate;
            }
        }
        return null;
    }
}
343
+
234
344
private void readTags (Definitions defs ) {
345
+ EnumMap <tagFields , String > fields = new EnumMap <>(tagFields .class );
235
346
try {
236
347
do {
237
348
String tagLine = ctagsOut .readLine ();
238
349
//log.fine("Tagline:-->" + tagLine+"<----ONELINE");
239
350
if (tagLine == null ) {
240
- LOGGER .warning ("Unexpected end of file!" );
351
+ if (!junit_testing ) {
352
+ LOGGER .warning ("Unexpected end of file!" );
353
+ }
241
354
try {
242
355
int val = ctags .exitValue ();
243
- LOGGER .log (Level .WARNING , "ctags exited with code: {0}" , val );
356
+ if (!junit_testing ) {
357
+ LOGGER .log (Level .WARNING , "ctags exited with code: {0}" , val );
358
+ }
244
359
} catch (Exception e ) {
245
360
LOGGER .log (Level .WARNING , "Ctags problem: " , e );
246
361
}
@@ -265,43 +380,50 @@ private void readTags(Definitions defs) {
265
380
}
266
381
String def = tagLine .substring (0 , p );
267
382
int mstart = tagLine .indexOf ('\t' , p + 1 );
268
- String lnum = "-1" ;
269
- String signature = null ;
383
+
270
384
String kind = null ;
271
- String inher = null ;
272
385
273
386
int lp = tagLine .length ();
274
387
while ((p = tagLine .lastIndexOf ('\t' , lp - 1 )) > 0 ) {
275
388
//log.fine(" p = " + p + " lp = " + lp);
276
389
String fld = tagLine .substring (p + 1 , lp );
277
- //log.fine("FIELD===" + fld);
390
+ //log.fine("FIELD===" + fld);
278
391
lp = p ;
279
- if (fld .startsWith ("line:" )) {
280
- int sep = fld .indexOf (':' );
281
- lnum = fld .substring (sep + 1 );
282
- } else if (fld .startsWith ("signature:" )) {
283
- int sep = fld .indexOf (':' );
284
- signature = fld .substring (sep + 1 );
285
- } else if (fld .indexOf (':' ) < 0 ) {
392
+
393
+ int sep = fld .indexOf (':' );
394
+ if (sep != -1 ) {
395
+ tagFields pos = tagFields .quickValueOf (fld );
396
+ if (pos != null ) {
397
+ String val = fld .substring (sep + 1 );
398
+ fields .put (pos , val );
399
+ } else {
400
+ //unknown field name
401
+ //don't log on purpose, since we don't consume all possible fields, so just ignore this error for now
402
+ // LOGGER.log(Level.WARNING, "Unknown field name found: {0}", fld.substring(0, sep - 1));
403
+ }
404
+ } else {
405
+ //TODO no separator, assume this is the kind
286
406
kind = fld ;
287
407
break ;
288
- } else {
289
- inher = fld ;
290
408
}
291
409
}
292
410
411
+ String lnum = fields .get (tagFields .LINE );
412
+ String signature = fields .get (tagFields .SIGNATURE );
413
+ String classInher = fields .get (tagFields .CLASS );
414
+
293
415
final String match ;
294
416
int mlength = p - mstart ;
295
417
if ((p > 0 ) && (mlength > MIN_METHOD_LINE_LENGTH )) {
296
418
if (mlength < MAX_METHOD_LINE_LENGTH ) {
297
419
match = tagLine .substring (mstart + 3 , p - 4 ).
298
- replace ("\\ /" , "/" ).replaceAll ("[ \t ]+" , " " );
420
+ replace ("\\ /" , "/" ).replaceAll ("[ \t ]+" , " " ); //TODO per format we should also recognize \r and \n and \\
299
421
} else {
300
- LOGGER .log (Level .FINEST , "Ctags: stripping method body for def {0} line {1}(scopes might break)" , new Object []{def , lnum });
422
+ LOGGER .log (Level .FINEST , "Ctags: stripping method body for def {0} line {1}(scopes/highlight might break)" , new Object []{def , lnum });
301
423
match = tagLine .substring (mstart + 3 , mstart + MAX_METHOD_LINE_LENGTH - 1 ). // +3 - 4 = -1
302
424
replace ("\\ /" , "/" ).replaceAll ("[ \t ]+" , " " );
303
425
}
304
- } else {
426
+ } else { //tag is in wrong format, cannot extract tagaddress from it, skip
305
427
continue ;
306
428
}
307
429
@@ -310,8 +432,8 @@ private void readTags(Definitions defs) {
310
432
final Interner <String > seenSymbols = new Interner <>();
311
433
312
434
final String type
313
- = inher == null ? kind : kind + " in " + inher ;
314
- addTag (defs , seenSymbols , lnum , def , type , match , inher , signature );
435
+ = classInher == null ? kind : kind + " in " + classInher ;
436
+ addTag (defs , seenSymbols , lnum , def , type , match , classInher , signature );
315
437
if (signature != null ) {
316
438
//TODO if some languages use different character for separating arguments, below needs to be adjusted
317
439
String [] args = signature .split ("," );
@@ -323,17 +445,18 @@ private void readTags(Definitions defs) {
323
445
//FIXME this will not work for typeless languages such as python or assignments inside signature ... but since ctags doesn't provide signatures for python yet and assigning stuff in signature is not the case for c or java, we don't care ...
324
446
String [] names = afters .split ("[\\ W]" ); //this should just parse out variables, we assume first non empty text is the argument name
325
447
for (String name : names ) {
326
- if (name .length ()> 0 ) {
327
- //log.fine("Param Def = "+ string);
328
- addTag (defs , seenSymbols , lnum , name , "argument" ,
448
+ if (name .length () > 0 ) {
449
+ //log.fine("Param Def = "+ string);
450
+ addTag (defs , seenSymbols , lnum , name , "argument" ,
329
451
def .trim () + signature .trim (), null , signature );
330
- break ;
452
+ break ;
331
453
}
332
454
}
333
455
}
334
456
}
335
457
}
336
458
//log.fine("Read = " + def + " : " + lnum + " = " + kind + " IS " + inher + " M " + match);
459
+ fields .clear ();
337
460
} while (true );
338
461
} catch (Exception e ) {
339
462
LOGGER .log (Level .WARNING , "CTags parsing problem: " , e );
@@ -345,13 +468,13 @@ private void readTags(Definitions defs) {
345
468
* Add a tag to a {@code Definitions} instance.
346
469
*/
347
470
private void addTag (Definitions defs , Interner <String > seenSymbols ,
348
- String lnum , String symbol , String type , String text , String scope , String signature ) {
471
+ String lnum , String symbol , String type , String text , String namespace , String signature ) {
349
472
// The strings are frequently repeated (a symbol can be used in
350
473
// multiple definitions, multiple definitions can have the same type,
351
474
// one line can contain multiple definitions). Intern them to minimize
352
475
// the space consumed by them (see bug #809).
353
476
defs .addTag (Integer .parseInt (lnum ), seenSymbols .intern (symbol .trim ()),
354
477
seenSymbols .intern (type .trim ()), seenSymbols .intern (text .trim ()),
355
- scope == null ? null : seenSymbols .intern (scope .trim ()), signature );
478
+ namespace == null ? null : seenSymbols .intern (namespace .trim ()), signature );
356
479
}
357
480
}
0 commit comments