Skip to content

Commit 2f1f6d7

Browse files
committed
Massive parallelisation of file tagging
Now, for each file given, a process is launched to do the work for that file alone. This way, not only are the files processed faster (in parallel), but the memory taken by the binaries is also released sooner: when the process dies, the ref_count goes down to zero and the binaries are collected directly. On very large repos, I measured a drop in memory usage from 9GB of RAM to 2.5GB! Note that for this, it is important to create the ets table with write_concurrency. Important: the functionality to read files from `stdin` has been removed.
1 parent 1fb0a9d commit 2f1f6d7

File tree

1 file changed

+44
-81
lines changed

1 file changed

+44
-81
lines changed

bin/vim_erlang_tags.erl

Lines changed: 44 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -259,98 +259,61 @@ expand_dirs_or_filenames(FileName) ->
259259

260260
%%%=============================================================================
261261
%%% Create tags from directory trees and file lists
262-
%%%=============================================================================
263-
264-
% Build the tags table for the given inputs and return it.
%
% `Files` may contain the atom `stdin` in addition to file/directory names.
% When `stdin` is present, file names are first read from standard input;
% afterwards every other entry is processed with process_filenames/2.
% Returns the ets table (an ordered_set) holding the collected tags.
create_tags(Files) ->
    Tags = ets:new(tags, [ordered_set]),
    log("Tags table created.~n"),

    % Handle the special `stdin` marker before the real file names.
    case lists:member(stdin, Files) of
        true -> process_filenames_from_stdin(Tags);
        false -> ok
    end,

    RealFiles = [Entry || Entry <- Files, Entry =/= stdin],
    process_filenames(RealFiles, Tags),

    Tags.
286-
287-
% Read file names from standard input, one per line, and scan each named file
% for tags. Stops and returns `ok` when end-of-file is reached.
process_filenames_from_stdin(Tags) ->
    process_stdin_line(io:get_line(standard_io, ""), Tags).

% Handle one result of io:get_line/2: stop on `eof`, otherwise treat the value
% as a line holding a file name, scan that file and go on with the next line.
process_stdin_line(eof, _Tags) ->
    ok;
process_stdin_line(Line, Tags) ->
    File = trim(Line),
    log("File to process: ~s~n", [File]),
    add_tags_from_file(File, Tags),
    process_filenames_from_stdin(Tags).
298-
299-
% Traverse the given directory and scan the Erlang files inside for tags.
%
% `Top` is skipped when it is a member of the list stored under the `ignored`
% key of the process dictionary. Otherwise its entries are joined with `Top`
% and handed to process_filenames/2.
%
% A directory that cannot be listed is reported through log_error/2. Besides
% the two reasons that get a dedicated message (eacces, enoent), every other
% reason returned by file:list_dir/1 (e.g. enotdir) is now reported as well
% instead of crashing with a case_clause error.
process_dir_tree(Top, Tags) ->
    case lists:member(Top, get(ignored)) of
        true ->
            ok;
        false ->
            case file:list_dir(Top) of
                {ok, FileNames} ->
                    RelFileNames = [filename:join(Top, FileName) ||
                                       FileName <- FileNames],
                    process_filenames(RelFileNames, Tags);
                {error, eacces} ->
                    log_error("Permission denied: ~s~n", [Top]);
                {error, enoent} ->
                    log_error("Directory does not exist: ~s~n", [Top]);
                {error, Reason} ->
                    log_error("Cannot list directory ~s: ~p~n", [Top, Reason])
            end
    end.
315-
316-
% Walk over the given paths in order: directories are traversed for further
% files, *.erl / *.hrl files are scanned for tags, everything else is skipped.
% Returns `ok`.
process_filenames(Files, Tags) ->
    lists:foreach(fun(File) -> process_filename(File, Tags) end, Files).

% Process a single path unless it is listed under the `ignored` key of the
% process dictionary.
process_filename(File, Tags) ->
    case lists:member(File, get(ignored)) of
        true ->
            ok;
        false ->
            case filelib:is_dir(File) of
                true -> process_dir_tree(File, Tags);
                false -> process_regular_file(filename:extension(File), File, Tags)
            end
    end.

% Scan the file for tags only when its extension marks it as Erlang source.
process_regular_file(Ext, File, Tags) when Ext == ".erl"; Ext == ".hrl" ->
    add_tags_from_file(File, Tags);
process_regular_file(_Ext, _File, _Tags) ->
    ok.
262+
%%%================================================================================================
263+
264+
% Read the given Erlang source files and return an ets table that contains the appropriate tags.
%
% The files are handed to process_filenames/3, which spawns one monitored
% worker process per file; this function then waits for every worker to
% terminate before returning the table.
%
% The table is created `public` because the workers, not the calling process,
% insert into it, and with `write_concurrency` because they do so concurrently.
%
% Raises an error if a worker terminates abnormally or if no 'DOWN' message
% for the awaited worker arrives within 5 seconds.
-spec create_tags([file:filename()]) -> ets:tid().
create_tags(Explore) ->
    log("In create_tags, To explore: ~p~n", [Explore]),
    EtsTags = ets:new(tags,
                      [set,
                       public,
                       {write_concurrency, true},
                       {read_concurrency, false}]),
    log("EtsTags table created.~n"),
    log("Starting processing of files~n"),
    Processes = process_filenames(Explore, EtsTags, []),
    lists:foreach(fun await_worker/1, Processes),
    EtsTags.

% Wait for the 'DOWN' message of one monitored worker.
%
% Any exit reason is consumed: matching only `normal` would leave the message
% of a crashed worker in the mailbox and turn the crash into a misleading
% timeout error five seconds later. An abnormal exit fails right away and
% carries the real reason.
-spec await_worker({pid(), reference()}) -> ok.
await_worker({Pid, Ref}) ->
    receive
        {'DOWN', Ref, process, Pid, normal} ->
            ok;
        {'DOWN', Ref, process, Pid, Reason} ->
            error({tagging_process_failed, Pid, Reason})
    after 5000 ->
        error("Some process takes to long")
    end.
286+
287+
288+
% Spawn one monitored worker per file; each worker scans its file for tags.
% The caller is expected to pass only files with extension *.erl or *.hrl
% (no extension check is done here).
%
% The caller's `verbose` setting is read from the process dictionary and passed
% to every worker, since a spawned process starts with its own dictionary.
% Returns the new {Pid, MonitorRef} pairs prepended to `Processes`.
-spec process_filenames(Files, EtsTags, Processes) -> RetProcesses when
      Files :: [file:filename()],
      EtsTags :: ets:tid(),
      Processes :: [{pid(), reference()}],
      RetProcesses :: [{pid(), reference()}].
process_filenames(Files, EtsTags, Processes) ->
    Verbose = get(verbose),
    lists:foldl(
      fun(File, Spawned) ->
              Worker = spawn_monitor(
                         fun() -> add_tags_from_file(File, EtsTags, Verbose) end),
              [Worker | Spawned]
      end,
      Processes,
      Files).
338301

339302
%%%=============================================================================
340303
%%% Scan a file or line for tags
341304
%%%=============================================================================
342305

343-
% Read the given Erlang source file and add the appropriate tags to the EtsTags ets table.
%
% `Verbose` is stored under the `verbose` key of the calling process's
% dictionary before anything is logged. A tag for the file itself is added
% first; the file is then read and scanned. An unreadable file is reported
% through log_error/2.
add_tags_from_file(File, EtsTags, Verbose) ->
    put(verbose, Verbose),
    log("~nProcessing file: ~s~n", [File]),

    FileBase = filename:basename(File),   % e.g. "mymod.erl"
    Module = filename:rootname(FileBase), % e.g. "mymod"
    add_file_tag(EtsTags, File, FileBase, Module),

    scan_file_contents(file:read_file(File), {EtsTags, File, Module}).

% Scan the contents on a successful read, otherwise report the read failure.
scan_file_contents({ok, Contents}, ScanContext) ->
    ok = scan_tags(Contents, ScanContext);
scan_file_contents(Err, {_EtsTags, File, _Module}) ->
    log_error("File ~s not readable: ~p~n", [File, Err]).
356319

0 commit comments

Comments
 (0)