Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pause and resume profiling around forks #115

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
129 changes: 101 additions & 28 deletions ext/vernier/vernier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1065,10 +1065,6 @@ class Thread {
return start_thread == ruby_thread;
}

bool running() {
return state != State::STOPPED;
}

void mark() {
}
};
Expand Down Expand Up @@ -1168,7 +1164,15 @@ class BaseCollector {
}

public:
bool running = false;
VALUE self = Qnil;

enum State {
STATE_INIT,
STATE_PAUSED,
STATE_RUNNING
};
State state = STATE_INIT;

StackTable *stack_table;
VALUE stack_table_value;

Expand All @@ -1179,27 +1183,39 @@ class BaseCollector {
}
virtual ~BaseCollector() {}

bool is_running() {
return state == STATE_RUNNING;
}

virtual bool start() {
if (running) {
if (is_running()) {
return false;
}

start_thread = rb_thread_current();
started_at = TimeStamp::Now();

running = true;
state = STATE_RUNNING;
return true;
}

virtual VALUE stop() {
if (!running) {
if (!is_running()) {
rb_raise(rb_eRuntimeError, "collector not running");
}
running = false;
state = STATE_PAUSED;

return Qnil;
}

virtual void pause() {
state = STATE_PAUSED;
}

virtual void resume() {
state = STATE_RUNNING;
}

virtual void write_meta(VALUE meta, VALUE result) {
rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
rb_hash_aset(meta, sym("interval"), Qnil);
Expand All @@ -1224,9 +1240,11 @@ class BaseCollector {
virtual void mark() {
//frame_list.mark_frames();
rb_gc_mark(stack_table_value);
rb_gc_mark(self);
};

virtual void compact() {
self = rb_gc_location(self);
};
};

Expand Down Expand Up @@ -1520,7 +1538,6 @@ class TimeCollector : public BaseCollector {

pthread_t sample_thread;

atomic_bool running;
SignalSafeSemaphore thread_stopped;

TimeStamp interval;
Expand Down Expand Up @@ -1727,46 +1744,78 @@ class TimeCollector : public BaseCollector {
this->threads.initial(thread);
}

if (allocation_interval > 0) {
tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, this);
rb_tracepoint_enable(tp_newobj);
}

GlobalSignalHandler::get_instance()->install();

running = true;

collector_thread.start();

// Set the state of the current Ruby thread to RUNNING, which we know it
// is as it must have held the GVL to start the collector. We want to
// have at least one thread in our thread list because it's possible
// that the profile might be such that we don't get any thread switch
// events and we need at least one
this->threads.resumed(rb_thread_current());

thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
rb_add_event_hook(internal_thread_event_cb, RUBY_NORMAL_EVENTS, PTR2NUM((void *)this));
resume_profiling();

return true;
}

VALUE stop() {
BaseCollector::stop();
void resume_profiling() {
collector_thread.start();
GlobalSignalHandler::get_instance()->install();
install_event_hooks();
}

collector_thread.stop();
void resume() {
if (state != STATE_RUNNING) {
BaseCollector::resume();

GlobalSignalHandler::get_instance()->uninstall();
resume_profiling();
}
}

void install_event_hooks() {
if (allocation_interval > 0) {
tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, this);
rb_tracepoint_enable(tp_newobj);
}

thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
rb_add_event_hook(internal_thread_event_cb, RUBY_NORMAL_EVENTS, PTR2NUM((void *)this));
}

void uninstall_event_hooks() {
if (RTEST(tp_newobj)) {
rb_tracepoint_disable(tp_newobj);
tp_newobj = Qnil;
}

RUBY_ASSERT_ALWAYS(thread_hook);
rb_internal_thread_remove_event_hook(thread_hook);
thread_hook = NULL;
rb_remove_event_hook(internal_gc_event_cb);
rb_remove_event_hook(internal_thread_event_cb);
}

void pause_profiling() {
collector_thread.stop();
GlobalSignalHandler::get_instance()->uninstall();
uninstall_event_hooks();
}

void pause() {
if (is_running()) {
BaseCollector::pause();

pause_profiling();

state = STATE_PAUSED;
} else {
cout << "called pause, but state is: " << state << endl;
}
}

VALUE stop() {
BaseCollector::stop();

pause_profiling();

stack_table->finalize();

Expand Down Expand Up @@ -1863,6 +1912,26 @@ collector_start(VALUE self) {
return Qtrue;
}

static VALUE
collector_pause(VALUE self) {
auto *collector = get_collector(self);
collector->pause();
return self;
}

static VALUE
collector_resume(VALUE self) {
auto *collector = get_collector(self);
collector->resume();
return self;
}

static VALUE
collector_running_p(VALUE self) {
auto *collector = get_collector(self);
return collector->is_running() ? Qtrue : Qfalse;
}

static VALUE
collector_stop(VALUE self) {
auto *collector = get_collector(self);
Expand Down Expand Up @@ -1918,6 +1987,7 @@ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
rb_raise(rb_eArgError, "invalid mode");
}
VALUE obj = TypedData_Wrap_Struct(self, &rb_collector_type, collector);
collector->self = obj;
rb_funcall(obj, rb_intern("initialize"), 2, mode, options);
return obj;
}
Expand Down Expand Up @@ -1957,6 +2027,9 @@ Init_vernier(void)
rb_undef_alloc_func(rb_cVernierCollector);
rb_define_singleton_method(rb_cVernierCollector, "_new", collector_new, 2);
rb_define_method(rb_cVernierCollector, "start", collector_start, 0);
rb_define_method(rb_cVernierCollector, "pause", collector_pause, 0);
rb_define_method(rb_cVernierCollector, "resume", collector_resume, 0);
rb_define_method(rb_cVernierCollector, "running?", collector_running_p, 0);
rb_define_method(rb_cVernierCollector, "sample", collector_sample, 0);
rb_define_method(rb_cVernierCollector, "stack_table", collector_stack_table, 0);
rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
Expand Down
6 changes: 6 additions & 0 deletions lib/vernier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require_relative "vernier/stack_table"
require_relative "vernier/result"
require_relative "vernier/hooks"
require_relative "vernier/fork"
require_relative "vernier/vernier"
require_relative "vernier/output/firefox"
require_relative "vernier/output/top"
Expand Down Expand Up @@ -56,6 +57,11 @@ def self.stop_profile
result
end

def self.cancel_profile
@collector&.cancel
@collector = nil
end

def self.trace_retained(**profile_options, &block)
profile(**profile_options.merge(mode: :retained), &block)
end
Expand Down
9 changes: 9 additions & 0 deletions lib/vernier/collector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ def record_interval(category, name = category)
)
end

def cancel
finish
@thread_names.cancel
@hooks.each do |hook|
hook.disable
end
nil
end

def stop
result = finish

Expand Down
24 changes: 24 additions & 0 deletions lib/vernier/fork.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# frozen_string_literal: true

module Vernier
if ::Process.respond_to?(:_fork)
module ForkHooks
def _fork
running_collectors = ObjectSpace.each_object(Vernier::Collector).select(&:running?)
running_collectors.each(&:pause)
pid = super
if pid == 0
# We're in the child
else
# We're in the parent
running_collectors.each do |collector|
collector.resume
end
end
pid
end
end

::Process.singleton_class.prepend(ForkHooks)
end
end
28 changes: 14 additions & 14 deletions lib/vernier/result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def each_sample
end

class BaseType
attr_reader :result, :idx
def initialize(result, idx)
@result = result
attr_reader :stack_table, :idx
def initialize(stack_table, idx)
@stack_table = stack_table
@idx = idx
end

Expand All @@ -78,12 +78,12 @@ def inspect

class Func < BaseType
def label
result._stack_table.func_name(idx)
stack_table.func_name(idx)
end
alias name label

def filename
result._stack_table.func_filename(idx)
stack_table.func_filename(idx)
end

def to_s
Expand All @@ -97,12 +97,12 @@ def filename; func.filename; end
alias name label

def func
func_idx = result._stack_table.frame_func_idx(idx)
Func.new(result, func_idx)
func_idx = stack_table.frame_func_idx(idx)
Func.new(stack_table, func_idx)
end

def line
result._stack_table.frame_line_no(idx)
stack_table.frame_line_no(idx)
end

def to_s
Expand All @@ -116,18 +116,18 @@ def each_frame

stack_idx = idx
while stack_idx
frame_idx = result._stack_table.stack_frame_idx(stack_idx)
yield Frame.new(result, frame_idx)
stack_idx = result._stack_table.stack_parent_idx(stack_idx)
frame_idx = stack_table.stack_frame_idx(stack_idx)
yield Frame.new(stack_table, frame_idx)
stack_idx = stack_table.stack_parent_idx(stack_idx)
end
end

def leaf_frame_idx
result._stack_table.stack_frame_idx(idx)
stack_table.stack_frame_idx(idx)
end

def leaf_frame
Frame.new(result, leaf_frame_idx)
Frame.new(stack_table, leaf_frame_idx)
end

def frames
Expand All @@ -144,7 +144,7 @@ def to_s
end

def stack(idx)
Stack.new(self, idx)
Stack.new(_stack_table, idx)
end

def total_bytes
Expand Down
4 changes: 4 additions & 0 deletions lib/vernier/thread_names.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ def [](object_id)
@names[object_id] || "thread obj_id:#{object_id}"
end

def cancel
@tp.disable
end

def finish
collect_running
@tp.disable
Expand Down
16 changes: 16 additions & 0 deletions test/test_fork.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

require "test_helper"

class TestFork < Minitest::Test
def test_that_forked_children_do_not_hang
Vernier.trace do
pid = Process.fork do
sleep 0.1
# noop
end
_, status = Process.waitpid2(pid)
assert_predicate status, :success?
end
end
end
Loading
Loading