diff --git a/Cargo.lock b/Cargo.lock
index cbedb552a068e..80364515a7cca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -265,7 +265,7 @@ dependencies = [
 
 [[package]]
 name = "cargo"
-version = "0.40.0"
+version = "0.41.0"
 dependencies = [
  "atty",
  "bytesize",
@@ -600,7 +600,7 @@ checksum = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b"
 
 [[package]]
 name = "crates-io"
-version = "0.28.0"
+version = "0.29.0"
 dependencies = [
  "curl",
  "failure",
@@ -730,25 +730,24 @@ dependencies = [
 
 [[package]]
 name = "curl"
-version = "0.4.21"
+version = "0.4.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a85f2f95f2bd277d316d1aa8a477687ab4a6942258c7db7c89c187534669979c"
+checksum = "d08ad3cb89d076a36b0ce5749eec2c9964f70c0c58480ab6b75a91ec4fc206d8"
 dependencies = [
  "curl-sys",
- "kernel32-sys",
  "libc",
  "openssl-probe",
  "openssl-sys",
  "schannel",
  "socket2",
- "winapi 0.2.8",
+ "winapi 0.3.6",
 ]
 
 [[package]]
 name = "curl-sys"
-version = "0.4.18"
+version = "0.4.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d91a0052d5b982887d8e829bee0faffc7218ea3c6ebd3d6c2c8f678a93c9a42"
+checksum = "2e9a9a4e417722876332136a00cacf92c2ceb331fab4b52b6a1ad16c6cd79255"
 dependencies = [
  "cc",
  "libc",
diff --git a/src/ci/run.sh b/src/ci/run.sh
index 457ba97171207..0d5ea371245e4 100755
--- a/src/ci/run.sh
+++ b/src/ci/run.sh
@@ -55,6 +55,9 @@ if [ "$DEPLOY$DEPLOY_ALT" = "1" ]; then
   if [ "$NO_LLVM_ASSERTIONS" = "1" ]; then
     RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-llvm-assertions"
   elif [ "$DEPLOY_ALT" != "" ]; then
+    if [ "$NO_PARALLEL_COMPILER" = "" ]; then
+      RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.parallel-compiler"
+    fi
     RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-llvm-assertions"
     RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.verify-llvm-ir"
   fi
diff --git a/src/doc/book b/src/doc/book
index 871416b85c1a7..04806c80be0f5 160000
--- a/src/doc/book
+++ b/src/doc/book
@@ -1 +1 @@
-Subproject commit 871416b85c1a73717d65d6f4a9ea29e5aef3db0e
+Subproject commit 04806c80be0f54b1290287e3f85e84bdfc0b6ec7
diff --git a/src/doc/reference b/src/doc/reference
index fa5dfb832ef8a..320d232b206ed 160000
--- a/src/doc/reference
+++ b/src/doc/reference
@@ -1 +1 @@
-Subproject commit fa5dfb832ef8a7568e17dabf612f486d641ff4ac
+Subproject commit 320d232b206edecb67489316f71a14e31dbc6c08
diff --git a/src/doc/rust-by-example b/src/doc/rust-by-example
index 67cfbf31df880..a6288e7407a6c 160000
--- a/src/doc/rust-by-example
+++ b/src/doc/rust-by-example
@@ -1 +1 @@
-Subproject commit 67cfbf31df880728dcf7cb35b15b028ec92caf31
+Subproject commit a6288e7407a6c4c19ea29de6d43f40c803883f21
diff --git a/src/libarena/lib.rs b/src/libarena/lib.rs
index 690d8344acff9..66d27a275192e 100644
--- a/src/libarena/lib.rs
+++ b/src/libarena/lib.rs
@@ -500,7 +500,7 @@ impl DroplessArena {
                 // though it was supposed to give us `len`
                 return slice::from_raw_parts_mut(mem, i);
             }
-            ptr::write(mem.offset(i as isize), value.unwrap());
+            ptr::write(mem.add(i), value.unwrap());
             i += 1;
         }
     }
diff --git a/src/libcore/iter/traits/iterator.rs b/src/libcore/iter/traits/iterator.rs
index 0a9e076ec5869..a272035150a15 100644
--- a/src/libcore/iter/traits/iterator.rs
+++ b/src/libcore/iter/traits/iterator.rs
@@ -1859,14 +1859,13 @@ pub trait Iterator {
         Self: Sized, F: FnMut(Self::Item) -> bool
     {
         #[inline]
-        fn check<T>(mut f: impl FnMut(T) -> bool) -> impl FnMut(T) -> LoopState<(), ()> {
-            move |x| {
+        fn check<T>(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> LoopState<(), ()> {
+            move |(), x| {
                 if f(x) { LoopState::Continue(()) }
                 else { LoopState::Break(()) }
             }
         }
-
-        self.try_for_each(check(f)) == LoopState::Continue(())
+        self.try_fold((), check(f)) == LoopState::Continue(())
     }
 
     /// Tests if any element of the iterator matches a predicate.
@@ -1913,14 +1912,14 @@ pub trait Iterator {
         F: FnMut(Self::Item) -> bool
     {
         #[inline]
-        fn check<T>(mut f: impl FnMut(T) -> bool) -> impl FnMut(T) -> LoopState<(), ()> {
-            move |x| {
+        fn check<T>(mut f: impl FnMut(T) -> bool) -> impl FnMut((), T) -> LoopState<(), ()> {
+            move |(), x| {
                 if f(x) { LoopState::Break(()) }
                 else { LoopState::Continue(()) }
             }
         }
 
-        self.try_for_each(check(f)) == LoopState::Break(())
+        self.try_fold((), check(f)) == LoopState::Break(())
     }
 
     /// Searches for an element of an iterator that satisfies a predicate.
@@ -1972,14 +1971,16 @@ pub trait Iterator {
         P: FnMut(&Self::Item) -> bool,
     {
         #[inline]
-        fn check<T>(mut predicate: impl FnMut(&T) -> bool) -> impl FnMut(T) -> LoopState<(), T> {
-            move |x| {
+        fn check<T>(
+            mut predicate: impl FnMut(&T) -> bool
+        ) -> impl FnMut((), T) -> LoopState<(), T> {
+            move |(), x| {
                 if predicate(&x) { LoopState::Break(x) }
                 else { LoopState::Continue(()) }
             }
         }
 
-        self.try_for_each(check(predicate)).break_value()
+        self.try_fold((), check(predicate)).break_value()
     }
 
     /// Applies function to the elements of iterator and returns
@@ -2004,14 +2005,14 @@ pub trait Iterator {
         F: FnMut(Self::Item) -> Option<B>,
     {
         #[inline]
-        fn check<T, B>(mut f: impl FnMut(T) -> Option<B>) -> impl FnMut(T) -> LoopState<(), B> {
-            move |x| match f(x) {
+        fn check<T, B>(mut f: impl FnMut(T) -> Option<B>) -> impl FnMut((), T) -> LoopState<(), B> {
+            move |(), x| match f(x) {
                 Some(x) => LoopState::Break(x),
                 None => LoopState::Continue(()),
             }
         }
 
-        self.try_for_each(check(f)).break_value()
+        self.try_fold((), check(f)).break_value()
     }
 
     /// Searches for an element in an iterator, returning its index.
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index 50aa036f723a0..2771ce69b9e0d 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -805,6 +805,7 @@ macro_rules! options {
         pub const parse_list: Option<&str> = Some("a space-separated list of strings");
         pub const parse_opt_list: Option<&str> = Some("a space-separated list of strings");
         pub const parse_opt_comma_list: Option<&str> = Some("a comma-separated list of strings");
+        pub const parse_threads: Option<&str> = Some("a number");
         pub const parse_uint: Option<&str> = Some("a number");
         pub const parse_passes: Option<&str> =
             Some("a space-separated list of passes, or `all`");
@@ -948,6 +949,14 @@ macro_rules! options {
             }
         }
 
+        fn parse_threads(slot: &mut usize, v: Option<&str>) -> bool {
+            match v.and_then(|s| s.parse().ok()) {
+                Some(0) => { *slot = ::num_cpus::get(); true },
+                Some(i) => { *slot = i; true },
+                None => false
+            }
+        }
+
         fn parse_uint(slot: &mut usize, v: Option<&str>) -> bool {
             match v.and_then(|s| s.parse().ok()) {
                 Some(i) => { *slot = i; true },
@@ -1251,7 +1260,11 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
         "prints the LLVM optimization passes being run"),
     ast_json: bool = (false, parse_bool, [UNTRACKED],
         "print the AST as JSON and halt"),
-    threads: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
+    // We default to 1 here since we want to behave like
+    // a sequential compiler for now. This'll likely be adjusted
+    // in the future. Note that -Zthreads=0 is the way to get
+    // the num_cpus behavior.
+    threads: usize = (1, parse_threads, [UNTRACKED],
         "use a thread pool with N threads"),
     ast_json_noexpand: bool = (false, parse_bool, [UNTRACKED],
         "print the pre-expansion AST as JSON and halt"),
@@ -2146,14 +2159,14 @@ pub fn build_session_options_and_crate_config(
         }
     }
 
-    if debugging_opts.threads == Some(0) {
+    if debugging_opts.threads == 0 {
         early_error(
             error_format,
             "value for threads must be a positive non-zero integer",
         );
     }
 
-    if debugging_opts.threads.unwrap_or(1) > 1 && debugging_opts.fuel.is_some() {
+    if debugging_opts.threads > 1 && debugging_opts.fuel.is_some() {
         early_error(
             error_format,
             "optimization fuel is incompatible with multiple threads",
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs
index 8e9c2735c3913..f22445f5d4744 100644
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -32,7 +32,7 @@ use syntax::source_map;
 use syntax::parse::{self, ParseSess};
 use syntax::symbol::Symbol;
 use syntax_pos::{MultiSpan, Span};
-use crate::util::profiling::SelfProfiler;
+use crate::util::profiling::{SelfProfiler, SelfProfilerRef};
 
 use rustc_target::spec::{PanicStrategy, RelroLevel, Target, TargetTriple};
 use rustc_data_structures::flock;
@@ -129,7 +129,7 @@ pub struct Session {
     pub profile_channel: Lock<Option<mpsc::Sender<ProfileQueriesMsg>>>,
 
     /// Used by `-Z self-profile`.
-    pub self_profiling: Option<Arc<SelfProfiler>>,
+    pub prof: SelfProfilerRef,
 
     /// Some measurements that are being gathered during compilation.
     pub perf_stats: PerfStats,
@@ -835,24 +835,6 @@ impl Session {
         }
     }
 
-    #[inline(never)]
-    #[cold]
-    fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        match &self.self_profiling {
-            None => bug!("profiler_active() called but there was no profiler active"),
-            Some(profiler) => {
-                f(&profiler);
-            }
-        }
-    }
-
-    #[inline(always)]
-    pub fn profiler<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        if unlikely!(self.self_profiling.is_some()) {
-            self.profiler_active(f)
-        }
-    }
-
     pub fn print_perf_stats(&self) {
         println!(
             "Total time spent computing symbol hashes:      {}",
@@ -896,16 +878,10 @@ impl Session {
         ret
     }
 
-    /// Returns the number of query threads that should be used for this
-    /// compilation
-    pub fn threads_from_count(query_threads: Option<usize>) -> usize {
-        query_threads.unwrap_or(::num_cpus::get())
-    }
-
     /// Returns the number of query threads that should be used for this
     /// compilation
     pub fn threads(&self) -> usize {
-        Self::threads_from_count(self.opts.debugging_opts.threads)
+        self.opts.debugging_opts.threads
     }
 
     /// Returns the number of codegen units that should be used for this
@@ -1257,7 +1233,7 @@ fn build_session_(
         imported_macro_spans: OneThread::new(RefCell::new(FxHashMap::default())),
         incr_comp_session: OneThread::new(RefCell::new(IncrCompSession::NotInitialized)),
         cgu_reuse_tracker,
-        self_profiling: self_profiler,
+        prof: SelfProfilerRef::new(self_profiler),
         profile_channel: Lock::new(None),
         perf_stats: PerfStats {
             symbol_hash_time: Lock::new(Duration::from_secs(0)),
diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs
index 87b9917f340da..5f1a17e4a9521 100644
--- a/src/librustc/ty/context.rs
+++ b/src/librustc/ty/context.rs
@@ -45,6 +45,7 @@ use crate::ty::CanonicalPolyFnSig;
 use crate::util::common::ErrorReported;
 use crate::util::nodemap::{DefIdMap, DefIdSet, ItemLocalMap, ItemLocalSet};
 use crate::util::nodemap::{FxHashMap, FxHashSet};
+use crate::util::profiling::SelfProfilerRef;
 
 use errors::DiagnosticBuilder;
 use arena::SyncDroplessArena;
@@ -1030,6 +1031,8 @@ pub struct GlobalCtxt<'tcx> {
 
     pub dep_graph: DepGraph,
 
+    pub prof: SelfProfilerRef,
+
     /// Common objects.
     pub common: Common<'tcx>,
 
@@ -1260,6 +1263,7 @@ impl<'tcx> TyCtxt<'tcx> {
             arena: WorkerLocal::new(|_| Arena::default()),
             interners,
             dep_graph,
+            prof: s.prof.clone(),
             common,
             types: common_types,
             lifetimes: common_lifetimes,
diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs
index 32858d30b0cc4..955f1447c55b6 100644
--- a/src/librustc/ty/query/plumbing.rs
+++ b/src/librustc/ty/query/plumbing.rs
@@ -112,7 +112,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             let mut lock = cache.get_shard_by_value(key).lock();
             if let Some(value) = lock.results.get(key) {
                 profq_msg!(tcx, ProfileQueriesMsg::CacheHit);
-                tcx.sess.profiler(|p| p.record_query_hit(Q::NAME));
+                tcx.prof.query_cache_hit(Q::NAME);
                 let result = (value.value.clone(), value.index);
                 #[cfg(debug_assertions)]
                 {
@@ -128,7 +128,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
                             // in another thread has completed. Record how long we wait in the
                             // self-profiler.
                             #[cfg(parallel_compiler)]
-                            tcx.sess.profiler(|p| p.query_blocked_start(Q::NAME));
+                            tcx.prof.query_blocked_start(Q::NAME);
 
                             job.clone()
                         },
@@ -170,7 +170,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
             #[cfg(parallel_compiler)]
             {
                 let result = job.r#await(tcx, span);
-                tcx.sess.profiler(|p| p.query_blocked_end(Q::NAME));
+                tcx.prof.query_blocked_end(Q::NAME);
 
                 if let Err(cycle) = result {
                     return TryGetJob::Cycle(Q::handle_cycle_error(tcx, cycle));
@@ -382,8 +382,9 @@ impl<'tcx> TyCtxt<'tcx> {
         }
 
         if Q::ANON {
+
             profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
-            self.sess.profiler(|p| p.start_query(Q::NAME));
+            let prof_timer = self.prof.query_provider(Q::NAME);
 
             let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
                 self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -393,7 +394,7 @@ impl<'tcx> TyCtxt<'tcx> {
                 })
             });
 
-            self.sess.profiler(|p| p.end_query(Q::NAME));
+            drop(prof_timer);
             profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
 
             self.dep_graph.read_index(dep_node_index);
@@ -451,9 +452,8 @@ impl<'tcx> TyCtxt<'tcx> {
         // First we try to load the result from the on-disk cache.
         let result = if Q::cache_on_disk(self, key.clone(), None) &&
                         self.sess.opts.debugging_opts.incremental_queries {
-            self.sess.profiler(|p| p.incremental_load_result_start(Q::NAME));
+            let _prof_timer = self.prof.incr_cache_loading(Q::NAME);
             let result = Q::try_load_from_disk(self, prev_dep_node_index);
-            self.sess.profiler(|p| p.incremental_load_result_end(Q::NAME));
 
             // We always expect to find a cached result for things that
             // can be forced from `DepNode`.
@@ -469,21 +469,17 @@ impl<'tcx> TyCtxt<'tcx> {
 
         let result = if let Some(result) = result {
             profq_msg!(self, ProfileQueriesMsg::CacheHit);
-            self.sess.profiler(|p| p.record_query_hit(Q::NAME));
-
             result
         } else {
             // We could not load a result from the on-disk cache, so
             // recompute.
-
-            self.sess.profiler(|p| p.start_query(Q::NAME));
+            let _prof_timer = self.prof.query_provider(Q::NAME);
 
             // The dep-graph for this computation is already in-place.
             let result = self.dep_graph.with_ignore(|| {
                 Q::compute(self, key)
             });
 
-            self.sess.profiler(|p| p.end_query(Q::NAME));
             result
         };
 
@@ -551,7 +547,7 @@ impl<'tcx> TyCtxt<'tcx> {
                 key, dep_node);
 
         profq_msg!(self, ProfileQueriesMsg::ProviderBegin);
-        self.sess.profiler(|p| p.start_query(Q::NAME));
+        let prof_timer = self.prof.query_provider(Q::NAME);
 
         let ((result, dep_node_index), diagnostics) = with_diagnostics(|diagnostics| {
             self.start_query(job.job.clone(), diagnostics, |tcx| {
@@ -571,7 +567,7 @@ impl<'tcx> TyCtxt<'tcx> {
             })
         });
 
-        self.sess.profiler(|p| p.end_query(Q::NAME));
+        drop(prof_timer);
         profq_msg!(self, ProfileQueriesMsg::ProviderEnd);
 
         if unlikely!(self.sess.opts.debugging_opts.query_dep_graph) {
@@ -619,7 +615,7 @@ impl<'tcx> TyCtxt<'tcx> {
             let _ = self.get_query::<Q>(DUMMY_SP, key);
         } else {
             profq_msg!(self, ProfileQueriesMsg::CacheHit);
-            self.sess.profiler(|p| p.record_query_hit(Q::NAME));
+            self.prof.query_cache_hit(Q::NAME);
         }
     }
 
diff --git a/src/librustc/util/profiling.rs b/src/librustc/util/profiling.rs
index 8624856a4f55c..bd02e7f5a14a1 100644
--- a/src/librustc/util/profiling.rs
+++ b/src/librustc/util/profiling.rs
@@ -1,9 +1,9 @@
-use std::borrow::Cow;
 use std::error::Error;
 use std::fs;
 use std::mem::{self, Discriminant};
 use std::path::Path;
 use std::process;
+use std::sync::Arc;
 use std::thread::ThreadId;
 use std::u32;
 
@@ -62,6 +62,206 @@ fn thread_id_to_u64(tid: ThreadId) -> u64 {
     unsafe { mem::transmute::<ThreadId, u64>(tid) }
 }
 
+
+/// A reference to the SelfProfiler. It can be cloned and sent across thread
+/// boundaries at will.
+#[derive(Clone)]
+pub struct SelfProfilerRef {
+    // This field is `None` if self-profiling is disabled for the current
+    // compilation session.
+    profiler: Option<Arc<SelfProfiler>>,
+
+    // We store the filter mask directly in the reference because that doesn't
+    // cost anything and allows for filtering with checking if the profiler is
+    // actually enabled.
+    event_filter_mask: EventFilter,
+}
+
+impl SelfProfilerRef {
+
+    pub fn new(profiler: Option<Arc<SelfProfiler>>) -> SelfProfilerRef {
+        // If there is no SelfProfiler then the filter mask is set to NONE,
+        // ensuring that nothing ever tries to actually access it.
+        let event_filter_mask = profiler
+            .as_ref()
+            .map(|p| p.event_filter_mask)
+            .unwrap_or(EventFilter::NONE);
+
+        SelfProfilerRef {
+            profiler,
+            event_filter_mask,
+        }
+    }
+
+    // This shim makes sure that calls only get executed if the filter mask
+    // lets them pass. It also contains some trickery to make sure that
+    // code is optimized for non-profiling compilation sessions, i.e. anything
+    // past the filter check is never inlined so it doesn't clutter the fast
+    // path.
+    #[inline(always)]
+    fn exec<F>(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_>
+        where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
+    {
+        #[inline(never)]
+        fn cold_call<F>(profiler_ref: &SelfProfilerRef, f: F) -> TimingGuard<'_>
+            where F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>
+        {
+            let profiler = profiler_ref.profiler.as_ref().unwrap();
+            f(&**profiler)
+        }
+
+        if unlikely!(self.event_filter_mask.contains(event_filter)) {
+            cold_call(self, f)
+        } else {
+            TimingGuard::none()
+        }
+    }
+
+    /// Start profiling a generic activity. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn generic_activity(&self, event_id: &str) -> TimingGuard<'_> {
+        self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
+            let event_id = profiler.profiler.alloc_string(event_id);
+            TimingGuard::start(
+                profiler,
+                profiler.generic_activity_event_kind,
+                event_id
+            )
+        })
+    }
+
+    /// Start profiling a generic activity. Profiling continues until
+    /// `generic_activity_end` is called. The RAII-based `generic_activity`
+    /// usually is the better alternative.
+    #[inline(always)]
+    pub fn generic_activity_start(&self, event_id: &str) {
+        self.non_guard_generic_event(
+            |profiler| profiler.generic_activity_event_kind,
+            |profiler| profiler.profiler.alloc_string(event_id),
+            EventFilter::GENERIC_ACTIVITIES,
+            TimestampKind::Start,
+        );
+    }
+
+    /// End profiling a generic activity that was started with
+    /// `generic_activity_start`. The RAII-based `generic_activity` usually is
+    /// the better alternative.
+    #[inline(always)]
+    pub fn generic_activity_end(&self, event_id: &str) {
+        self.non_guard_generic_event(
+            |profiler| profiler.generic_activity_event_kind,
+            |profiler| profiler.profiler.alloc_string(event_id),
+            EventFilter::GENERIC_ACTIVITIES,
+            TimestampKind::End,
+        );
+    }
+
+    /// Start profiling a query provider. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn query_provider(&self, query_name: QueryName) -> TimingGuard<'_> {
+        self.exec(EventFilter::QUERY_PROVIDERS, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            TimingGuard::start(profiler, profiler.query_event_kind, event_id)
+        })
+    }
+
+    /// Record a query in-memory cache hit.
+    #[inline(always)]
+    pub fn query_cache_hit(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_cache_hit_event_kind,
+            query_name,
+            EventFilter::QUERY_CACHE_HITS,
+            TimestampKind::Instant,
+        );
+    }
+
+    /// Start profiling a query being blocked on a concurrent execution.
+    /// Profiling continues until `query_blocked_end` is called.
+    #[inline(always)]
+    pub fn query_blocked_start(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_blocked_event_kind,
+            query_name,
+            EventFilter::QUERY_BLOCKED,
+            TimestampKind::Start,
+        );
+    }
+
+    /// End profiling a query being blocked on a concurrent execution.
+    #[inline(always)]
+    pub fn query_blocked_end(&self, query_name: QueryName) {
+        self.non_guard_query_event(
+            |profiler| profiler.query_blocked_event_kind,
+            query_name,
+            EventFilter::QUERY_BLOCKED,
+            TimestampKind::End,
+        );
+    }
+
+    /// Start profiling how long it takes to load a query result from the
+    /// incremental compilation on-disk cache. Profiling continues until the
+    /// TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn incr_cache_loading(&self, query_name: QueryName) -> TimingGuard<'_> {
+        self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            TimingGuard::start(
+                profiler,
+                profiler.incremental_load_result_event_kind,
+                event_id
+            )
+        })
+    }
+
+    #[inline(always)]
+    fn non_guard_query_event(
+        &self,
+        event_kind: fn(&SelfProfiler) -> StringId,
+        query_name: QueryName,
+        event_filter: EventFilter,
+        timestamp_kind: TimestampKind
+    ) {
+        drop(self.exec(event_filter, |profiler| {
+            let event_id = SelfProfiler::get_query_name_string_id(query_name);
+            let thread_id = thread_id_to_u64(std::thread::current().id());
+
+            profiler.profiler.record_event(
+                event_kind(profiler),
+                event_id,
+                thread_id,
+                timestamp_kind,
+            );
+
+            TimingGuard::none()
+        }));
+    }
+
+    #[inline(always)]
+    fn non_guard_generic_event<F: FnOnce(&SelfProfiler) -> StringId>(
+        &self,
+        event_kind: fn(&SelfProfiler) -> StringId,
+        event_id: F,
+        event_filter: EventFilter,
+        timestamp_kind: TimestampKind
+    ) {
+        drop(self.exec(event_filter, |profiler| {
+            let thread_id = thread_id_to_u64(std::thread::current().id());
+
+            profiler.profiler.record_event(
+                event_kind(profiler),
+                event_id(profiler),
+                thread_id,
+                timestamp_kind,
+            );
+
+            TimingGuard::none()
+        }));
+    }
+}
+
 pub struct SelfProfiler {
     profiler: Profiler,
     event_filter_mask: EventFilter,
@@ -143,103 +343,51 @@ impl SelfProfiler {
         let id = SelfProfiler::get_query_name_string_id(query_name);
         self.profiler.alloc_string_with_reserved_id(id, query_name.as_str());
     }
+}
 
-    #[inline]
-    pub fn start_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) {
-        if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
-            self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::Start);
-        }
-    }
-
-    #[inline]
-    pub fn end_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) {
-        if self.event_filter_mask.contains(EventFilter::GENERIC_ACTIVITIES) {
-            self.record(&label.into(), self.generic_activity_event_kind, TimestampKind::End);
-        }
-    }
-
-    #[inline]
-    pub fn record_query_hit(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS) {
-            self.record_query(query_name, self.query_cache_hit_event_kind, TimestampKind::Instant);
-        }
-    }
-
-    #[inline]
-    pub fn start_query(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
-            self.record_query(query_name, self.query_event_kind, TimestampKind::Start);
-        }
-    }
-
-    #[inline]
-    pub fn end_query(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_PROVIDERS) {
-            self.record_query(query_name, self.query_event_kind, TimestampKind::End);
-        }
-    }
-
-    #[inline]
-    pub fn incremental_load_result_start(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
-            self.record_query(
-                query_name,
-                self.incremental_load_result_event_kind,
-                TimestampKind::Start
-            );
-        }
-    }
-
-    #[inline]
-    pub fn incremental_load_result_end(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::INCR_CACHE_LOADS) {
-            self.record_query(
-                query_name,
-                self.incremental_load_result_event_kind,
-                TimestampKind::End
-            );
-        }
-    }
+#[must_use]
+pub struct TimingGuard<'a>(Option<TimingGuardInternal<'a>>);
 
-    #[inline]
-    pub fn query_blocked_start(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
-            self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::Start);
-        }
-    }
+struct TimingGuardInternal<'a> {
+    raw_profiler: &'a Profiler,
+    event_id: StringId,
+    event_kind: StringId,
+    thread_id: u64,
+}
 
+impl<'a> TimingGuard<'a> {
     #[inline]
-    pub fn query_blocked_end(&self, query_name: QueryName) {
-        if self.event_filter_mask.contains(EventFilter::QUERY_BLOCKED) {
-            self.record_query(query_name, self.query_blocked_event_kind, TimestampKind::End);
-        }
+    pub fn start(
+        profiler: &'a SelfProfiler,
+        event_kind: StringId,
+        event_id: StringId,
+    ) -> TimingGuard<'a> {
+        let thread_id = thread_id_to_u64(std::thread::current().id());
+        let raw_profiler = &profiler.profiler;
+        raw_profiler.record_event(event_kind, event_id, thread_id, TimestampKind::Start);
+
+        TimingGuard(Some(TimingGuardInternal {
+            raw_profiler,
+            event_kind,
+            event_id,
+            thread_id,
+        }))
     }
 
     #[inline]
-    fn record(&self, event_id: &str, event_kind: StringId, timestamp_kind: TimestampKind) {
-        let thread_id = thread_id_to_u64(std::thread::current().id());
-
-        let event_id = self.profiler.alloc_string(event_id);
-        self.profiler.record_event(event_kind, event_id, thread_id, timestamp_kind);
+    pub fn none() -> TimingGuard<'a> {
+        TimingGuard(None)
     }
+}
 
+impl<'a> Drop for TimingGuardInternal<'a> {
     #[inline]
-    fn record_query(
-        &self,
-        query_name: QueryName,
-        event_kind: StringId,
-        timestamp_kind: TimestampKind,
-    ) {
-        let dep_node_name = SelfProfiler::get_query_name_string_id(query_name);
-
-        let thread_id = thread_id_to_u64(std::thread::current().id());
-
-        self.profiler.record_event(event_kind, dep_node_name, thread_id, timestamp_kind);
+    fn drop(&mut self) {
+        self.raw_profiler.record_event(
+            self.event_kind,
+            self.event_id,
+            self.thread_id,
+            TimestampKind::End
+        );
     }
 }
diff --git a/src/librustc_apfloat/ieee.rs b/src/librustc_apfloat/ieee.rs
index 18d968fbddd9b..4abb86a5251a5 100644
--- a/src/librustc_apfloat/ieee.rs
+++ b/src/librustc_apfloat/ieee.rs
@@ -1199,8 +1199,8 @@ impl<S: Semantics> Float for IeeeFloat<S> {
         }
 
         // Handle a leading minus sign.
-        let minus = s.starts_with("-");
-        if minus || s.starts_with("+") {
+        let minus = s.starts_with('-');
+        if minus || s.starts_with('+') {
             s = &s[1..];
             if s.is_empty() {
                 return Err(ParseError("String has no digits"));
diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs
index a43fbb68dbaed..c4368d2cb8b45 100644
--- a/src/librustc_codegen_llvm/back/lto.rs
+++ b/src/librustc_codegen_llvm/back/lto.rs
@@ -62,11 +62,13 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
     };
     let exported_symbols = cgcx.exported_symbols
         .as_ref().expect("needs exported symbols for LTO");
-    let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
-        .iter()
-        .filter_map(symbol_filter)
-        .collect::<Vec<CString>>();
-    let _timer = cgcx.profile_activity("generate_symbol_white_list_for_thinlto");
+    let mut symbol_white_list = {
+        let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
+        exported_symbols[&LOCAL_CRATE]
+            .iter()
+            .filter_map(symbol_filter)
+            .collect::<Vec<CString>>()
+    };
     info!("{} symbols to preserve in this crate", symbol_white_list.len());
 
     // If we're performing LTO for the entire crate graph, then for each of our
@@ -95,14 +97,17 @@ fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         }
 
         for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
-            let _timer = cgcx.profile_activity(format!("load: {}", path.display()));
             let exported_symbols = cgcx.exported_symbols
                 .as_ref().expect("needs exported symbols for LTO");
-            symbol_white_list.extend(
-                exported_symbols[&cnum]
-                    .iter()
-                    .filter_map(symbol_filter));
+            {
+                let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbol_white_list");
+                symbol_white_list.extend(
+                    exported_symbols[&cnum]
+                        .iter()
+                        .filter_map(symbol_filter));
+            }
 
+            let _timer = cgcx.prof.generic_activity("LLVM_lto_load_upstream_bitcode");
             let archive = ArchiveRO::open(&path).expect("wanted an rlib");
             let bytecodes = archive.iter().filter_map(|child| {
                 child.ok().and_then(|c| c.name().map(|name| (name, c)))
@@ -189,6 +194,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
            symbol_white_list: &[*const libc::c_char])
     -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
     info!("going for a fat lto");
 
     // Sort out all our lists of incoming modules into two lists.
@@ -287,6 +293,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
         // save and persist everything with the original module.
         let mut linker = Linker::new(llmod);
         for (bc_decoded, name) in serialized_modules {
+            let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_link_module");
             info!("linking {:?}", name);
             time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
                 let data = bc_decoded.data();
@@ -388,6 +395,7 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
             symbol_white_list: &[*const libc::c_char])
     -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
     unsafe {
         info!("going for that thin, thin LTO");
 
@@ -601,16 +609,6 @@ impl ModuleBuffer {
             llvm::LLVMRustModuleBufferCreate(m)
         })
     }
-
-    pub fn parse<'a>(
-        &self,
-        name: &str,
-        cx: &'a llvm::Context,
-        handler: &Handler,
-    ) -> Result<&'a llvm::Module, FatalError> {
-        let name = CString::new(name).unwrap();
-        parse_module(cx, &name, self.data(), handler)
-    }
 }
 
 impl ModuleBufferMethods for ModuleBuffer {
@@ -723,7 +721,7 @@ pub unsafe fn optimize_thin_module(
         // Like with "fat" LTO, get some better optimizations if landing pads
         // are disabled by removing all landing pads.
         if cgcx.no_landing_pads {
-            let _timer = cgcx.profile_activity("LLVM_remove_landing_pads");
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_remove_landing_pads");
             llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
             save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
         }
@@ -736,26 +734,41 @@ pub unsafe fn optimize_thin_module(
         //
         // You can find some more comments about these functions in the LLVM
         // bindings we've got (currently `PassWrapper.cpp`)
-        if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_rename");
+            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
-        if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_resolve_weak");
+            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
-        if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_internalize");
+            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
-        if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
-            let msg = "failed to prepare thin LTO module";
-            return Err(write::llvm_err(&diag_handler, msg))
+
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_import");
+            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
+                let msg = "failed to prepare thin LTO module";
+                return Err(write::llvm_err(&diag_handler, msg))
+            }
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
         }
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
 
         // Ok now this is a bit unfortunate. This is also something you won't
         // find upstream in LLVM's ThinLTO passes! This is a hack for now to
@@ -786,18 +799,24 @@ pub unsafe fn optimize_thin_module(
         // not too much) but for now at least gets LLVM to emit valid DWARF (or
         // so it appears). Hopefully we can remove this once upstream bugs are
         // fixed in LLVM.
-        llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_patch_debuginfo");
+            llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
+        }
 
         // Alright now that we've done everything related to the ThinLTO
         // analysis it's time to run some optimizations! Here we use the same
         // `run_pass_manager` as the "fat" LTO above except that we tell it to
         // populate a thin-specific pass manager, which presumably LLVM treats a
         // little differently.
-        info!("running thin lto passes over {}", module.name);
-        let config = cgcx.config(module.kind);
-        run_pass_manager(cgcx, &module, config, true);
-        save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        {
+            let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_optimize");
+            info!("running thin lto passes over {}", module.name);
+            let config = cgcx.config(module.kind);
+            run_pass_manager(cgcx, &module, config, true);
+            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
+        }
     }
     Ok(module)
 }
diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs
index 253110dcb34c0..78db90b57b53d 100644
--- a/src/librustc_codegen_llvm/back/write.rs
+++ b/src/librustc_codegen_llvm/back/write.rs
@@ -306,6 +306,8 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
                    config: &ModuleConfig)
     -> Result<(), FatalError>
 {
+    let _timer = cgcx.prof.generic_activity("LLVM_module_optimize");
+
     let llmod = module.module_llvm.llmod();
     let llcx = &*module.module_llvm.llcx;
     let tm = &*module.module_llvm.tm;
@@ -423,7 +425,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
 
         // Finally, run the actual optimization passes
         {
-            let _timer = cgcx.profile_activity("LLVM_function_passes");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_function_passes");
             time_ext(config.time_passes,
                         None,
                         &format!("llvm function passes [{}]", module_name.unwrap()),
@@ -432,7 +434,7 @@ pub(crate) unsafe fn optimize(cgcx: &CodegenContext<LlvmCodegenBackend>,
             });
         }
         {
-            let _timer = cgcx.profile_activity("LLVM_module_passes");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_optimize_module_passes");
             time_ext(config.time_passes,
                     None,
                     &format!("llvm module passes [{}]", module_name.unwrap()),
@@ -454,7 +456,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
                   config: &ModuleConfig)
     -> Result<CompiledModule, FatalError>
 {
-    let _timer = cgcx.profile_activity("codegen");
+    let _timer = cgcx.prof.generic_activity("LLVM_module_codegen");
     {
         let llmod = module.module_llvm.llmod();
         let llcx = &*module.module_llvm.llcx;
@@ -505,12 +507,12 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
 
 
         if write_bc || config.emit_bc_compressed || config.embed_bitcode {
-            let _timer = cgcx.profile_activity("LLVM_make_bitcode");
+            let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_make_bitcode");
             let thin = ThinBuffer::new(llmod);
             let data = thin.data();
 
             if write_bc {
-                let _timer = cgcx.profile_activity("LLVM_emit_bitcode");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_bitcode");
                 if let Err(e) = fs::write(&bc_out, data) {
                     let msg = format!("failed to write bytecode to {}: {}", bc_out.display(), e);
                     diag_handler.err(&msg);
@@ -518,12 +520,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if config.embed_bitcode {
-                let _timer = cgcx.profile_activity("LLVM_embed_bitcode");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_embed_bitcode");
                 embed_bitcode(cgcx, llcx, llmod, Some(data));
             }
 
             if config.emit_bc_compressed {
-                let _timer = cgcx.profile_activity("LLVM_compress_bitcode");
+                let _timer =
+                    cgcx.prof.generic_activity("LLVM_module_codegen_emit_compressed_bitcode");
                 let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
                 let data = bytecode::encode(&module.name, data);
                 if let Err(e) = fs::write(&dst, data) {
@@ -538,7 +541,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
         time_ext(config.time_passes, None, &format!("codegen passes [{}]", module_name.unwrap()),
             || -> Result<(), FatalError> {
             if config.emit_ir {
-                let _timer = cgcx.profile_activity("LLVM_emit_ir");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_ir");
                 let out = cgcx.output_filenames.temp_path(OutputType::LlvmAssembly, module_name);
                 let out_c = path_to_c_string(&out);
 
@@ -585,7 +588,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if config.emit_asm || asm_to_obj {
-                let _timer = cgcx.profile_activity("LLVM_emit_asm");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_asm");
                 let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
 
                 // We can't use the same module for asm and binary output, because that triggers
@@ -603,13 +606,13 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<LlvmCodegenBackend>,
             }
 
             if write_obj {
-                let _timer = cgcx.profile_activity("LLVM_emit_obj");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_emit_obj");
                 with_codegen(tm, llmod, config.no_builtins, |cpm| {
                     write_output_file(diag_handler, tm, cpm, llmod, &obj_out,
                                       llvm::FileType::ObjectFile)
                 })?;
             } else if asm_to_obj {
-                let _timer = cgcx.profile_activity("LLVM_asm_to_obj");
+                let _timer = cgcx.prof.generic_activity("LLVM_module_codegen_asm_to_obj");
                 let assembly = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name);
                 run_assembler(cgcx, diag_handler, &assembly, &obj_out);
 
diff --git a/src/librustc_codegen_llvm/base.rs b/src/librustc_codegen_llvm/base.rs
index 5758cdbebf7d7..bd7d0d4017dce 100644
--- a/src/librustc_codegen_llvm/base.rs
+++ b/src/librustc_codegen_llvm/base.rs
@@ -108,6 +108,7 @@ pub fn compile_codegen_unit(
     cgu_name: InternedString,
     tx_to_llvm_workers: &std::sync::mpsc::Sender<Box<dyn std::any::Any + Send>>,
 ) {
+    let prof_timer = tcx.prof.generic_activity("codegen_module");
     let start_time = Instant::now();
 
     let dep_node = tcx.codegen_unit(cgu_name).codegen_dep_node(tcx);
@@ -119,6 +120,7 @@ pub fn compile_codegen_unit(
         dep_graph::hash_result,
     );
     let time_to_codegen = start_time.elapsed();
+    drop(prof_timer);
 
     // We assume that the cost to run LLVM on a CGU is proportional to
     // the time we needed for codegenning it.
diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs
index 309a17a01e3fa..87eab484fafc6 100644
--- a/src/librustc_codegen_llvm/lib.rs
+++ b/src/librustc_codegen_llvm/lib.rs
@@ -324,8 +324,9 @@ impl CodegenBackend for LlvmCodegenBackend {
 
         // Run the linker on any artifacts that resulted from the LLVM run.
         // This should produce either a finished executable or library.
-        sess.profiler(|p| p.start_activity("link_crate"));
         time(sess, "linking", || {
+            let _prof_timer = sess.prof.generic_activity("link_crate");
+
             use rustc_codegen_ssa::back::link::link_binary;
             use crate::back::archive::LlvmArchiveBuilder;
 
@@ -338,7 +339,6 @@ impl CodegenBackend for LlvmCodegenBackend {
                 target_cpu,
             );
         });
-        sess.profiler(|p| p.end_activity("link_crate"));
 
         // Now that we won't touch anything in the incremental compilation directory
         // any more, we can finalize it (which involves renaming it)
diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs
index 3c5fbfd0f866f..f1cfac2703322 100644
--- a/src/librustc_codegen_ssa/back/write.rs
+++ b/src/librustc_codegen_ssa/back/write.rs
@@ -19,7 +19,7 @@ use rustc::util::nodemap::FxHashMap;
 use rustc::hir::def_id::{CrateNum, LOCAL_CRATE};
 use rustc::ty::TyCtxt;
 use rustc::util::common::{time_depth, set_time_depth, print_time_passes_entry};
-use rustc::util::profiling::SelfProfiler;
+use rustc::util::profiling::SelfProfilerRef;
 use rustc_fs_util::link_or_copy;
 use rustc_data_structures::svh::Svh;
 use rustc_errors::{Handler, Level, FatalError, DiagnosticId};
@@ -31,7 +31,6 @@ use syntax_pos::symbol::{Symbol, sym};
 use jobserver::{Client, Acquired};
 
 use std::any::Any;
-use std::borrow::Cow;
 use std::fs;
 use std::io;
 use std::mem;
@@ -196,42 +195,13 @@ impl<B: WriteBackendMethods> Clone for TargetMachineFactory<B> {
     }
 }
 
-pub struct ProfileGenericActivityTimer {
-    profiler: Option<Arc<SelfProfiler>>,
-    label: Cow<'static, str>,
-}
-
-impl ProfileGenericActivityTimer {
-    pub fn start(
-        profiler: Option<Arc<SelfProfiler>>,
-        label: Cow<'static, str>,
-    ) -> ProfileGenericActivityTimer {
-        if let Some(profiler) = &profiler {
-            profiler.start_activity(label.clone());
-        }
-
-        ProfileGenericActivityTimer {
-            profiler,
-            label,
-        }
-    }
-}
-
-impl Drop for ProfileGenericActivityTimer {
-    fn drop(&mut self) {
-        if let Some(profiler) = &self.profiler {
-            profiler.end_activity(self.label.clone());
-        }
-    }
-}
-
 /// Additional resources used by optimize_and_codegen (not module specific)
 #[derive(Clone)]
 pub struct CodegenContext<B: WriteBackendMethods> {
     // Resources needed when running LTO
     pub backend: B,
     pub time_passes: bool,
-    pub profiler: Option<Arc<SelfProfiler>>,
+    pub prof: SelfProfilerRef,
     pub lto: Lto,
     pub no_landing_pads: bool,
     pub save_temps: bool,
@@ -283,31 +253,6 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
             ModuleKind::Allocator => &self.allocator_module_config,
         }
     }
-
-    #[inline(never)]
-    #[cold]
-    fn profiler_active<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        match &self.profiler {
-            None => bug!("profiler_active() called but there was no profiler active"),
-            Some(profiler) => {
-                f(&*profiler);
-            }
-        }
-    }
-
-    #[inline(always)]
-    pub fn profile<F: FnOnce(&SelfProfiler) -> ()>(&self, f: F) {
-        if unlikely!(self.profiler.is_some()) {
-            self.profiler_active(f)
-        }
-    }
-
-    pub fn profile_activity(
-        &self,
-        label: impl Into<Cow<'static, str>>,
-    ) -> ProfileGenericActivityTimer {
-        ProfileGenericActivityTimer::start(self.profiler.clone(), label.into())
-    }
 }
 
 fn generate_lto_work<B: ExtraBackendMethods>(
@@ -316,7 +261,7 @@ fn generate_lto_work<B: ExtraBackendMethods>(
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>
 ) -> Vec<(WorkItem<B>, u64)> {
-    cgcx.profile(|p| p.start_activity("codegen_run_lto"));
+    let _prof_timer = cgcx.prof.generic_activity("codegen_run_lto");
 
     let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
         assert!(needs_thin_lto.is_empty());
@@ -343,8 +288,6 @@ fn generate_lto_work<B: ExtraBackendMethods>(
         }), 0)
     })).collect();
 
-    cgcx.profile(|p| p.end_activity("codegen_run_lto"));
-
     result
 }
 
@@ -380,6 +323,9 @@ pub fn start_async_codegen<B: ExtraBackendMethods>(
 ) -> OngoingCodegen<B> {
     let (coordinator_send, coordinator_receive) = channel();
     let sess = tcx.sess;
+
+    sess.prof.generic_activity_start("codegen_and_optimize_crate");
+
     let crate_name = tcx.crate_name(LOCAL_CRATE);
     let crate_hash = tcx.crate_hash(LOCAL_CRATE);
     let no_builtins = attr::contains_name(&tcx.hir().krate().attrs, sym::no_builtins);
@@ -1088,7 +1034,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         save_temps: sess.opts.cg.save_temps,
         opts: Arc::new(sess.opts.clone()),
         time_passes: sess.time_extended(),
-        profiler: sess.self_profiling.clone(),
+        prof: sess.prof.clone(),
         exported_symbols,
         plugin_passes: sess.plugin_llvm_passes.borrow().clone(),
         remark: sess.opts.cg.remark.clone(),
@@ -1645,12 +1591,8 @@ fn spawn_work<B: ExtraBackendMethods>(
         // as a diagnostic was already sent off to the main thread - just
         // surface that there was an error in this worker.
         bomb.result = {
-            let label = work.name();
-            cgcx.profile(|p| p.start_activity(label.clone()));
-            let result = execute_work_item(&cgcx, work).ok();
-            cgcx.profile(|p| p.end_activity(label));
-
-            result
+            let _prof_timer = cgcx.prof.generic_activity(&work.name());
+            execute_work_item(&cgcx, work).ok()
         };
     });
 }
@@ -1835,6 +1777,8 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
             self.backend.print_pass_timings()
         }
 
+        sess.prof.generic_activity_end("codegen_and_optimize_crate");
+
         (CodegenResults {
             crate_name: self.crate_name,
             crate_hash: self.crate_hash,
diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs
index d11d8911a93d5..935087714a7eb 100644
--- a/src/librustc_codegen_ssa/base.rs
+++ b/src/librustc_codegen_ssa/base.rs
@@ -559,7 +559,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     if need_metadata_module {
         // Codegen the encoded metadata.
-        tcx.sess.profiler(|p| p.start_activity("codegen crate metadata"));
+        let _prof_timer = tcx.prof.generic_activity("codegen_crate_metadata");
 
         let metadata_cgu_name = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
                                                                 &["crate"],
@@ -570,7 +570,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
             backend.write_compressed_metadata(tcx, &ongoing_codegen.metadata,
                                               &mut metadata_llvm_module);
         });
-        tcx.sess.profiler(|p| p.end_activity("codegen crate metadata"));
 
         let metadata_module = ModuleCodegen {
             name: metadata_cgu_name,
@@ -599,11 +598,9 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
         match cgu_reuse {
             CguReuse::No => {
-                tcx.sess.profiler(|p| p.start_activity(format!("codegen {}", cgu.name())));
                 let start_time = Instant::now();
                 backend.compile_codegen_unit(tcx, *cgu.name(), &ongoing_codegen.coordinator_send);
                 total_codegen_time += start_time.elapsed();
-                tcx.sess.profiler(|p| p.end_activity(format!("codegen {}", cgu.name())));
                 false
             }
             CguReuse::PreLto => {
diff --git a/src/librustc_codegen_ssa/lib.rs b/src/librustc_codegen_ssa/lib.rs
index 161d3ce61f0a6..5017a60ca699a 100644
--- a/src/librustc_codegen_ssa/lib.rs
+++ b/src/librustc_codegen_ssa/lib.rs
@@ -21,7 +21,6 @@
 
 #[macro_use] extern crate log;
 #[macro_use] extern crate rustc;
-#[macro_use] extern crate rustc_data_structures;
 #[macro_use] extern crate syntax;
 
 use std::path::PathBuf;
diff --git a/src/librustc_data_structures/graph/implementation/mod.rs b/src/librustc_data_structures/graph/implementation/mod.rs
index 052a09c0774b6..c438a8558a704 100644
--- a/src/librustc_data_structures/graph/implementation/mod.rs
+++ b/src/librustc_data_structures/graph/implementation/mod.rs
@@ -303,11 +303,11 @@ pub struct AdjacentEdges<'g, N, E> {
 
 impl<'g, N: Debug, E: Debug> AdjacentEdges<'g, N, E> {
     fn targets(self) -> impl Iterator<Item = NodeIndex> + 'g {
-        self.into_iter().map(|(_, edge)| edge.target)
+        self.map(|(_, edge)| edge.target)
     }
 
     fn sources(self) -> impl Iterator<Item = NodeIndex> + 'g {
-        self.into_iter().map(|(_, edge)| edge.source)
+        self.map(|(_, edge)| edge.source)
     }
 }
 
diff --git a/src/librustc_data_structures/obligation_forest/mod.rs b/src/librustc_data_structures/obligation_forest/mod.rs
index cfccef67fe7dd..958ab617cb315 100644
--- a/src/librustc_data_structures/obligation_forest/mod.rs
+++ b/src/librustc_data_structures/obligation_forest/mod.rs
@@ -151,9 +151,8 @@ pub struct ObligationForest<O: ForestObligation> {
     /// comments in `process_obligation` for details.
     active_cache: FxHashMap<O::Predicate, usize>,
 
-    /// A scratch vector reused in various operations, to avoid allocating new
-    /// vectors.
-    scratch: RefCell<Vec<usize>>,
+    /// A vector reused in compress(), to avoid allocating new vectors.
+    node_rewrites: RefCell<Vec<usize>>,
 
     obligation_tree_id_generator: ObligationTreeIdGenerator,
 
@@ -235,10 +234,6 @@ enum NodeState {
     /// This obligation was resolved to an error. Error nodes are
     /// removed from the vector by the compression step.
     Error,
-
-    /// This is a temporary state used in DFS loops to detect cycles,
-    /// it should not exist outside of these DFSes.
-    OnDfsStack,
 }
 
 #[derive(Debug)]
@@ -279,7 +274,7 @@ impl<O: ForestObligation> ObligationForest<O> {
             nodes: vec![],
             done_cache: Default::default(),
             active_cache: Default::default(),
-            scratch: RefCell::new(vec![]),
+            node_rewrites: RefCell::new(vec![]),
             obligation_tree_id_generator: (0..).map(ObligationTreeId),
             error_cache: Default::default(),
         }
@@ -305,9 +300,10 @@ impl<O: ForestObligation> ObligationForest<O> {
 
         match self.active_cache.entry(obligation.as_predicate().clone()) {
             Entry::Occupied(o) => {
+                let index = *o.get();
                 debug!("register_obligation_at({:?}, {:?}) - duplicate of {:?}!",
-                       obligation, parent, o.get());
-                let node = &mut self.nodes[*o.get()];
+                       obligation, parent, index);
+                let node = &mut self.nodes[index];
                 if let Some(parent_index) = parent {
                     // If the node is already in `active_cache`, it has already
                     // had its chance to be marked with a parent. So if it's
@@ -342,7 +338,8 @@ impl<O: ForestObligation> ObligationForest<O> {
                 if already_failed {
                     Err(())
                 } else {
-                    v.insert(self.nodes.len());
+                    let new_index = self.nodes.len();
+                    v.insert(new_index);
                     self.nodes.push(Node::new(parent, obligation, obligation_tree_id));
                     Ok(())
                 }
@@ -352,15 +349,16 @@ impl<O: ForestObligation> ObligationForest<O> {
 
     /// Converts all remaining obligations to the given error.
     pub fn to_errors<E: Clone>(&mut self, error: E) -> Vec<Error<O, E>> {
-        let mut errors = vec![];
-        for (index, node) in self.nodes.iter().enumerate() {
-            if let NodeState::Pending = node.state.get() {
-                errors.push(Error {
+        let errors = self.nodes.iter().enumerate()
+            .filter(|(_index, node)| node.state.get() == NodeState::Pending)
+            .map(|(index, _node)| {
+                Error {
                     error: error.clone(),
                     backtrace: self.error_at(index),
-                });
-            }
-        }
+                }
+            })
+            .collect();
+
         let successful_obligations = self.compress(DoCompleted::Yes);
         assert!(successful_obligations.unwrap().is_empty());
         errors
@@ -370,15 +368,14 @@ impl<O: ForestObligation> ObligationForest<O> {
     pub fn map_pending_obligations<P, F>(&self, f: F) -> Vec<P>
         where F: Fn(&O) -> P
     {
-        self.nodes
-            .iter()
-            .filter(|n| n.state.get() == NodeState::Pending)
-            .map(|n| f(&n.obligation))
+        self.nodes.iter()
+            .filter(|node| node.state.get() == NodeState::Pending)
+            .map(|node| f(&node.obligation))
             .collect()
     }
 
-    fn insert_into_error_cache(&mut self, node_index: usize) {
-        let node = &self.nodes[node_index];
+    fn insert_into_error_cache(&mut self, index: usize) {
+        let node = &self.nodes[index];
         self.error_cache
             .entry(node.obligation_tree_id)
             .or_default()
@@ -408,10 +405,10 @@ impl<O: ForestObligation> ObligationForest<O> {
             // `self.active_cache`. This means that `self.active_cache` can get
             // out of sync with `nodes`. It's not very common, but it does
             // happen, and code in `compress` has to allow for it.
-            let result = match node.state.get() {
-                NodeState::Pending => processor.process_obligation(&mut node.obligation),
-                _ => continue
-            };
+            if node.state.get() != NodeState::Pending {
+                continue;
+            }
+            let result = processor.process_obligation(&mut node.obligation);
 
             debug!("process_obligations: node {} got result {:?}", index, result);
 
@@ -476,8 +473,7 @@ impl<O: ForestObligation> ObligationForest<O> {
     fn process_cycles<P>(&self, processor: &mut P)
         where P: ObligationProcessor<Obligation=O>
     {
-        let mut stack = self.scratch.replace(vec![]);
-        debug_assert!(stack.is_empty());
+        let mut stack = vec![];
 
         debug!("process_cycles()");
 
@@ -485,55 +481,45 @@ impl<O: ForestObligation> ObligationForest<O> {
             // For some benchmarks this state test is extremely
             // hot. It's a win to handle the no-op cases immediately to avoid
             // the cost of the function call.
-            match node.state.get() {
-                // Match arms are in order of frequency. Pending, Success and
-                // Waiting dominate; the others are rare.
-                NodeState::Pending => {},
-                NodeState::Success => self.find_cycles_from_node(&mut stack, processor, index),
-                NodeState::Waiting | NodeState::Done | NodeState::Error => {},
-                NodeState::OnDfsStack => self.find_cycles_from_node(&mut stack, processor, index),
+            if node.state.get() == NodeState::Success {
+                self.find_cycles_from_node(&mut stack, processor, index);
             }
         }
 
         debug!("process_cycles: complete");
 
         debug_assert!(stack.is_empty());
-        self.scratch.replace(stack);
     }
 
     fn find_cycles_from_node<P>(&self, stack: &mut Vec<usize>, processor: &mut P, index: usize)
         where P: ObligationProcessor<Obligation=O>
     {
         let node = &self.nodes[index];
-        match node.state.get() {
-            NodeState::OnDfsStack => {
-                let rpos = stack.iter().rposition(|&n| n == index).unwrap();
-                processor.process_backedge(stack[rpos..].iter().map(GetObligation(&self.nodes)),
-                                           PhantomData);
-            }
-            NodeState::Success => {
-                node.state.set(NodeState::OnDfsStack);
-                stack.push(index);
-                for &index in node.dependents.iter() {
-                    self.find_cycles_from_node(stack, processor, index);
+        if node.state.get() == NodeState::Success {
+            match stack.iter().rposition(|&n| n == index) {
+                None => {
+                    stack.push(index);
+                    for &index in node.dependents.iter() {
+                        self.find_cycles_from_node(stack, processor, index);
+                    }
+                    stack.pop();
+                    node.state.set(NodeState::Done);
+                }
+                Some(rpos) => {
+                    // Cycle detected.
+                    processor.process_backedge(
+                        stack[rpos..].iter().map(GetObligation(&self.nodes)),
+                        PhantomData
+                    );
                 }
-                stack.pop();
-                node.state.set(NodeState::Done);
-            },
-            NodeState::Waiting | NodeState::Pending => {
-                // This node is still reachable from some pending node. We
-                // will get to it when they are all processed.
-            }
-            NodeState::Done | NodeState::Error => {
-                // Already processed that node.
             }
-        };
+        }
     }
 
     /// Returns a vector of obligations for `p` and all of its
     /// ancestors, putting them into the error state in the process.
     fn error_at(&self, mut index: usize) -> Vec<O> {
-        let mut error_stack = self.scratch.replace(vec![]);
+        let mut error_stack: Vec<usize> = vec![];
         let mut trace = vec![];
 
         loop {
@@ -554,15 +540,12 @@ impl<O: ForestObligation> ObligationForest<O> {
 
         while let Some(index) = error_stack.pop() {
             let node = &self.nodes[index];
-            match node.state.get() {
-                NodeState::Error => continue,
-                _ => node.state.set(NodeState::Error),
+            if node.state.get() != NodeState::Error {
+                node.state.set(NodeState::Error);
+                error_stack.extend(node.dependents.iter());
             }
-
-            error_stack.extend(node.dependents.iter());
         }
 
-        self.scratch.replace(error_stack);
         trace
     }
 
@@ -570,7 +553,19 @@ impl<O: ForestObligation> ObligationForest<O> {
     #[inline(always)]
     fn inlined_mark_neighbors_as_waiting_from(&self, node: &Node<O>) {
         for &index in node.dependents.iter() {
-            self.mark_as_waiting_from(&self.nodes[index]);
+            let node = &self.nodes[index];
+            match node.state.get() {
+                NodeState::Waiting | NodeState::Error => {}
+                NodeState::Success => {
+                    node.state.set(NodeState::Waiting);
+                    // This call site is cold.
+                    self.uninlined_mark_neighbors_as_waiting_from(node);
+                }
+                NodeState::Pending | NodeState::Done => {
+                    // This call site is cold.
+                    self.uninlined_mark_neighbors_as_waiting_from(node);
+                }
+            }
         }
     }
 
@@ -596,37 +591,28 @@ impl<O: ForestObligation> ObligationForest<O> {
         }
     }
 
-    fn mark_as_waiting_from(&self, node: &Node<O>) {
-        match node.state.get() {
-            NodeState::Waiting | NodeState::Error | NodeState::OnDfsStack => return,
-            NodeState::Success => node.state.set(NodeState::Waiting),
-            NodeState::Pending | NodeState::Done => {},
-        }
-
-        // This call site is cold.
-        self.uninlined_mark_neighbors_as_waiting_from(node);
-    }
-
-    /// Compresses the vector, removing all popped nodes. This adjusts
-    /// the indices and hence invalidates any outstanding
-    /// indices. Cannot be used during a transaction.
+    /// Compresses the vector, removing all popped nodes. This adjusts the
+    /// indices and hence invalidates any outstanding indices.
     ///
     /// Beforehand, all nodes must be marked as `Done` and no cycles
     /// on these nodes may be present. This is done by e.g., `process_cycles`.
     #[inline(never)]
     fn compress(&mut self, do_completed: DoCompleted) -> Option<Vec<O>> {
-        let nodes_len = self.nodes.len();
-        let mut node_rewrites: Vec<_> = self.scratch.replace(vec![]);
-        node_rewrites.extend(0..nodes_len);
+        let orig_nodes_len = self.nodes.len();
+        let mut node_rewrites: Vec<_> = self.node_rewrites.replace(vec![]);
+        debug_assert!(node_rewrites.is_empty());
+        node_rewrites.extend(0..orig_nodes_len);
         let mut dead_nodes = 0;
+        let mut removed_done_obligations: Vec<O> = vec![];
 
-        // Now move all popped nodes to the end. Try to keep the order.
+        // Now move all Done/Error nodes to the end, preserving the order of
+        // the Pending/Waiting nodes.
         //
         // LOOP INVARIANT:
         //     self.nodes[0..index - dead_nodes] are the first remaining nodes
         //     self.nodes[index - dead_nodes..index] are all dead
         //     self.nodes[index..] are unchanged
-        for index in 0..self.nodes.len() {
+        for index in 0..orig_nodes_len {
             let node = &self.nodes[index];
             match node.state.get() {
                 NodeState::Pending | NodeState::Waiting => {
@@ -637,7 +623,7 @@ impl<O: ForestObligation> ObligationForest<O> {
                 }
                 NodeState::Done => {
                     // This lookup can fail because the contents of
-                    // `self.active_cache` is not guaranteed to match those of
+                    // `self.active_cache` are not guaranteed to match those of
                     // `self.nodes`. See the comment in `process_obligation`
                     // for more details.
                     if let Some((predicate, _)) =
@@ -647,7 +633,11 @@ impl<O: ForestObligation> ObligationForest<O> {
                     } else {
                         self.done_cache.insert(node.obligation.as_predicate().clone());
                     }
-                    node_rewrites[index] = nodes_len;
+                    if do_completed == DoCompleted::Yes {
+                        // Extract the success stories.
+                        removed_done_obligations.push(node.obligation.clone());
+                    }
+                    node_rewrites[index] = orig_nodes_len;
                     dead_nodes += 1;
                 }
                 NodeState::Error => {
@@ -655,53 +645,38 @@ impl<O: ForestObligation> ObligationForest<O> {
                     // tests must come up with a different type on every type error they
                     // check against.
                     self.active_cache.remove(node.obligation.as_predicate());
-                    node_rewrites[index] = nodes_len;
-                    dead_nodes += 1;
                     self.insert_into_error_cache(index);
+                    node_rewrites[index] = orig_nodes_len;
+                    dead_nodes += 1;
                 }
-                NodeState::OnDfsStack | NodeState::Success => unreachable!()
+                NodeState::Success => unreachable!()
             }
         }
 
-        // No compression needed.
-        if dead_nodes == 0 {
-            node_rewrites.truncate(0);
-            self.scratch.replace(node_rewrites);
-            return if do_completed == DoCompleted::Yes { Some(vec![]) } else { None };
+        if dead_nodes > 0 {
+            // Remove the dead nodes and rewrite indices.
+            self.nodes.truncate(orig_nodes_len - dead_nodes);
+            self.apply_rewrites(&node_rewrites);
         }
 
-        // Pop off all the nodes we killed and extract the success stories.
-        let successful = if do_completed == DoCompleted::Yes {
-            Some((0..dead_nodes)
-                .map(|_| self.nodes.pop().unwrap())
-                .flat_map(|node| {
-                    match node.state.get() {
-                        NodeState::Error => None,
-                        NodeState::Done => Some(node.obligation),
-                        _ => unreachable!()
-                    }
-                })
-                .collect())
-        } else {
-            self.nodes.truncate(self.nodes.len() - dead_nodes);
-            None
-        };
-        self.apply_rewrites(&node_rewrites);
-
         node_rewrites.truncate(0);
-        self.scratch.replace(node_rewrites);
+        self.node_rewrites.replace(node_rewrites);
 
-        successful
+        if do_completed == DoCompleted::Yes {
+            Some(removed_done_obligations)
+        } else {
+            None
+        }
     }
 
     fn apply_rewrites(&mut self, node_rewrites: &[usize]) {
-        let nodes_len = node_rewrites.len();
+        let orig_nodes_len = node_rewrites.len();
 
         for node in &mut self.nodes {
             let mut i = 0;
             while i < node.dependents.len() {
                 let new_index = node_rewrites[node.dependents[i]];
-                if new_index >= nodes_len {
+                if new_index >= orig_nodes_len {
                     node.dependents.swap_remove(i);
                     if i == 0 && node.has_parent {
                         // We just removed the parent.
@@ -718,7 +693,7 @@ impl<O: ForestObligation> ObligationForest<O> {
         // removal of nodes within `compress` can fail. See above.
         self.active_cache.retain(|_predicate, index| {
             let new_index = node_rewrites[*index];
-            if new_index >= nodes_len {
+            if new_index >= orig_nodes_len {
                 false
             } else {
                 *index = new_index;
diff --git a/src/librustc_data_structures/obligation_forest/tests.rs b/src/librustc_data_structures/obligation_forest/tests.rs
index e20466572a26f..54b6f6d0adc6c 100644
--- a/src/librustc_data_structures/obligation_forest/tests.rs
+++ b/src/librustc_data_structures/obligation_forest/tests.rs
@@ -116,7 +116,9 @@ fn push_pop() {
                 _ => unreachable!(),
             }
         }, |_| {}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["A.3", "A.1", "A.3.i"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["A.1", "A.3", "A.3.i"]);
     assert_eq!(err,
                vec![Error {
                         error: "A is for apple",
@@ -132,7 +134,9 @@ fn push_pop() {
                 _ => panic!("unexpected obligation {:?}", obligation),
             }
         }, |_| {}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["D.2.i", "D.2"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["D.2", "D.2.i"]);
     assert_eq!(err,
                vec![Error {
                         error: "D is for dumb",
@@ -172,7 +176,9 @@ fn success_in_grandchildren() {
                 _ => unreachable!(),
             }
         }, |_| {}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["A.3", "A.1"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["A.1", "A.3"]);
     assert!(err.is_empty());
 
     let Outcome { completed: ok, errors: err, .. } =
@@ -193,7 +199,9 @@ fn success_in_grandchildren() {
                 _ => unreachable!(),
             }
         }, |_| {}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["A.2.i.a", "A.2.i", "A.2", "A"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["A", "A.2", "A.2.i", "A.2.i.a"]);
     assert!(err.is_empty());
 
     let Outcome { completed: ok, errors: err, .. } =
@@ -261,7 +269,9 @@ fn diamond() {
             }
         }, |_|{}), DoCompleted::Yes);
     assert_eq!(d_count, 1);
-    assert_eq!(ok.unwrap(), vec!["D", "A.2", "A.1", "A"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["A", "A.1", "A.2", "D"]);
     assert_eq!(err.len(), 0);
 
     let errors = forest.to_errors(());
@@ -323,7 +333,9 @@ fn done_dependency() {
                 _ => unreachable!(),
             }
         }, |_|{}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["C: Sized", "B: Sized", "A: Sized"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["A: Sized", "B: Sized", "C: Sized"]);
     assert_eq!(err.len(), 0);
 
     forest.register_obligation("(A,B,C): Sized");
@@ -361,7 +373,9 @@ fn orphan() {
                 _ => unreachable!(),
             }
         }, |_|{}), DoCompleted::Yes);
-    assert_eq!(ok.unwrap(), vec!["C2", "C1"]);
+    let mut ok = ok.unwrap();
+    ok.sort();
+    assert_eq!(ok, vec!["C1", "C2"]);
     assert_eq!(err.len(), 0);
 
     let Outcome { completed: ok, errors: err, .. } =
diff --git a/src/librustc_errors/lib.rs b/src/librustc_errors/lib.rs
index c01dcd94c725e..f9dc13ce97eea 100644
--- a/src/librustc_errors/lib.rs
+++ b/src/librustc_errors/lib.rs
@@ -469,14 +469,17 @@ impl Handler {
     /// NOTE: *do not* call this function from rustc. It is only meant to be called from external
     /// tools that want to reuse a `Parser` cleaning the previously emitted diagnostics as well as
     /// the overall count of emitted error diagnostics.
-    // FIXME: this does not clear inner entirely
     pub fn reset_err_count(&self) {
         let mut inner = self.inner.borrow_mut();
-        // actually frees the underlying memory (which `clear` would not do)
-        inner.emitted_diagnostics = Default::default();
-        inner.deduplicated_err_count = 0;
         inner.err_count = 0;
-        inner.stashed_diagnostics.clear();
+        inner.deduplicated_err_count = 0;
+
+        // actually free the underlying memory (which `clear` would not do)
+        inner.delayed_span_bugs = Default::default();
+        inner.taught_diagnostics = Default::default();
+        inner.emitted_diagnostic_codes = Default::default();
+        inner.emitted_diagnostics = Default::default();
+        inner.stashed_diagnostics = Default::default();
     }
 
     /// Stash a given diagnostic with the given `Span` and `StashKey` as the key for later stealing.
diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs
index 13e2c5d1c574d..6af065513ee0d 100644
--- a/src/librustc_incremental/persist/save.rs
+++ b/src/librustc_incremental/persist/save.rs
@@ -28,6 +28,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
 
         join(move || {
             if tcx.sess.opts.debugging_opts.incremental_queries {
+                let _timer = tcx.prof.generic_activity("incr_comp_persist_result_cache");
+
                 time(sess, "persist query result cache", || {
                     save_in(sess,
                             query_cache_path,
@@ -36,6 +38,8 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
             }
         }, || {
             time(sess, "persist dep-graph", || {
+                let _timer = tcx.prof.generic_activity("incr_comp_persist_dep_graph");
+
                 save_in(sess,
                         dep_graph_path,
                         |e| {
@@ -135,6 +139,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
 
     // Encode the graph data.
     let serialized_graph = time(tcx.sess, "getting serialized graph", || {
+        let _timer = tcx.prof.generic_activity("incr_comp_serialize_dep_graph");
         tcx.dep_graph.serialize()
     });
 
@@ -214,6 +219,7 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
     }
 
     time(tcx.sess, "encoding serialized graph", || {
+        let _timer = tcx.prof.generic_activity("incr_comp_encode_serialized_dep_graph");
         serialized_graph.encode(encoder).unwrap();
     });
 }
diff --git a/src/librustc_index/bit_set.rs b/src/librustc_index/bit_set.rs
index 9c96645ccf977..8c49e0dde0dcd 100644
--- a/src/librustc_index/bit_set.rs
+++ b/src/librustc_index/bit_set.rs
@@ -621,7 +621,7 @@ impl<'a, T: Idx> Iterator for HybridIter<'a, T> {
 
     fn next(&mut self) -> Option<T> {
         match self {
-            HybridIter::Sparse(sparse) => sparse.next().map(|e| *e),
+            HybridIter::Sparse(sparse) => sparse.next().copied(),
             HybridIter::Dense(dense) => dense.next(),
         }
     }
diff --git a/src/librustc_interface/interface.rs b/src/librustc_interface/interface.rs
index fef60a47dc4e7..dae8fb242d58c 100644
--- a/src/librustc_interface/interface.rs
+++ b/src/librustc_interface/interface.rs
@@ -147,5 +147,8 @@ where
     F: FnOnce() -> R + Send,
     R: Send,
 {
-    util::spawn_thread_pool(edition, None, &None, f)
+    // the 1 here is duplicating code in config.opts.debugging_opts.threads
+    // which also defaults to 1; it ultimately doesn't matter as the default
+    // isn't threaded, and just ignores this parameter
+    util::spawn_thread_pool(edition, 1, &None, f)
 }
diff --git a/src/librustc_interface/passes.rs b/src/librustc_interface/passes.rs
index 9874c0673cfc9..0055e0a8b2e88 100644
--- a/src/librustc_interface/passes.rs
+++ b/src/librustc_interface/passes.rs
@@ -59,15 +59,17 @@ use std::rc::Rc;
 pub fn parse<'a>(sess: &'a Session, input: &Input) -> PResult<'a, ast::Crate> {
     sess.diagnostic()
         .set_continue_after_error(sess.opts.debugging_opts.continue_parse_after_error);
-    sess.profiler(|p| p.start_activity("parsing"));
-    let krate = time(sess, "parsing", || match *input {
-        Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
-        Input::Str {
-            ref input,
-            ref name,
-        } => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
+    let krate = time(sess, "parsing", || {
+        let _prof_timer = sess.prof.generic_activity("parse_crate");
+
+        match *input {
+            Input::File(ref file) => parse::parse_crate_from_file(file, &sess.parse_sess),
+            Input::Str {
+                ref input,
+                ref name,
+            } => parse::parse_crate_from_source_str(name.clone(), input.clone(), &sess.parse_sess),
+        }
     })?;
-    sess.profiler(|p| p.end_activity("parsing"));
 
     sess.diagnostic().set_continue_after_error(true);
 
@@ -355,8 +357,8 @@ fn configure_and_expand_inner<'a>(
     );
 
     // Expand all macros
-    sess.profiler(|p| p.start_activity("macro expansion"));
     krate = time(sess, "expansion", || {
+        let _prof_timer = sess.prof.generic_activity("macro_expand_crate");
         // Windows dlls do not have rpaths, so they don't know how to find their
         // dependencies. It's up to us to tell the system where to find all the
         // dependent dlls. Note that this uses cfg!(windows) as opposed to
@@ -430,7 +432,6 @@ fn configure_and_expand_inner<'a>(
         }
         krate
     });
-    sess.profiler(|p| p.end_activity("macro expansion"));
 
     time(sess, "maybe building test harness", || {
         syntax_ext::test_harness::inject(
@@ -1071,11 +1072,10 @@ pub fn start_codegen<'tcx>(
         encode_and_write_metadata(tcx, outputs)
     });
 
-    tcx.sess.profiler(|p| p.start_activity("codegen crate"));
     let codegen = time(tcx.sess, "codegen", move || {
+        let _prof_timer = tcx.prof.generic_activity("codegen_crate");
         codegen_backend.codegen_crate(tcx, metadata, need_metadata_module)
     });
-    tcx.sess.profiler(|p| p.end_activity("codegen crate"));
 
     if log_enabled!(::log::Level::Info) {
         println!("Post-codegen");
diff --git a/src/librustc_interface/util.rs b/src/librustc_interface/util.rs
index e73195fbb8c2d..b05bad554f406 100644
--- a/src/librustc_interface/util.rs
+++ b/src/librustc_interface/util.rs
@@ -173,7 +173,7 @@ pub fn scoped_thread<F: FnOnce() -> R + Send, R: Send>(cfg: thread::Builder, f:
 #[cfg(not(parallel_compiler))]
 pub fn spawn_thread_pool<F: FnOnce() -> R + Send, R: Send>(
     edition: Edition,
-    _threads: Option<usize>,
+    _threads: usize,
     stderr: &Option<Arc<Mutex<Vec<u8>>>>,
     f: F,
 ) -> R {
@@ -198,7 +198,7 @@ pub fn spawn_thread_pool<F: FnOnce() -> R + Send, R: Send>(
 #[cfg(parallel_compiler)]
 pub fn spawn_thread_pool<F: FnOnce() -> R + Send, R: Send>(
     edition: Edition,
-    threads: Option<usize>,
+    threads: usize,
     stderr: &Option<Arc<Mutex<Vec<u8>>>>,
     f: F,
 ) -> R {
@@ -209,7 +209,7 @@ pub fn spawn_thread_pool<F: FnOnce() -> R + Send, R: Send>(
     let mut config = ThreadPoolBuilder::new()
         .acquire_thread_handler(jobserver::acquire_thread)
         .release_thread_handler(jobserver::release_thread)
-        .num_threads(Session::threads_from_count(threads))
+        .num_threads(threads)
         .deadlock_handler(|| unsafe { ty::query::handle_deadlock() });
 
     if let Some(size) = get_stack_size() {
diff --git a/src/librustc_mir/borrow_check/nll/region_infer/error_reporting/mod.rs b/src/librustc_mir/borrow_check/nll/region_infer/error_reporting/mod.rs
index 6d3f2e566382f..502da588d19a2 100644
--- a/src/librustc_mir/borrow_check/nll/region_infer/error_reporting/mod.rs
+++ b/src/librustc_mir/borrow_check/nll/region_infer/error_reporting/mod.rs
@@ -1,5 +1,4 @@
 use crate::borrow_check::nll::constraints::OutlivesConstraint;
-use crate::borrow_check::nll::region_infer::AppliedMemberConstraint;
 use crate::borrow_check::nll::region_infer::RegionInferenceContext;
 use crate::borrow_check::nll::type_check::Locations;
 use crate::borrow_check::nll::universal_regions::DefiningTy;
@@ -253,29 +252,33 @@ impl<'tcx> RegionInferenceContext<'tcx> {
             let outgoing_edges_from_graph = self.constraint_graph
                 .outgoing_edges(r, &self.constraints, fr_static);
 
-
-            // But member constraints can also give rise to `'r: 'x`
-            // edges that were not part of the graph initially, so
-            // watch out for those.
-            let outgoing_edges_from_picks = self.applied_member_constraints(r)
-                .iter()
-                .map(|&AppliedMemberConstraint { min_choice, member_constraint_index, .. }| {
-                    let p_c = &self.member_constraints[member_constraint_index];
-                    OutlivesConstraint {
-                        sup: r,
-                        sub: min_choice,
-                        locations: Locations::All(p_c.definition_span),
-                        category: ConstraintCategory::OpaqueType,
-                    }
-                });
-
-            for constraint in outgoing_edges_from_graph.chain(outgoing_edges_from_picks) {
+            // Always inline this closure because it can be hot.
+            let mut handle_constraint = #[inline(always)] |constraint: OutlivesConstraint| {
                 debug_assert_eq!(constraint.sup, r);
                 let sub_region = constraint.sub;
                 if let Trace::NotVisited = context[sub_region] {
                     context[sub_region] = Trace::FromOutlivesConstraint(constraint);
                     deque.push_back(sub_region);
                 }
+            };
+
+            // This loop can be hot.
+            for constraint in outgoing_edges_from_graph {
+                handle_constraint(constraint);
+            }
+
+            // Member constraints can also give rise to `'r: 'x` edges that
+            // were not part of the graph initially, so watch out for those.
+            // (But they are extremely rare; this loop is very cold.)
+            for constraint in self.applied_member_constraints(r) {
+                let p_c = &self.member_constraints[constraint.member_constraint_index];
+                let constraint = OutlivesConstraint {
+                    sup: r,
+                    sub: constraint.min_choice,
+                    locations: Locations::All(p_c.definition_span),
+                    category: ConstraintCategory::OpaqueType,
+                };
+                handle_constraint(constraint);
             }
         }
 
diff --git a/src/librustc_mir/dataflow/generic/graphviz.rs b/src/librustc_mir/dataflow/generic/graphviz.rs
index 2a08feff9e77a..47ace8f33ecac 100644
--- a/src/librustc_mir/dataflow/generic/graphviz.rs
+++ b/src/librustc_mir/dataflow/generic/graphviz.rs
@@ -165,25 +165,25 @@ where
         block: BasicBlock,
     ) -> io::Result<()> {
         //   Sample output:
-        //   +-+--------------------------------------------------+
-        // A |                         bb4                        |
-        //   +-+----------------------------------+---------------+
-        // B |               MIR                  |     STATE     |
-        //   +-+----------------------------------+---------------+
-        // C | | (on entry)                       | {_0,_2,_3}    |
-        //   +-+----------------------------------+---------------+
-        // D |0|  0: StorageLive(_7)              |               |
-        //   +-+----------------------------------+---------------+
-        //   |1|  1: StorageLive(_8)              |               |
-        //   +-+----------------------------------+---------------+
-        //   |2|  2: _8 = &mut _1                 | +_8           |
-        //   +-+----------------------------------+---------------+
-        // E |T| _7 = const Foo::twiddle(move _8) | -_8           |
-        //   +-+----------------------------------+---------------+
-        // F | | (on unwind)                      | {_0,_2,_3,_7} |
-        //   +-+----------------------------------+---------------+
-        //   | | (on successful return)           | +_7           |
-        //   +-+----------------------------------+---------------+
+        //   +-+-----------------------------------------------+
+        // A |                      bb4                        |
+        //   +-+----------------------------------+------------+
+        // B |                MIR                 |   STATE    |
+        //   +-+----------------------------------+------------+
+        // C | | (on entry)                       | {_0,_2,_3} |
+        //   +-+----------------------------------+------------+
+        // D |0| StorageLive(_7)                  |            |
+        //   +-+----------------------------------+------------+
+        //   |1| StorageLive(_8)                  |            |
+        //   +-+----------------------------------+------------+
+        //   |2| _8 = &mut _1                     | +_8        |
+        //   +-+----------------------------------+------------+
+        // E |T| _4 = const Foo::twiddle(move _2) | -_2        |
+        //   +-+----------------------------------+------------+
+        // F | | (on unwind)                      | {_0,_3,_8} |
+        //   +-+----------------------------------+------------+
+        //   | | (on successful return)           | +_4        |
+        //   +-+----------------------------------+------------+
 
         write!(
             w,
@@ -211,6 +211,7 @@ where
         )?;
 
         // C: Entry state
+        self.bg = Background::Light;
         self.results.seek_to_block_start(block);
         self.write_row_with_curr_state(w, "", "(on entry)")?;
         self.prev_state.overwrite(self.results.get());
diff --git a/src/librustc_mir/lib.rs b/src/librustc_mir/lib.rs
index 938dab57181d6..81c08ee87e985 100644
--- a/src/librustc_mir/lib.rs
+++ b/src/librustc_mir/lib.rs
@@ -25,6 +25,7 @@ Rust MIR: a lowered representation of Rust. Also: an experiment!
 #![feature(mem_take)]
 #![feature(associated_type_bounds)]
 #![feature(range_is_empty)]
+#![feature(stmt_expr_attributes)]
 
 #![recursion_limit="256"]
 
diff --git a/src/librustc_typeck/check/writeback.rs b/src/librustc_typeck/check/writeback.rs
index 99d62d198a9fd..7a8a209a5350c 100644
--- a/src/librustc_typeck/check/writeback.rs
+++ b/src/librustc_typeck/check/writeback.rs
@@ -190,9 +190,26 @@ impl<'cx, 'tcx> WritebackCx<'cx, 'tcx> {
         if let hir::ExprKind::Index(ref base, ref index) = e.kind {
             let mut tables = self.fcx.tables.borrow_mut();
 
-            // All valid indexing looks like this; might encounter non-valid indexes at this point
-            if let ty::Ref(_, base_ty, _) = tables.expr_ty_adjusted(&base).kind {
-                let index_ty = tables.expr_ty_adjusted(&index);
+            // All valid indexing looks like this; might encounter non-valid indexes at this point.
+            let base_ty = tables.expr_ty_adjusted_opt(&base).map(|t| &t.kind);
+            if base_ty.is_none() {
+                // When encountering `return [0][0]` outside of a `fn` body we can encounter a base
+                // that isn't in the type table. We assume more relevant errors have already been
+                // emitted, so we delay an ICE if none have. (#64638)
+                self.tcx().sess.delay_span_bug(e.span, &format!("bad base: `{:?}`", base));
+            }
+            if let Some(ty::Ref(_, base_ty, _)) = base_ty {
+                let index_ty = tables.expr_ty_adjusted_opt(&index).unwrap_or_else(|| {
+                    // When encountering `return [0][0]` outside of a `fn` body we would attempt
+                    // to access an unexistend index. We assume that more relevant errors will
+                    // already have been emitted, so we only gate on this with an ICE if no
+                    // error has been emitted. (#64638)
+                    self.tcx().sess.delay_span_bug(
+                        e.span,
+                        &format!("bad index {:?} for base: `{:?}`", index, base),
+                    );
+                    self.fcx.tcx.types.err
+                });
                 let index_ty = self.fcx.resolve_vars_if_possible(&index_ty);
 
                 if base_ty.builtin_index().is_some() && index_ty == self.fcx.tcx.types.usize {
diff --git a/src/librustc_typeck/lib.rs b/src/librustc_typeck/lib.rs
index 00be1c84599a3..26a8f79b8d831 100644
--- a/src/librustc_typeck/lib.rs
+++ b/src/librustc_typeck/lib.rs
@@ -295,7 +295,7 @@ pub fn provide(providers: &mut Providers<'_>) {
 }
 
 pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
-    tcx.sess.profiler(|p| p.start_activity("type-check crate"));
+    let _prof_timer = tcx.prof.generic_activity("type_check_crate");
 
     // this ensures that later parts of type checking can assume that items
     // have valid types and not error
@@ -347,8 +347,6 @@ pub fn check_crate(tcx: TyCtxt<'_>) -> Result<(), ErrorReported> {
     check_unused::check_crate(tcx);
     check_for_entry_fn(tcx);
 
-    tcx.sess.profiler(|p| p.end_activity("type-check crate"));
-
     if tcx.sess.err_count() == 0 {
         Ok(())
     } else {
diff --git a/src/libserialize/json.rs b/src/libserialize/json.rs
index d0007074a823c..d2e360f5e20fd 100644
--- a/src/libserialize/json.rs
+++ b/src/libserialize/json.rs
@@ -1053,12 +1053,12 @@ impl Json {
     /// a value associated with the provided key is found. If no value is found
     /// or the Json value is not an Object, returns `None`.
     pub fn search(&self, key: &str) -> Option<&Json> {
-        match self {
-            &Json::Object(ref map) => {
+        match *self {
+            Json::Object(ref map) => {
                 match map.get(key) {
                     Some(json_value) => Some(json_value),
                     None => {
-                        for (_, v) in map {
+                        for v in map.values() {
                             match v.search(key) {
                                 x if x.is_some() => return x,
                                 _ => ()
@@ -1487,12 +1487,12 @@ impl<T: Iterator<Item=char>> Parser<T> {
     }
 
     fn parse_number(&mut self) -> JsonEvent {
-        let mut neg = false;
-
-        if self.ch_is('-') {
+        let neg = if self.ch_is('-') {
             self.bump();
-            neg = true;
-        }
+            true
+        } else {
+            false
+        };
 
         let res = match self.parse_u64() {
             Ok(res) => res,
@@ -2162,10 +2162,9 @@ impl crate::Decoder for Decoder {
         let s = self.read_str()?;
         {
             let mut it = s.chars();
-            match (it.next(), it.next()) {
+            if let (Some(c), None) = (it.next(), it.next()) {
                 // exactly one character
-                (Some(c), None) => return Ok(c),
-                _ => ()
+                return Ok(c);
             }
         }
         Err(ExpectedError("single character string".to_owned(), s.to_string()))
diff --git a/src/test/ui/issues/issue-64620.rs b/src/test/ui/issues/issue-64620.rs
new file mode 100644
index 0000000000000..a62e5bf8d3c62
--- /dev/null
+++ b/src/test/ui/issues/issue-64620.rs
@@ -0,0 +1,5 @@
+enum Bug {
+    V1 = return [0][0] //~ERROR return statement outside of function body
+}
+
+fn main() {}
diff --git a/src/test/ui/issues/issue-64620.stderr b/src/test/ui/issues/issue-64620.stderr
new file mode 100644
index 0000000000000..f40ac4de32d59
--- /dev/null
+++ b/src/test/ui/issues/issue-64620.stderr
@@ -0,0 +1,9 @@
+error[E0572]: return statement outside of function body
+  --> $DIR/issue-64620.rs:2:10
+   |
+LL |     V1 = return [0][0]
+   |          ^^^^^^^^^^^^^
+
+error: aborting due to previous error
+
+For more information about this error, try `rustc --explain E0572`.
diff --git a/src/tools/cargo b/src/tools/cargo
index ab6fa8908c9b6..8b0561d68f12e 160000
--- a/src/tools/cargo
+++ b/src/tools/cargo
@@ -1 +1 @@
-Subproject commit ab6fa8908c9b6c8f7e2249231c395eebfbc49891
+Subproject commit 8b0561d68f12eeb1d72e07ceef464ebf6032a1bc
diff --git a/src/tools/rls b/src/tools/rls
index 80a1d340f7d65..8dc9ba96d57c5 160000
--- a/src/tools/rls
+++ b/src/tools/rls
@@ -1 +1 @@
-Subproject commit 80a1d340f7d65b466bd3e0513c6b4b53498de2ff
+Subproject commit 8dc9ba96d57c5705b99a18a380d41579e9d2d675
diff --git a/src/tools/rustc-workspace-hack/Cargo.toml b/src/tools/rustc-workspace-hack/Cargo.toml
index 980c9753761ed..a78cbdc2c4c6a 100644
--- a/src/tools/rustc-workspace-hack/Cargo.toml
+++ b/src/tools/rustc-workspace-hack/Cargo.toml
@@ -24,6 +24,7 @@ features = [
   "jobapi",
   "jobapi2",
   "knownfolders",
+  "libloaderapi",
   "lmcons",
   "memoryapi",
   "minschannel",