Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit dd4c81e

Browse files
committed May 22, 2025
Build & upload rustdoc json output next to other docs
1 parent 7ba7591 commit dd4c81e

File tree

4 files changed

+278
-12
lines changed

4 files changed

+278
-12
lines changed
 

‎src/db/delete.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use super::{CrateId, update_latest_version_id};
1111

1212
/// List of directories in docs.rs's underlying storage (either the database or S3) containing a
1313
/// subdirectory named after the crate. Those subdirectories will be deleted.
14-
static LIBRARY_STORAGE_PATHS_TO_DELETE: &[&str] = &["rustdoc", "sources"];
14+
static LIBRARY_STORAGE_PATHS_TO_DELETE: &[&str] = &["rustdoc", "rustdoc-json", "sources"];
1515
static OTHER_STORAGE_PATHS_TO_DELETE: &[&str] = &["sources"];
1616

1717
#[derive(Debug, thiserror::Error)]
@@ -222,6 +222,7 @@ mod tests {
222222
use super::*;
223223
use crate::db::ReleaseId;
224224
use crate::registry_api::{CrateOwner, OwnerKind};
225+
use crate::storage::rustdoc_json_path;
225226
use crate::test::{async_wrapper, fake_release_that_failed_before_build};
226227
use test_case::test_case;
227228

@@ -405,6 +406,17 @@ mod tests {
405406
.collect())
406407
}
407408

409+
async fn json_exists(storage: &AsyncStorage, version: &str) -> Result<bool> {
410+
storage
411+
.exists(&rustdoc_json_path(
412+
"a",
413+
version,
414+
"x86_64-unknown-linux-gnu",
415+
crate::storage::RustdocJsonFormatVersion::Latest,
416+
))
417+
.await
418+
}
419+
408420
let mut conn = env.async_db().await.async_conn().await;
409421
let v1 = env
410422
.fake_release()
@@ -426,6 +438,7 @@ mod tests {
426438
.rustdoc_file_exists("a", "1.0.0", None, "a/index.html", archive_storage)
427439
.await?
428440
);
441+
assert!(json_exists(&*env.async_storage().await, "1.0.0").await?);
429442
let crate_id = sqlx::query_scalar!(
430443
r#"SELECT crate_id as "crate_id: CrateId" FROM releases WHERE id = $1"#,
431444
v1.0
@@ -457,6 +470,7 @@ mod tests {
457470
.rustdoc_file_exists("a", "2.0.0", None, "a/index.html", archive_storage)
458471
.await?
459472
);
473+
assert!(json_exists(&*env.async_storage().await, "2.0.0").await?);
460474
assert_eq!(
461475
owners(&mut conn, crate_id).await?,
462476
vec!["Peter Rabbit".to_string()]
@@ -494,13 +508,16 @@ mod tests {
494508
.await?
495509
);
496510
}
511+
assert!(!json_exists(&*env.async_storage().await, "1.0.0").await?);
512+
497513
assert!(release_exists(&mut conn, v2).await?);
498514
assert!(
499515
env.async_storage()
500516
.await
501517
.rustdoc_file_exists("a", "2.0.0", None, "a/index.html", archive_storage)
502518
.await?
503519
);
520+
assert!(json_exists(&*env.async_storage().await, "2.0.0").await?);
504521
assert_eq!(
505522
owners(&mut conn, crate_id).await?,
506523
vec!["Peter Rabbit".to_string()]

‎src/docbuilder/rustwide_builder.rs

Lines changed: 210 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ use crate::db::{
1212
use crate::docbuilder::Limits;
1313
use crate::error::Result;
1414
use crate::repositories::RepositoryStatsUpdater;
15-
use crate::storage::{rustdoc_archive_path, source_archive_path};
15+
use crate::storage::{
16+
CompressionAlgorithm, RustdocJsonFormatVersion, compress, rustdoc_archive_path,
17+
rustdoc_json_path, source_archive_path,
18+
};
1619
use crate::utils::{
1720
CargoMetadata, ConfigName, copy_dir_all, get_config, parse_rustc_version, report_error,
1821
set_config,
@@ -26,19 +29,39 @@ use rustwide::cmd::{Command, CommandError, SandboxBuilder, SandboxImage};
2629
use rustwide::logging::{self, LogStorage};
2730
use rustwide::toolchain::ToolchainError;
2831
use rustwide::{AlternativeRegistry, Build, Crate, Toolchain, Workspace, WorkspaceBuilder};
32+
use serde::Deserialize;
2933
use std::collections::{HashMap, HashSet};
30-
use std::fs;
34+
use std::fs::{self, File};
35+
use std::io::BufReader;
3136
use std::path::Path;
3237
use std::sync::Arc;
3338
use std::time::Instant;
3439
use tokio::runtime::Runtime;
35-
use tracing::{debug, info, info_span, instrument, warn};
40+
use tracing::{debug, error, info, info_span, instrument, warn};
3641

3742
const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)";
3843
const COMPONENTS: &[&str] = &["llvm-tools-preview", "rustc-dev", "rustfmt"];
3944
const DUMMY_CRATE_NAME: &str = "empty-library";
4045
const DUMMY_CRATE_VERSION: &str = "1.0.0";
4146

47+
/// Read the format version from a rustdoc JSON file.
48+
fn read_format_version_from_rustdoc_json(
49+
reader: impl std::io::Read,
50+
) -> Result<RustdocJsonFormatVersion> {
51+
let reader = BufReader::new(reader);
52+
53+
#[derive(Deserialize)]
54+
struct RustdocJson {
55+
format_version: u16,
56+
}
57+
58+
let rustdoc_json: RustdocJson = serde_json::from_reader(reader)?;
59+
60+
Ok(RustdocJsonFormatVersion::Version(
61+
rustdoc_json.format_version,
62+
))
63+
}
64+
4265
async fn get_configured_toolchain(conn: &mut sqlx::PgConnection) -> Result<Toolchain> {
4366
let name: String = get_config(conn, ConfigName::Toolchain)
4467
.await?
@@ -303,8 +326,18 @@ impl RustwideBuilder {
303326
.run(|build| {
304327
let metadata = Metadata::from_crate_root(build.host_source_dir())?;
305328

306-
let res =
307-
self.execute_build(HOST_TARGET, true, build, &limits, &metadata, true, false)?;
329+
let res = self.execute_build(
330+
BuildId(0),
331+
DUMMY_CRATE_NAME,
332+
DUMMY_CRATE_VERSION,
333+
HOST_TARGET,
334+
true,
335+
build,
336+
&limits,
337+
&metadata,
338+
true,
339+
false,
340+
)?;
308341
if !res.result.successful {
309342
bail!("failed to build dummy crate for {}", rustc_version);
310343
}
@@ -518,12 +551,13 @@ impl RustwideBuilder {
518551
build.fetch_build_std_dependencies(&targets)?;
519552
}
520553

554+
521555
let mut has_docs = false;
522556
let mut successful_targets = Vec::new();
523557

524558
// Perform an initial build
525559
let mut res =
526-
self.execute_build(default_target, true, build, &limits, &metadata, false, collect_metrics)?;
560+
self.execute_build(build_id, name, version, default_target, true, build, &limits, &metadata, false, collect_metrics)?;
527561

528562
// If the build fails with the lockfile given, try using only the dependencies listed in Cargo.toml.
529563
let cargo_lock = build.host_source_dir().join("Cargo.lock");
@@ -545,7 +579,7 @@ impl RustwideBuilder {
545579
.run_capture()?;
546580
}
547581
res =
548-
self.execute_build(default_target, true, build, &limits, &metadata, false, collect_metrics)?;
582+
self.execute_build(build_id, name, version, default_target, true, build, &limits, &metadata, false, collect_metrics)?;
549583
}
550584

551585
if res.result.successful {
@@ -576,6 +610,9 @@ impl RustwideBuilder {
576610
for target in other_targets.into_iter().take(limits.targets()) {
577611
debug!("building package {} {} for {}", name, version, target);
578612
let target_res = self.build_target(
613+
build_id,
614+
name,
615+
version,
579616
target,
580617
build,
581618
&limits,
@@ -751,6 +788,9 @@ impl RustwideBuilder {
751788
#[allow(clippy::too_many_arguments)]
752789
fn build_target(
753790
&self,
791+
build_id: BuildId,
792+
name: &str,
793+
version: &str,
754794
target: &str,
755795
build: &Build,
756796
limits: &Limits,
@@ -760,6 +800,9 @@ impl RustwideBuilder {
760800
collect_metrics: bool,
761801
) -> Result<FullBuildResult> {
762802
let target_res = self.execute_build(
803+
build_id,
804+
name,
805+
version,
763806
target,
764807
false,
765808
build,
@@ -781,6 +824,102 @@ impl RustwideBuilder {
781824
Ok(target_res)
782825
}
783826

827+
/// Run the build with rustdoc JSON output for a specific target and directly upload the
828+
/// build log & the JSON files.
829+
///
830+
/// The method only returns an `Err` for internal errors that should be retryable.
831+
/// For all build errors we would just upload the log file and still return `Ok(())`.
832+
#[instrument(skip(self, build))]
833+
#[allow(clippy::too_many_arguments)]
834+
fn execute_json_build(
835+
&self,
836+
build_id: BuildId,
837+
name: &str,
838+
version: &str,
839+
target: &str,
840+
is_default_target: bool,
841+
build: &Build,
842+
metadata: &Metadata,
843+
limits: &Limits,
844+
) -> Result<()> {
845+
let rustdoc_flags = vec!["--output-format".to_string(), "json".to_string()];
846+
847+
let mut storage = LogStorage::new(log::LevelFilter::Info);
848+
storage.set_max_size(limits.max_log_size());
849+
850+
let successful = logging::capture(&storage, || {
851+
let _span = info_span!("cargo_build_json", target = %target).entered();
852+
self.prepare_command(build, target, metadata, limits, rustdoc_flags, false)
853+
.and_then(|command| command.run().map_err(Error::from))
854+
.is_ok()
855+
});
856+
857+
{
858+
let _span = info_span!("store_json_build_logs").entered();
859+
let build_log_path = format!("build-logs/{build_id}/{target}_json.txt");
860+
self.storage
861+
.store_one(build_log_path, storage.to_string())
862+
.context("storing build log on S3")?;
863+
}
864+
865+
if !successful {
866+
// this is a normal build error and will be visible in the uploaded build logs.
867+
// We don't need the Err variant here.
868+
return Ok(());
869+
}
870+
871+
let json_dir = if metadata.proc_macro {
872+
assert!(
873+
is_default_target && target == HOST_TARGET,
874+
"can't handle cross-compiling macros"
875+
);
876+
build.host_target_dir().join("doc")
877+
} else {
878+
build.host_target_dir().join(target).join("doc")
879+
};
880+
881+
let json_filename = fs::read_dir(&json_dir)?
882+
.filter_map(|entry| {
883+
let entry = entry.ok()?;
884+
let path = entry.path();
885+
if path.is_file() && path.extension()? == "json" {
886+
Some(path)
887+
} else {
888+
None
889+
}
890+
})
891+
.next()
892+
.ok_or_else(|| {
893+
anyhow!("no JSON file found in target/doc after successful rustdoc json build")
894+
})?;
895+
896+
let format_version = {
897+
let _span = info_span!("read_format_version").entered();
898+
read_format_version_from_rustdoc_json(&File::open(&json_filename)?)
899+
.context("couldn't parse rustdoc json to find format version")?
900+
};
901+
902+
let compressed_json: Vec<u8> = {
903+
let _span =
904+
info_span!("compress_json", file_size = json_filename.metadata()?.len()).entered();
905+
906+
compress(
907+
BufReader::new(File::open(&json_filename)?),
908+
CompressionAlgorithm::Zstd,
909+
)?
910+
};
911+
912+
for format_version in [format_version, RustdocJsonFormatVersion::Latest] {
913+
let _span = info_span!("store_json", %format_version).entered();
914+
self.storage.store_one(
915+
rustdoc_json_path(name, version, target, format_version),
916+
compressed_json.clone(),
917+
)?;
918+
}
919+
920+
Ok(())
921+
}
922+
784923
#[instrument(skip(self, build))]
785924
fn get_coverage(
786925
&self,
@@ -841,6 +980,9 @@ impl RustwideBuilder {
841980
#[allow(clippy::too_many_arguments)]
842981
fn execute_build(
843982
&self,
983+
build_id: BuildId,
984+
name: &str,
985+
version: &str,
844986
target: &str,
845987
is_default_target: bool,
846988
build: &Build,
@@ -883,6 +1025,26 @@ impl RustwideBuilder {
8831025
}
8841026
};
8851027

1028+
if let Err(err) = self.execute_json_build(
1029+
build_id,
1030+
name,
1031+
version,
1032+
target,
1033+
is_default_target,
1034+
build,
1035+
metadata,
1036+
limits,
1037+
) {
1038+
// FIXME: this is temporary. Theoretically all `Err` things coming out
1039+
// of the method should be retryable, so we could just use `?` here.
1040+
// But since this is new, I want to be careful and first see what kind of
1041+
// errors we are seeing here.
1042+
error!(
1043+
?err,
1044+
"internal error when trying to generate rustdoc JSON output"
1045+
);
1046+
}
1047+
8861048
let successful = {
8871049
let _span = info_span!("cargo_build", target = %target, is_default_target).entered();
8881050
logging::capture(&storage, || {
@@ -1114,13 +1276,12 @@ impl Default for BuildPackageSummary {
11141276

11151277
#[cfg(test)]
11161278
mod tests {
1117-
use std::iter;
1118-
11191279
use super::*;
11201280
use crate::db::types::Feature;
11211281
use crate::registry_api::ReleaseData;
11221282
use crate::storage::CompressionAlgorithm;
11231283
use crate::test::{AxumRouterTestExt, TestEnvironment, wrapper};
1284+
use std::{io, iter};
11241285

11251286
fn get_features(
11261287
env: &TestEnvironment,
@@ -1305,6 +1466,31 @@ mod tests {
13051466

13061467
// other targets too
13071468
for target in DEFAULT_TARGETS {
1469+
// check if rustdoc json files exist for all targets
1470+
assert!(storage.exists(&rustdoc_json_path(
1471+
crate_,
1472+
version,
1473+
target,
1474+
RustdocJsonFormatVersion::Latest
1475+
))?);
1476+
1477+
let json_prefix = format!("rustdoc-json/{crate_}/{version}/{target}/");
1478+
let mut json_files: Vec<_> = storage
1479+
.list_prefix(&json_prefix)
1480+
.filter_map(|res| res.ok())
1481+
.map(|f| f.strip_prefix(&json_prefix).unwrap().to_owned())
1482+
.collect();
1483+
json_files.sort();
1484+
dbg!(&json_prefix);
1485+
dbg!(&json_files);
1486+
assert_eq!(
1487+
json_files,
1488+
vec![
1489+
format!("empty-library_1.0.0_{target}_45.json.zst"),
1490+
format!("empty-library_1.0.0_{target}_latest.json.zst"),
1491+
]
1492+
);
1493+
13081494
if target == &default_target {
13091495
continue;
13101496
}
@@ -1876,4 +2062,19 @@ mod tests {
18762062
Ok(())
18772063
})
18782064
}
2065+
2066+
#[test]
2067+
fn test_read_format_version_from_rustdoc_json() -> Result<()> {
2068+
let buf = serde_json::to_vec(&serde_json::json!({
2069+
"something": "else",
2070+
"format_version": 42
2071+
}))?;
2072+
2073+
assert_eq!(
2074+
read_format_version_from_rustdoc_json(&mut io::Cursor::new(buf))?,
2075+
RustdocJsonFormatVersion::Version(42)
2076+
);
2077+
2078+
Ok(())
2079+
}
18792080
}

‎src/storage/mod.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,25 @@ pub(crate) fn rustdoc_archive_path(name: &str, version: &str) -> String {
815815
format!("rustdoc/{name}/{version}.zip")
816816
}
817817

818+
#[derive(strum::Display, Debug, PartialEq, Eq)]
819+
#[strum(serialize_all = "snake_case")]
820+
pub(crate) enum RustdocJsonFormatVersion {
821+
#[strum(serialize = "{0}")]
822+
Version(u16),
823+
Latest,
824+
}
825+
826+
pub(crate) fn rustdoc_json_path(
827+
name: &str,
828+
version: &str,
829+
target: &str,
830+
format_version: RustdocJsonFormatVersion,
831+
) -> String {
832+
format!(
833+
"rustdoc-json/{name}/{version}/{target}/{name}_{version}_{target}_{format_version}.json.zst"
834+
)
835+
}
836+
818837
pub(crate) fn source_archive_path(name: &str, version: &str) -> String {
819838
format!("sources/{name}/{version}.zip")
820839
}

‎src/test/fakes.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ use crate::docbuilder::DocCoverage;
99
use crate::error::Result;
1010
use crate::registry_api::{CrateData, CrateOwner, ReleaseData};
1111
use crate::storage::{
12-
AsyncStorage, CompressionAlgorithm, rustdoc_archive_path, source_archive_path,
12+
AsyncStorage, CompressionAlgorithm, RustdocJsonFormatVersion, rustdoc_archive_path,
13+
rustdoc_json_path, source_archive_path,
1314
};
1415
use crate::utils::{Dependency, MetadataPackage, Target};
1516
use anyhow::{Context, bail};
@@ -512,10 +513,38 @@ impl<'a> FakeRelease<'a> {
512513
}
513514
store_files_into(&self.source_files, crate_dir)?;
514515

516+
let default_target = self.default_target.unwrap_or("x86_64-unknown-linux-gnu");
517+
518+
{
519+
let mut targets = self.doc_targets.clone();
520+
if !targets.contains(&default_target.to_owned()) {
521+
targets.push(default_target.to_owned());
522+
}
523+
for target in &targets {
524+
for format_version in [
525+
RustdocJsonFormatVersion::Version(42),
526+
RustdocJsonFormatVersion::Latest,
527+
] {
528+
storage
529+
.store_one(
530+
&rustdoc_json_path(
531+
&package.name,
532+
&package.version,
533+
target,
534+
format_version,
535+
),
536+
serde_json::to_vec(&serde_json::json!({
537+
"format_version": 42
538+
}))?,
539+
)
540+
.await?;
541+
}
542+
}
543+
}
544+
515545
// Many tests rely on the default-target being linux, so it should not
516546
// be set to docsrs_metadata::HOST_TARGET, because then tests fail on all
517547
// non-linux platforms.
518-
let default_target = self.default_target.unwrap_or("x86_64-unknown-linux-gnu");
519548
let mut async_conn = db.async_conn().await;
520549
let crate_id = initialize_crate(&mut async_conn, &package.name).await?;
521550
let release_id = initialize_release(&mut async_conn, crate_id, &package.version).await?;

0 commit comments

Comments
 (0)
Please sign in to comment.