Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: registry source cleanup #12240

Merged
merged 5 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 195 additions & 1 deletion src/cargo/sources/registry/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,20 @@
//! [`RemoteRegistry`]: super::remote::RemoteRegistry
//! [`Dependency`]: crate::core::Dependency

use crate::core::dependency::DepKind;
use crate::core::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage};
use crate::sources::registry::{LoadResponse, RegistryData};
use crate::util::interning::InternedString;
use crate::util::IntoUrl;
use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver};
use anyhow::bail;
use cargo_util::{paths, registry::make_dep_path};
use log::{debug, info};
use semver::Version;
use serde::Deserialize;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::ErrorKind;
Expand Down Expand Up @@ -261,6 +267,97 @@ struct SummariesCache<'a> {
index_version: &'a str,
}

/// A single line in the index representing a single version of a package.
#[derive(Deserialize)]
pub struct RegistryPackage<'a> {
weihanglo marked this conversation as resolved.
Show resolved Hide resolved
/// Name of the pacakge.
name: InternedString,
/// The version of this dependency.
vers: Version,
/// All kinds of direct dependencies of the package, including dev and
/// build dependencies.
#[serde(borrow)]
deps: Vec<RegistryDependency<'a>>,
/// Set of features defined for the package, i.e., `[features]` table.
features: BTreeMap<InternedString, Vec<InternedString>>,
/// This field contains features with new, extended syntax. Specifically,
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
///
/// This is separated from `features` because versions older than 1.19
/// will fail to load due to not being able to parse the new syntax, even
/// with a `Cargo.lock` file.
features2: Option<BTreeMap<InternedString, Vec<InternedString>>>,
/// Checksum for verifying the integrity of the corresponding downloaded package.
cksum: String,
/// If `true`, Cargo will skip this version when resolving.
///
/// This was added in 2014. Everything in the crates.io index has this set
/// now, so this probably doesn't need to be an option anymore.
yanked: Option<bool>,
/// Native library name this package links to.
///
/// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
/// can be `None` if published before then.
links: Option<InternedString>,
/// Required version of rust
///
/// Corresponds to `package.rust-version`.
///
/// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
/// can be `None` if published before then or if not set in the manifest.
rust_version: Option<InternedString>,
/// The schema version for this entry.
///
/// If this is None, it defaults to version `1`. Entries with unknown
/// versions are ignored.
///
/// Version `2` schema adds the `features2` field.
///
/// This provides a method to safely introduce changes to index entries
/// and allow older versions of cargo to ignore newer entries it doesn't
/// understand. This is honored as of 1.51, so unfortunately older
/// versions will ignore it, and potentially misinterpret version 2 and
/// newer entries.
///
/// The intent is that versions older than 1.51 will work with a
/// pre-existing `Cargo.lock`, but they may not correctly process `cargo
/// update` or build a lock from scratch. In that case, cargo may
/// incorrectly select a new package that uses a new index schema. A
/// workaround is to downgrade any packages that are incompatible with the
/// `--precise` flag of `cargo update`.
v: Option<u32>,
}

/// A dependency as encoded in the [`RegistryPackage`] index JSON.
#[derive(Deserialize)]
struct RegistryDependency<'a> {
/// Name of the dependency. If the dependency is renamed, the original
/// would be stored in [`RegistryDependency::package`].
name: InternedString,
/// The SemVer requirement for this dependency.
#[serde(borrow)]
req: Cow<'a, str>,
/// Set of features enabled for this dependency.
features: Vec<InternedString>,
/// Whether or not this is an optional dependency.
optional: bool,
/// Whether or not default features are enabled.
default_features: bool,
/// The target platform for this dependency.
target: Option<Cow<'a, str>>,
/// The dependency kind. "dev", "build", and "normal".
kind: Option<Cow<'a, str>>,
// The URL of the index of the registry where this dependency is from.
// `None` if it is from the same index.
registry: Option<Cow<'a, str>>,
/// The original name if the dependency is renamed.
package: Option<InternedString>,
/// Whether or not this is a public dependency. Unstable. See [RFC 1977].
///
/// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html
public: Option<bool>,
}

impl<'cfg> RegistryIndex<'cfg> {
/// Creates an empty registry index at `path`.
pub fn new(
Expand Down Expand Up @@ -843,6 +940,70 @@ impl IndexSummary {
}
}

impl<'a> RegistryDependency<'a> {
/// Converts an encoded dependency in the registry to a cargo dependency
pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> {
let RegistryDependency {
name,
req,
mut features,
optional,
default_features,
target,
kind,
registry,
package,
public,
} = self;

let id = if let Some(registry) = &registry {
SourceId::for_registry(&registry.into_url()?)?
} else {
default
};

let mut dep = Dependency::parse(package.unwrap_or(name), Some(&req), id)?;
if package.is_some() {
dep.set_explicit_name_in_toml(name);
}
let kind = match kind.as_deref().unwrap_or("") {
"dev" => DepKind::Development,
"build" => DepKind::Build,
_ => DepKind::Normal,
};

let platform = match target {
Some(target) => Some(target.parse()?),
None => None,
};

// All dependencies are private by default
let public = public.unwrap_or(false);

// Unfortunately older versions of cargo and/or the registry ended up
// publishing lots of entries where the features array contained the
// empty feature, "", inside. This confuses the resolution process much
// later on and these features aren't actually valid, so filter them all
// out here.
features.retain(|s| !s.is_empty());

// In index, "registry" is null if it is from the same index.
// In Cargo.toml, "registry" is None if it is from the default
if !id.is_crates_io() {
dep.set_registry_id(id);
}

dep.set_optional(optional)
.set_default_features(default_features)
.set_features(features)
.set_platform(platform)
.set_kind(kind)
.set_public(public);

Ok(dep)
}
}

/// Like [`slice::split`] but is optimized by [`memchr`].
fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
struct Split<'a> {
Expand All @@ -868,3 +1029,36 @@ fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {

Split { haystack, needle }
}

#[test]
fn escaped_char_in_index_json_blob() {
let _: RegistryPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#,
)
.unwrap();
let _: RegistryPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#
).unwrap();

// Now we add escaped cher all the places they can go
// these are not valid, but it should error later than json parsing
let _: RegistryPackage<'_> = serde_json::from_str(
r#"{
"name":"This name has a escaped cher in it \n\t\" ",
"vers":"0.0.1",
"deps":[{
"name": " \n\t\" ",
"req": " \n\t\" ",
"features": [" \n\t\" "],
"optional": true,
"default_features": true,
"target": " \n\t\" ",
"kind": " \n\t\" ",
"registry": " \n\t\" "
}],
"cksum":"bae3",
"features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
"links":" \n\t\" "}"#,
)
.unwrap();
}
Loading