Compare commits

...

3 Commits

Author SHA1 Message Date
Mark Sinclair 04ad1acd9f Merge pull request #6275 from nymtech/ip-fallback-cherrypick
cherrypick no_hostname only
2025-12-08 11:14:14 +00:00
Simon Wicky 5000e8ae39 cherrypick no_hostname only 2025-12-05 17:29:54 +01:00
Jack Wampler 17b9fa4dd5 DNS resilience patch (#6267)
* shared resolver static init, ipv4 only by default, nameserver list

* add fn to run a trial resolution with each nameserver and log results
2025-12-05 09:17:43 -07:00
13 changed files with 480 additions and 153 deletions
+1 -1
View File
@@ -264,7 +264,7 @@ generic-array = "0.14.7"
getrandom = "0.2.10"
handlebars = "3.5.5"
hex = "0.4.3"
hickory-resolver = "0.25"
hickory-resolver = "0.25.2"
hkdf = "0.12.3"
hmac = "0.12.1"
http = "1"
@@ -87,6 +87,7 @@ where
user_chosen_gateway_id.map(|id| id.to_base58_string()),
Some(common_args.latency_based_selection),
common_args.force_tls_gateway,
false,
);
tracing::debug!("Gateway selection specification: {selection_spec:?}");
@@ -136,6 +136,7 @@ where
user_chosen_gateway_id.map(|id| id.to_base58_string()),
Some(common_args.latency_based_selection),
common_args.force_tls_gateway,
false,
);
tracing::debug!("Gateway selection specification: {selection_spec:?}");
+3
View File
@@ -43,6 +43,9 @@ pub enum ClientCoreError {
#[error("Invalid URL: {0}")]
InvalidUrl(String),
#[error("node doesn't advertise ip addresses : {0}")]
MissingIpAddress(String),
#[cfg(not(target_arch = "wasm32"))]
#[error("resolution failed: {0}")]
ResolutionFailed(#[from] nym_http_api_client::ResolveError),
+12 -5
View File
@@ -71,21 +71,28 @@ where
let mut rng = OsRng;
let selected_gateway = match selection_specification {
GatewaySelectionSpecification::UniformRemote { must_use_tls } => {
GatewaySelectionSpecification::UniformRemote {
must_use_tls,
no_hostname,
} => {
let gateway = uniformly_random_gateway(&mut rng, &available_gateways, must_use_tls)?;
SelectedGateway::from_topology_node(gateway, must_use_tls)?
SelectedGateway::from_topology_node(gateway, must_use_tls, no_hostname)?
}
GatewaySelectionSpecification::RemoteByLatency { must_use_tls } => {
GatewaySelectionSpecification::RemoteByLatency {
must_use_tls,
no_hostname,
} => {
let gateway =
choose_gateway_by_latency(&mut rng, &available_gateways, must_use_tls).await?;
SelectedGateway::from_topology_node(gateway, must_use_tls)?
SelectedGateway::from_topology_node(gateway, must_use_tls, no_hostname)?
}
GatewaySelectionSpecification::Specified {
must_use_tls,
no_hostname,
identity,
} => {
let gateway = get_specified_gateway(&identity, &available_gateways, must_use_tls)?;
SelectedGateway::from_topology_node(gateway, must_use_tls)?
SelectedGateway::from_topology_node(gateway, must_use_tls, no_hostname)?
}
GatewaySelectionSpecification::Custom {
gateway_identity,
+38 -14
View File
@@ -42,24 +42,32 @@ impl SelectedGateway {
pub fn from_topology_node(
node: RoutingNode,
must_use_tls: bool,
no_hostname: bool,
) -> Result<Self, ClientCoreError> {
// for now, let's use 'old' behaviour, if you want to change it, you can pass it up the enum stack yourself : )
let prefer_ipv6 = false;
let gateway_listener = if must_use_tls {
node.ws_entry_address_tls()
.ok_or(ClientCoreError::UnsupportedWssProtocol {
gateway: node.identity_key.to_base58_string(),
})?
let (gateway_listener, _) = if must_use_tls {
// WSS main, no fallback
let primary =
node.ws_entry_address_tls()
.ok_or(ClientCoreError::UnsupportedWssProtocol {
gateway: node.identity_key.to_base58_string(),
})?;
(primary, None)
} else {
node.ws_entry_address(prefer_ipv6)
.ok_or(ClientCoreError::UnsupportedEntry {
let (maybe_primary, fallback) =
node.ws_entry_address_with_fallback(prefer_ipv6, no_hostname);
(
maybe_primary.ok_or(ClientCoreError::UnsupportedEntry {
id: node.node_id,
identity: node.identity_key.to_base58_string(),
})?
})?,
fallback,
)
};
let gateway_listener =
let gateway_listener_url =
Url::parse(&gateway_listener).map_err(|source| ClientCoreError::MalformedListener {
gateway_id: node.identity_key.to_base58_string(),
raw_listener: gateway_listener,
@@ -69,7 +77,7 @@ impl SelectedGateway {
Ok(SelectedGateway::Remote {
gateway_id: node.identity_key,
gateway_owner_address: None,
gateway_listener,
gateway_listener: gateway_listener_url,
})
}
@@ -150,15 +158,22 @@ impl InitialisationResult {
#[derive(Clone, Debug)]
pub enum GatewaySelectionSpecification {
/// Uniformly choose a random remote gateway.
UniformRemote { must_use_tls: bool },
UniformRemote {
must_use_tls: bool,
no_hostname: bool,
},
/// Should the new, remote, gateway be selected based on latency.
RemoteByLatency { must_use_tls: bool },
RemoteByLatency {
must_use_tls: bool,
no_hostname: bool,
},
/// Gateway with this specific identity should be chosen.
// JS: I don't really like the name of this enum variant but couldn't think of anything better at the time
Specified {
must_use_tls: bool,
no_hostname: bool,
identity: IdentityKey,
},
@@ -174,6 +189,7 @@ impl Default for GatewaySelectionSpecification {
fn default() -> Self {
GatewaySelectionSpecification::UniformRemote {
must_use_tls: false,
no_hostname: false,
}
}
}
@@ -183,16 +199,24 @@ impl GatewaySelectionSpecification {
gateway_identity: Option<String>,
latency_based_selection: Option<bool>,
must_use_tls: bool,
no_hostname: bool,
) -> Self {
if let Some(identity) = gateway_identity {
GatewaySelectionSpecification::Specified {
identity,
must_use_tls,
no_hostname,
}
} else if let Some(true) = latency_based_selection {
GatewaySelectionSpecification::RemoteByLatency { must_use_tls }
GatewaySelectionSpecification::RemoteByLatency {
must_use_tls,
no_hostname,
}
} else {
GatewaySelectionSpecification::UniformRemote { must_use_tls }
GatewaySelectionSpecification::UniformRemote {
must_use_tls,
no_hostname,
}
}
}
}
+363 -129
View File
@@ -3,28 +3,41 @@
//! DNS resolver configuration for internal lookups.
//!
//! The resolver itself is the set combination of the google, cloudflare, and quad9 endpoints
//! supporting DoH and DoT.
//! The resolver itself uses the combined set of the cloudflare and quad9 endpoints supporting DoH
//! and DoT.
//!
//! This resolver supports a fallback mechanism where, should the DNS-over-TLS resolution fail, a
//! followup resolution will be done using the hosts configured default (e.g. `/etc/resolve.conf` on
//! linux). This is disabled by default and can be enabled using [`enable_system_fallback`].
//!
//! Requires the `dns-over-https-rustls`, `webpki-roots` feature for the
//! `hickory-resolver` crate
//!
//!
//! Note: The hickory DoH resolver can cause warning logs about H2 connection failure. This
//! indicates that the long lived https connection was closed by the remote peer and the resolver
//! will have to reconnect. It should not impact actual functionality.
//!
//! code ref: https://github.com/hickory-dns/hickory-dns/blob/06a8b1ce9bd9322d8e6accf857d30257e1274427/crates/proto/src/h2/h2_client_stream.rs#L534
//!
//! example log:
//!
//! ```txt
//! WARN /home/ubuntu/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/hickory-proto-0.24.3/src/h2/h2_client_stream.rs:493: h2 connection failed: unexpected end of file
//! ```rust
//! use nym_http_api_client::HickoryDnsResolver;
//! # use nym_http_api_client::ResolveError;
//! # type Err = ResolveError;
//! # async fn run() -> Result<(), Err> {
//! let resolver = HickoryDnsResolver::default();
//! resolver.resolve_str("example.com").await?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Fallbacks
//!
//! **System Resolver --** This resolver supports an optional fallback mechanism where, should the
//! DNS-over-TLS resolution fail, a followup resolution will be done using the host's configured
//! default (e.g. `/etc/resolv.conf` on linux).
//!
//! This is disabled by default and can be enabled using `enable_system_fallback`.
//!
//! **Static Table --** There is also a second optional fallback mechanism that allows a static map
//! to be used as a last resort. This can help when DNS encounters errors due to blocked resolvers
//! or unknown conditions. This is enabled by default, and can be customized if building a new
//! resolver.
//!
//! ## IPv4 / IPv6
//!
//! By default the resolver uses only IPv4 nameservers, and is configured to do `A` lookups first,
//! and only do `AAAA` if no `A` record is available.
//!
//! ---
//!
//! Requires the `dns-over-https-rustls` and `webpki-roots` features of the `hickory-resolver` crate
#![deny(missing_docs)]
use crate::ClientBuilder;
@@ -39,7 +52,7 @@ use std::{
use hickory_resolver::{
TokioResolver,
config::{LookupIpStrategy, NameServerConfigGroup, ResolverConfig},
config::{NameServerConfig, NameServerConfigGroup, ResolverConfig, ResolverOpts},
lookup_ip::LookupIpIntoIter,
name_server::TokioConnectionProvider,
};
@@ -49,7 +62,11 @@ use tracing::*;
mod constants;
mod static_resolver;
pub use static_resolver::*;
pub(crate) use static_resolver::*;
pub(crate) const DEFAULT_POSITIVE_LOOKUP_CACHE_TTL: Duration = Duration::from_secs(1800);
pub(crate) const DEFAULT_OVERALL_LOOKUP_TIMEOUT: Duration = Duration::from_secs(6);
pub(crate) const DEFAULT_QUERY_TIMEOUT: Duration = Duration::from_secs(3);
impl ClientBuilder {
/// Override the DNS resolver implementation used by the underlying http client.
@@ -71,7 +88,10 @@ impl ClientBuilder {
// but tools like valgrind might report "memory leaks" as it isn't obvious this is intentional.
static SHARED_RESOLVER: LazyLock<HickoryDnsResolver> = LazyLock::new(|| {
tracing::debug!("Initializing shared DNS resolver");
HickoryDnsResolver::default()
HickoryDnsResolver {
use_shared: false, // prevent infinite recursion
..Default::default()
}
});
#[derive(Debug, thiserror::Error)]
@@ -111,7 +131,7 @@ pub struct HickoryDnsResolver {
state: Arc<OnceCell<TokioResolver>>,
fallback: Option<Arc<OnceCell<TokioResolver>>>,
static_base: Option<Arc<OnceCell<StaticResolver>>>,
dont_use_shared: bool,
use_shared: bool,
/// Overall timeout for dns lookup associated with any individual host resolution. For example,
/// use of retries, server_ordering_strategy, etc. ends absolutely if this timeout is reached.
overall_dns_timeout: Duration,
@@ -122,9 +142,9 @@ impl Default for HickoryDnsResolver {
Self {
state: Default::default(),
fallback: Default::default(),
static_base: Default::default(),
dont_use_shared: Default::default(),
overall_dns_timeout: Duration::from_secs(10),
static_base: Some(Default::default()),
use_shared: true,
overall_dns_timeout: DEFAULT_OVERALL_LOOKUP_TIMEOUT,
}
}
}
@@ -134,7 +154,7 @@ impl Resolve for HickoryDnsResolver {
let resolver = self.state.clone();
let maybe_fallback = self.fallback.clone();
let maybe_static = self.static_base.clone();
let independent = self.dont_use_shared;
let use_shared = self.use_shared;
let overall_dns_timeout = self.overall_dns_timeout;
Box::pin(async move {
resolve(
@@ -142,7 +162,7 @@ impl Resolve for HickoryDnsResolver {
resolver,
maybe_fallback,
maybe_static,
independent,
use_shared,
overall_dns_timeout,
)
.await
@@ -236,7 +256,7 @@ impl HickoryDnsResolver {
self.state.clone(),
self.fallback.clone(),
self.static_base.clone(),
self.dont_use_shared,
self.use_shared,
self.overall_dns_timeout,
)
.await
@@ -246,25 +266,25 @@ impl HickoryDnsResolver {
/// Create a (lazy-initialized) resolver that is not shared across threads.
pub fn thread_resolver() -> Self {
Self {
dont_use_shared: true,
use_shared: false,
..Default::default()
}
}
fn new_resolver(dont_use_shared: bool) -> Result<TokioResolver, ResolveError> {
fn new_resolver(use_shared: bool) -> Result<TokioResolver, ResolveError> {
// using a closure here is slightly gross, but this makes sure that if the
// lazy-init returns an error it can be handled by the client
if dont_use_shared {
if !use_shared {
new_resolver()
} else {
Ok(SHARED_RESOLVER.state.get_or_try_init(new_resolver)?.clone())
}
}
fn new_resolver_system(dont_use_shared: bool) -> Result<TokioResolver, ResolveError> {
fn new_resolver_system(use_shared: bool) -> Result<TokioResolver, ResolveError> {
// using a closure here is slightly gross, but this makes sure that if the
// lazy-init returns an error it can be handled by the client
if dont_use_shared || SHARED_RESOLVER.fallback.is_none() {
if !use_shared || SHARED_RESOLVER.fallback.is_none() {
new_resolver_system()
} else {
Ok(SHARED_RESOLVER
@@ -276,8 +296,8 @@ impl HickoryDnsResolver {
}
}
fn new_static_fallback(dont_use_shared: bool) -> StaticResolver {
if !dont_use_shared && let Some(ref shared_resolver) = SHARED_RESOLVER.static_base {
fn new_static_fallback(use_shared: bool) -> StaticResolver {
if use_shared && let Some(ref shared_resolver) = SHARED_RESOLVER.static_base {
shared_resolver
.get_or_init(new_default_static_fallback)
.clone()
@@ -294,6 +314,11 @@ impl HickoryDnsResolver {
.as_ref()
.unwrap()
.get_or_try_init(new_resolver_system)?;
// IF THIS INSTANCE IS A FRONT FOR THE SHARED RESOLVER SHOULDN'T THIS FN ENABLE THE SYSTEM FALLBACK FOR THE SHARED RESOLVER TOO?
// if self.use_shared {
// SHARED_RESOLVER.enable_system_fallback()?;
// }
Ok(())
}
@@ -301,6 +326,11 @@ impl HickoryDnsResolver {
/// returned immediately
pub fn disable_system_fallback(&mut self) {
// Clear this instance's system-fallback resolver slot.
self.fallback = None;
// NOTE(review): open question -- if this instance is a front for the shared resolver
// (`use_shared`), shouldn't this fn DISABLE the system fallback for the shared resolver too?
// Sketch only: `SHARED_RESOLVER` is an immutable `static`, so the assignment below would
// presumably need interior mutability before it could be enabled.
// if self.use_shared {
// SHARED_RESOLVER.fallback = None;
// }
}
/// Get the current map of hostname to address in use by the fallback static lookup if one
@@ -316,39 +346,122 @@ impl HickoryDnsResolver {
.expect("infallible assign");
self.static_base = Some(Arc::new(cell));
}
/// Build the baseline [`ResolverOpts`] used when configuring resolvers in this module.
///
/// Starts from `ResolverOpts::default()` and overrides two settings:
/// - successfully resolved addresses are cached for a minimum of 30 minutes
///   (`DEFAULT_POSITIVE_LOOKUP_CACHE_TTL`)
/// - individual lookup timeouts are set to 3 seconds (`DEFAULT_QUERY_TIMEOUT`)
///
/// Everything else is left at the library default, which at the time of writing means:
/// - number of retries after a lookup failure before giving up: 2
/// - lookup order: A then AAAA
/// - number of parallel lookups: 2
/// - nameserver selection: EWMA statistics / performance based strategy
fn default_options() -> ResolverOpts {
    let mut options = ResolverOpts::default();
    options.timeout = DEFAULT_QUERY_TIMEOUT;
    // Always cache successful responses for queries received by this resolver for 30 min minimum.
    options.positive_min_ttl = Some(DEFAULT_POSITIVE_LOOKUP_CACHE_TTL);
    options
}
/// List every nameserver config in the default group.
///
/// The result does not depend on the state of this particular resolver instance.
pub fn all_configured_name_servers(&self) -> Vec<NameServerConfig> {
    let group = default_nameserver_group();
    group.iter().cloned().collect()
}
/// Get the list of currently used nameserver configs.
pub fn active_name_servers(&self) -> Vec<NameServerConfig> {
if !self.use_shared {
return self
.state
.get()
.map(|r| r.config().name_servers().to_vec())
.unwrap_or(self.all_configured_name_servers());
}
SHARED_RESOLVER.active_name_servers()
}
/// Run one trial resolution against each nameserver individually and log the outcome, to show
/// which nameservers work and which fail to complete a lookup.
///
/// This always tries the full set of default configured nameservers. Failures are logged at
/// `warn`, successes at `info`; nothing is returned.
pub async fn trial_nameservers(&self) {
    let group = default_nameserver_group();
    let outcomes = trial_nameservers_inner(&group).await;

    for (ns, outcome) in outcomes {
        match outcome {
            Ok(()) => info!("trial {ns:?} succeeded"),
            Err(e) => warn!("trial {ns:?} errored: {e}"),
        }
    }
}
}
/// Create a new resolver with a custom DoT based configuration. The options are overridden to look
/// up for both IPv4 and IPv6 addresses to work with "happy eyeballs" algorithm.
///
/// Timeout Defaults to 5 seconds
/// Individual lookup Timeouts are set to 3 seconds
/// Number of retries after lookup failure before giving up Defaults to 2
/// Lookup order is set to (default) A then AAAA
///
/// Caches successfully resolved addresses for 30 minutes to prevent continual use of remote lookup.
/// This resolver is intended to be used for OUR API endpoints that do not rapidly rotate IPs.
fn new_resolver() -> Result<TokioResolver, ResolveError> {
info!("building new configured resolver");
let name_servers = default_nameserver_group_ipv4_only();
let mut name_servers = NameServerConfigGroup::quad9_tls();
name_servers.merge(NameServerConfigGroup::quad9_https());
name_servers.merge(NameServerConfigGroup::cloudflare_tls());
name_servers.merge(NameServerConfigGroup::cloudflare_https());
configure_and_build_resolver(name_servers)
Ok(configure_and_build_resolver(name_servers))
}
fn configure_and_build_resolver(
name_servers: NameServerConfigGroup,
) -> Result<TokioResolver, ResolveError> {
fn configure_and_build_resolver<G>(name_servers: G) -> TokioResolver
where
G: Into<NameServerConfigGroup>,
{
let options = HickoryDnsResolver::default_options();
let name_servers: NameServerConfigGroup = name_servers.into();
info!("building new configured resolver");
debug!("configuring resolver with {options:?}, {name_servers:?}");
let config = ResolverConfig::from_parts(None, Vec::new(), name_servers);
let mut resolver_builder =
TokioResolver::builder_with_config(config, TokioConnectionProvider::default());
resolver_builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4AndIpv6;
// Cache successful responses for queries received by this resolver for 30 min minimum.
resolver_builder.options_mut().positive_min_ttl = Some(Duration::from_secs(1800));
resolver_builder = resolver_builder.with_options(options);
Ok(resolver_builder.build())
resolver_builder.build()
}
/// Return clones of the nameserver configs whose socket address is IPv4, in their original
/// order.
fn filter_ipv4(nameservers: impl AsRef<[NameServerConfig]>) -> Vec<NameServerConfig> {
    let all = nameservers.as_ref();
    all.iter()
        .filter_map(|ns| ns.socket_addr.is_ipv4().then(|| ns.clone()))
        .collect()
}
#[allow(unused)]
/// Return clones of the nameserver configs whose socket address is IPv6, in their original
/// order.
fn filter_ipv6(nameservers: impl AsRef<[NameServerConfig]>) -> Vec<NameServerConfig> {
    let all = nameservers.as_ref();
    all.iter()
        .filter_map(|ns| ns.socket_addr.is_ipv6().then(|| ns.clone()))
        .collect()
}
#[allow(unused)]
/// Build the full default nameserver group: quad9 (TLS, then HTTPS) followed by cloudflare
/// (TLS, then HTTPS), merged in that order.
fn default_nameserver_group() -> NameServerConfigGroup {
    let mut group = NameServerConfigGroup::quad9_tls();
    for extra in [
        NameServerConfigGroup::quad9_https(),
        NameServerConfigGroup::cloudflare_tls(),
        NameServerConfigGroup::cloudflare_https(),
    ] {
        group.merge(extra);
    }
    group
}
/// The default nameserver group restricted to entries with an IPv4 socket address.
fn default_nameserver_group_ipv4_only() -> NameServerConfigGroup {
    let group = default_nameserver_group();
    let all: &[NameServerConfig] = &group;
    let ipv4_only = filter_ipv4(all);
    ipv4_only.into()
}
#[allow(unused)]
/// The default nameserver group restricted to entries with an IPv6 socket address.
fn default_nameserver_group_ipv6_only() -> NameServerConfigGroup {
    let group = default_nameserver_group();
    let all: &[NameServerConfig] = &group;
    let ipv6_only = filter_ipv6(all);
    ipv6_only.into()
}
/// Create a new resolver with the default configuration, which reads from the system DNS config
@@ -356,7 +469,12 @@ fn configure_and_build_resolver(
/// addresses to work with "happy eyeballs" algorithm.
fn new_resolver_system() -> Result<TokioResolver, ResolveError> {
let mut resolver_builder = TokioResolver::builder_tokio()?;
resolver_builder.options_mut().ip_strategy = LookupIpStrategy::Ipv4AndIpv6;
let options = HickoryDnsResolver::default_options();
info!("building new fallback system resolver");
debug!("fallback system resolver with {options:?}");
resolver_builder = resolver_builder.with_options(options);
Ok(resolver_builder.build())
}
@@ -365,11 +483,54 @@ fn new_default_static_fallback() -> StaticResolver {
StaticResolver::new(constants::default_static_addrs())
}
/// Run a trial lookup of `example.com` against each provided nameserver individually, to test
/// which are working and which fail to complete a lookup.
///
/// One task is spawned per nameserver and all of them are awaited. Each entry of the returned
/// vector pairs a nameserver config with the result of its own trial.
async fn trial_nameservers_inner(
    name_servers: &[NameServerConfig],
) -> Vec<(NameServerConfig, Result<(), ResolveError>)> {
    let mut trial_lookups = tokio::task::JoinSet::new();

    for ns in name_servers.iter().cloned() {
        trial_lookups.spawn(async move {
            // The lookup consumes its own copy of the config; the original is handed back
            // alongside the outcome so the caller can tell which nameserver it belongs to.
            let outcome = trial_lookup(ns.clone(), "example.com").await;
            (ns, outcome)
        });
    }

    trial_lookups.join_all().await
}
/// Build an independent resolver that knows only the provided nameserver and perform a single
/// IPv4 lookup of `query` through it.
///
/// Returns `Ok(())` if the lookup succeeds, the converted lookup error if it fails, and
/// `ResolveError::Timeout` if it does not finish within `DEFAULT_OVERALL_LOOKUP_TIMEOUT`.
async fn trial_lookup(name_server: NameServerConfig, query: &str) -> Result<(), ResolveError> {
    debug!("running ns trial {name_server:?} query={query}");

    let resolver = configure_and_build_resolver(vec![name_server]);
    let bounded = tokio::time::timeout(DEFAULT_OVERALL_LOOKUP_TIMEOUT, resolver.ipv4_lookup(query));

    let Ok(outcome) = bounded.await else {
        return Err(ResolveError::Timeout);
    };
    outcome.map(|_| ()).map_err(Into::into)
}
#[cfg(test)]
mod test {
use super::*;
use itertools::Itertools;
use std::collections::HashMap;
use std::{
net::{IpAddr, Ipv4Addr, Ipv6Addr},
time::Instant,
};
/// IP addresses guaranteed to fail attempts to resolve
///
/// Addresses drawn from blocks set off by RFC5737 (ipv4) and RFC3849 (ipv6)
const GUARANTEED_BROKEN_IPS_1: &[IpAddr] = &[
IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1)),
IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)),
IpAddr::V6(Ipv6Addr::new(0x2001, 0x0db8, 0, 0, 0, 0, 0, 0x1111)),
IpAddr::V6(Ipv6Addr::new(0x2001, 0x0db8, 0, 0, 0, 0, 0, 0x1001)),
];
#[tokio::test]
async fn reqwest_with_custom_dns() {
@@ -428,99 +589,172 @@ mod test {
assert!(addrs.contains(&example_ip6));
Ok(())
}
}
#[cfg(test)]
mod failure_test {
use super::*;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
// Test the nameserver trial functionality with mostly nameservers guaranteed to be broken and
// one that should work.
#[tokio::test]
async fn trial_nameservers() {
let good_cf_ip = IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1));
/// IP addresses guaranteed to fail attempts to resolve
///
/// Addresses drawn from blocks set off by RFC5737 (ipv4) and RFC3849 (ipv6)
const GUARANTEED_BROKEN_IPS_1: &[IpAddr] = &[
IpAddr::V4(Ipv4Addr::new(192, 0, 2, 1)),
IpAddr::V4(Ipv4Addr::new(198, 51, 100, 1)),
IpAddr::V6(Ipv6Addr::new(0x2001, 0x0db8, 0, 0, 0, 0, 0, 0x1111)),
IpAddr::V6(Ipv6Addr::new(0x2001, 0x0db8, 0, 0, 0, 0, 0, 0x1001)),
];
let mut ns_ips = GUARANTEED_BROKEN_IPS_1.to_vec();
ns_ips.push(good_cf_ip);
// Create a resolver that behaves the same as the custom configured router, except for the fact
// that it is guaranteed to fail.
fn build_broken_resolver() -> Result<TokioResolver, ResolveError> {
info!("building new faulty resolver");
let mut broken_ns_group = NameServerConfigGroup::from_ips_tls(
GUARANTEED_BROKEN_IPS_1,
853,
"cloudflare-dns.com".to_string(),
true,
);
let broken_ns_https = NameServerConfigGroup::from_ips_https(
GUARANTEED_BROKEN_IPS_1,
&ns_ips,
443,
"cloudflare-dns.com".to_string(),
true,
);
broken_ns_group.merge(broken_ns_https);
configure_and_build_resolver(broken_ns_group)
}
#[tokio::test]
async fn dns_lookup_failures() -> Result<(), ResolveError> {
let time_start = std::time::Instant::now();
let r = OnceCell::new();
r.set(build_broken_resolver().expect("failed to build resolver"))
.expect("broken resolver init error");
let inner = configure_and_build_resolver(broken_ns_https);
// create a new resolver that won't mess with the shared resolver used by other tests
let resolver = HickoryDnsResolver {
dont_use_shared: true,
state: Arc::new(r),
overall_dns_timeout: Duration::from_secs(5),
..Default::default()
};
build_broken_resolver()?;
let domain = "ifconfig.me";
let result = resolver.resolve_str(domain).await;
assert!(result.is_err_and(|e| matches!(e, ResolveError::Timeout)));
let duration = time_start.elapsed();
assert!(duration < resolver.overall_dns_timeout + Duration::from_secs(1));
Ok(())
}
#[tokio::test]
async fn fallback_to_static() -> Result<(), ResolveError> {
let r = OnceCell::new();
r.set(build_broken_resolver().expect("failed to build resolver"))
.expect("broken resolver init error");
// create a new resolver that won't mess with the shared resolver used by other tests
let resolver = HickoryDnsResolver {
dont_use_shared: true,
state: Arc::new(r),
use_shared: false,
state: Arc::new(OnceCell::with_value(inner)),
static_base: Some(Default::default()),
overall_dns_timeout: Duration::from_secs(5),
..Default::default()
};
build_broken_resolver()?;
// successful lookup using fallback to static resolver
let domain = "nymvpn.com";
let _ = resolver
.resolve_str(domain)
.await
.expect("failed to resolve address in static lookup");
let name_servers = resolver.state.get().unwrap().config().name_servers();
for (ns, result) in trial_nameservers_inner(name_servers).await {
if ns.socket_addr.ip() == good_cf_ip {
assert!(result.is_ok())
} else {
assert!(result.is_err())
}
}
}
// unsuccessful lookup - primary times out, and not in
let domain = "non-existent.nymtech.net";
let result = resolver.resolve_str(domain).await;
assert!(result.is_err_and(|e| matches!(e, ResolveError::Timeout)));
mod failure_test {
use super::*;
Ok(())
// Create a resolver that behaves the same as the custom configured router, except for the fact
// that it is guaranteed to fail.
fn build_broken_resolver() -> Result<TokioResolver, ResolveError> {
info!("building new faulty resolver");
let mut broken_ns_group = NameServerConfigGroup::from_ips_tls(
GUARANTEED_BROKEN_IPS_1,
853,
"cloudflare-dns.com".to_string(),
true,
);
let broken_ns_https = NameServerConfigGroup::from_ips_https(
GUARANTEED_BROKEN_IPS_1,
443,
"cloudflare-dns.com".to_string(),
true,
);
broken_ns_group.merge(broken_ns_https);
Ok(configure_and_build_resolver(broken_ns_group))
}
#[tokio::test]
async fn dns_lookup_failures() -> Result<(), ResolveError> {
let time_start = std::time::Instant::now();
let r = OnceCell::new();
r.set(build_broken_resolver().expect("failed to build resolver"))
.expect("broken resolver init error");
// create a new resolver that won't mess with the shared resolver used by other tests
let resolver = HickoryDnsResolver {
use_shared: false,
state: Arc::new(r),
overall_dns_timeout: Duration::from_secs(5),
..Default::default()
};
build_broken_resolver()?;
let domain = "ifconfig.me";
let result = resolver.resolve_str(domain).await;
assert!(result.is_err_and(|e| matches!(e, ResolveError::Timeout)));
let duration = time_start.elapsed();
assert!(duration < resolver.overall_dns_timeout + Duration::from_secs(1));
Ok(())
}
#[tokio::test]
async fn fallback_to_static() -> Result<(), ResolveError> {
let r = OnceCell::new();
r.set(build_broken_resolver().expect("failed to build resolver"))
.expect("broken resolver init error");
// create a new resolver that won't mess with the shared resolver used by other tests
let resolver = HickoryDnsResolver {
use_shared: false,
state: Arc::new(r),
static_base: Some(Default::default()),
overall_dns_timeout: Duration::from_secs(5),
..Default::default()
};
build_broken_resolver()?;
// successful lookup using fallback to static resolver
let domain = "nymvpn.com";
let _ = resolver
.resolve_str(domain)
.await
.expect("failed to resolve address in static lookup");
// unsuccessful lookup - primary times out, and not in static table
let domain = "non-existent.nymtech.net";
let result = resolver.resolve_str(domain).await;
assert!(result.is_err_and(|e| matches!(e, ResolveError::Timeout)));
Ok(())
}
#[test]
// Intended to check that both a thread-local resolver and the shared resolver report only
// nameservers with IPv4 socket addresses.
//
// NOTE(review): the `bool` returned by each `.all(..)` below is discarded, so as written this
// test cannot fail. Wrap each chain in `assert!(..)` once the expected behaviour is confirmed.
fn default_resolver_uses_ipv4_only_nameservers() {
let resolver = HickoryDnsResolver::thread_resolver();
resolver
.active_name_servers()
.iter()
.all(|cfg| cfg.socket_addr.is_ipv4());
SHARED_RESOLVER
.active_name_servers()
.iter()
.all(|cfg| cfg.socket_addr.is_ipv4());
}
#[tokio::test]
#[ignore]
// This test depends on the external network setup -- i.e. blocking all traffic to the default
// resolvers. Otherwise the default resolvers will succeed without using the static fallback,
// making the test pointless.
async fn dns_lookup_failure_on_shared() -> Result<(), ResolveError> {
    let time_start = Instant::now();

    // A default resolver has `use_shared: true`, so this deliberately exercises the shared
    // resolver rather than an isolated, pre-broken one.
    let resolver = HickoryDnsResolver::default();

    // successful lookup using fallback to static resolver
    let domain = "rpc.nymtech.net";
    let _ = resolver
        .resolve_str(domain)
        .await
        .expect("failed to resolve address in static lookup");

    println!("{}ms resolved {domain}", time_start.elapsed().as_millis());

    // unsuccessful lookup - primary times out, and not in static table
    let domain = "non-existent.nymtech.net";
    let result = resolver.resolve_str(domain).await;
    // Only the presence of an error is asserted: depending on how the network is blocked the
    // failure may surface as a timeout or as an NXDOMAIN-style resolve error.
    assert!(result.is_err());

    Ok(())
}
}
}
+1 -1
View File
@@ -175,7 +175,7 @@ mod user_agent;
pub use user_agent::UserAgent;
#[cfg(not(target_arch = "wasm32"))]
mod dns;
pub mod dns;
mod path;
#[cfg(not(target_arch = "wasm32"))]
+1 -1
View File
@@ -91,7 +91,7 @@ fn sanitizing_urls() {
#[tokio::test]
async fn api_client_retry() -> Result<(), Box<dyn std::error::Error>> {
let client = ClientBuilder::new_with_urls(vec![
"http://broken.nym.test".parse()?, // This will fail because of DNS (rotate)
"http://broken.nym.test".parse()?, // This should fail because of DNS NXDomain (rotate)
"http://127.0.0.1:9".parse()?, // This will fail because of TCP refused (rotate)
"https://httpbin.org/status/200".parse()?, // This should succeed
])?
+39
View File
@@ -89,6 +89,45 @@ impl RoutingNode {
self.ws_entry_address_no_tls(prefer_ipv6)
}
/// Build up to two plain (`ws://`) entry addresses for this node: a primary and a fallback.
///
/// Candidates are considered in this order and the first two are returned:
/// 1. the advertised hostname, unless `no_hostname` is set or no hostname is advertised
/// 2. the advertised IP addresses, with IPv6 addresses moved to the front when `prefer_ipv6`
///    is set (relative order is otherwise preserved)
///
/// IPv6 addresses are wrapped in square brackets so the resulting string is a valid URL
/// authority (`ws://[2001:db8::1]:9000` rather than `ws://2001:db8::1:9000`).
///
/// Returns `(None, None)` if the node has no entry details, and `(Some(_), None)` if only a
/// single candidate is available.
pub fn ws_entry_address_with_fallback(
    &self,
    prefer_ipv6: bool,
    no_hostname: bool,
) -> (Option<String>, Option<String>) {
    let Some(entry) = &self.entry else {
        return (None, None);
    };

    // Put hostname first if we want it
    let maybe_hostname = if no_hostname {
        None
    } else {
        entry.hostname.clone()
    };

    // Put ipv6 first or keep the advertised order as is. The sort is stable, so addresses of
    // the same family keep their relative order.
    let mut ips: Vec<&IpAddr> = entry.ip_addresses.iter().collect();
    if prefer_ipv6 {
        ips.sort_by_key(|ip| !ip.is_ipv6());
    }

    let ip_hosts = ips.into_iter().map(|ip| match ip {
        // an IPv6 literal must be bracketed to be separable from the port
        IpAddr::V6(v6) => format!("[{v6}]"),
        IpAddr::V4(v4) => v4.to_string(),
    });

    // chain everything and keep the top two as ws addresses
    let mut ws_addresses = maybe_hostname
        .into_iter()
        .chain(ip_hosts)
        .map(|host| format!("ws://{host}:{}", entry.clients_ws_port));

    let primary = ws_addresses.next();
    let fallback = ws_addresses.next();
    (primary, fallback)
}
pub fn identity(&self) -> ed25519::PublicKey {
self.identity_key
}
@@ -119,6 +119,7 @@ impl BuilderConfig {
.network_details(self.network_env)
.debug_config(debug_config)
.credentials_mode(true)
.no_hostname(true)
.with_remember_me(remember_me)
.custom_topology_provider(self.custom_topology_provider);
+2 -2
View File
@@ -2864,9 +2864,9 @@ dependencies = [
[[package]]
name = "hickory-resolver"
version = "0.25.1"
version = "0.25.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a128410b38d6f931fcc6ca5c107a3b02cabd6c05967841269a4ad65d23c44331"
checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a"
dependencies = [
"cfg-if",
"futures-util",
+17
View File
@@ -56,6 +56,7 @@ pub struct MixnetClientBuilder<S: MixnetClientStorage = Ephemeral> {
custom_shutdown: Option<ShutdownTracker>,
event_tx: Option<EventSender>,
force_tls: bool,
no_hostname: bool,
user_agent: Option<UserAgent>,
#[cfg(unix)]
connection_fd_callback: Option<Arc<dyn Fn(std::os::fd::RawFd) + Send + Sync>>,
@@ -101,6 +102,7 @@ impl MixnetClientBuilder<OnDiskPersistent> {
event_tx: None,
custom_gateway_transceiver: None,
force_tls: false,
no_hostname: false,
user_agent: None,
#[cfg(unix)]
connection_fd_callback: None,
@@ -134,6 +136,7 @@ where
custom_shutdown: None,
event_tx: None,
force_tls: false,
no_hostname: false,
user_agent: None,
#[cfg(unix)]
connection_fd_callback: None,
@@ -158,6 +161,7 @@ where
custom_shutdown: self.custom_shutdown,
event_tx: self.event_tx,
force_tls: self.force_tls,
no_hostname: self.no_hostname,
user_agent: self.user_agent,
#[cfg(unix)]
connection_fd_callback: self.connection_fd_callback,
@@ -229,6 +233,13 @@ where
self
}
/// When set, ask gateway selection to use the gateway's IP address rather than its hostname.
///
/// Ignored if `force_tls` is set.
#[must_use]
pub fn no_hostname(mut self, no_hostname: bool) -> Self {
self.no_hostname = no_hostname;
self
}
/// Enable paid coconut bandwidth credentials mode.
#[must_use]
pub fn enable_credentials_mode(mut self) -> Self {
@@ -341,6 +352,7 @@ where
client.custom_shutdown = self.custom_shutdown;
client.wait_for_gateway = self.wait_for_gateway;
client.force_tls = self.force_tls;
client.no_hostname = self.no_hostname;
client.user_agent = self.user_agent;
#[cfg(unix)]
if self.connection_fd_callback.is_some() {
@@ -393,6 +405,9 @@ where
/// Force the client to connect using wss protocol with the gateway.
force_tls: bool,
/// Force the client to pick gateway IP and not hostname, ignored if force_tls is set
no_hostname: bool,
/// Allows passing an externally controlled shutdown handle.
custom_shutdown: Option<ShutdownTracker>,
@@ -461,6 +476,7 @@ where
custom_gateway_transceiver: None,
wait_for_gateway: false,
force_tls: false,
no_hostname: false,
custom_shutdown: None,
event_tx,
user_agent: None,
@@ -580,6 +596,7 @@ where
self.config.user_chosen_gateway.clone(),
None,
self.force_tls,
self.no_hostname,
);
let available_gateways = self.available_gateways().await?;