Compare commits

...

3 Commits

Author SHA1 Message Date
Jędrzej Stuczyński 944fc27ef6 add additional leniency in ticketbook requests 2026-05-28 11:05:24 +01:00
Jędrzej Stuczyński 3853c0f0c9 reduce concurrency during quorum check tests 2026-05-27 16:36:13 +01:00
Jędrzej Stuczyński 97f79381b9 NS: don't return nodes with 0 performance 2026-05-27 14:36:02 +01:00
16 changed files with 257 additions and 151 deletions
Generated
+3 -1
View File
@@ -6563,6 +6563,8 @@ dependencies = [
"serde",
"thiserror 2.0.12",
"time",
"tokio",
"wasmtimer",
"zeroize",
]
@@ -7768,7 +7770,7 @@ dependencies = [
[[package]]
name = "nym-node-status-api"
version = "4.6.2-rc7"
version = "4.6.2-rc10"
dependencies = [
"ammonia",
"anyhow",
+23 -3
View File
@@ -27,6 +27,9 @@ pub struct QuorumStateChecker {
cancellation_token: CancellationToken,
check_interval: Duration,
quorum_state: QuorumState,
/// indicates whether the last check has been a failure
last_failed: bool,
}
impl QuorumStateChecker {
@@ -42,6 +45,7 @@ impl QuorumStateChecker {
quorum_state: QuorumState {
available: Arc::new(Default::default()),
},
last_failed: false,
};
// first check MUST succeed, otherwise we shouldn't start
@@ -65,7 +69,7 @@ impl QuorumStateChecker {
let dkg_details = dkg_details_with_client(client_guard.deref()).await?;
drop(client_guard);
let res = check_known_dealers(dkg_details).await?;
let res = check_known_dealers(dkg_details, 4).await?;
info!("there are {} known DKG dealers", res.results.len());
let Some(signing_threshold) = res.threshold else {
@@ -107,7 +111,7 @@ impl QuorumStateChecker {
Ok(available)
}
pub async fn run_forever(self) {
pub async fn run_forever(mut self) {
info!("starting quorum state checker");
loop {
tokio::select! {
@@ -117,7 +121,23 @@ impl QuorumStateChecker {
}
_ = tokio::time::sleep(self.check_interval) => {
match self.check_quorum_state().await {
Ok(available) => self.quorum_state.available.store(available, Ordering::SeqCst),
Ok(available) => {
let previous = self.quorum_state.available.load(Ordering::SeqCst);
// only update the quorum state to a failed state if we've had two consecutive failures
if available {
if !previous {
info!("quorum recovered");
}
self.quorum_state.available.store(true, Ordering::SeqCst);
} else if self.last_failed {
if previous {
warn!("quorum became unavailable after 2 consecutive failed checks");
}
self.quorum_state.available.store(false, Ordering::SeqCst);
}
self.last_failed = !available;
},
Err(err) => error!("failed to check current quorum state: {err}"),
}
}
+8
View File
@@ -36,5 +36,13 @@ nym-ecash-contract-common = { workspace = true }
nym-network-defaults = { workspace = true }
nym-serde-helpers = { workspace = true, features = ["date"] }
[target."cfg(not(target_arch = \"wasm32\"))".dependencies.tokio]
workspace = true
features = ["time"]
[target."cfg(target_arch = \"wasm32\")".dependencies.wasmtimer]
workspace = true
features = ["tokio"]
[dev-dependencies]
rand = { workspace = true }
@@ -6,6 +6,7 @@ use crate::ecash::bandwidth::serialiser::VersionedSerialise;
use crate::ecash::bandwidth::CredentialSigningData;
use crate::ecash::utils::cred_exp_date;
use crate::error::Error;
use log::{debug, warn};
use nym_api_requests::ecash::BlindSignRequestBody;
use nym_credentials_interface::{
aggregate_wallets, generate_keypair_user_from_seed, issue_verify, withdrawal_request,
@@ -17,8 +18,15 @@ use nym_ecash_contract_common::deposit::DepositId;
use nym_ecash_time::{ecash_default_expiration_date, ecash_today, EcashTime};
use nym_validator_client::nym_api::{EpochId, NymApiClientExt};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use time::Date;
#[cfg(not(target_arch = "wasm32"))]
use tokio::time::sleep;
#[cfg(target_arch = "wasm32")]
use wasmtimer::tokio::sleep;
pub use nym_validator_client::nyxd::{Coin, Hash};
#[derive(Serialize, Deserialize)]
@@ -192,6 +200,49 @@ impl IssuanceTicketBook {
Ok(unblinded_signature)
}
// ideally this would have been generic over credential type, but we really don't need secp256k1 keys for bandwidth vouchers
pub async fn obtain_partial_ticketbook_credential_with_retries(
&self,
client: &nym_http_api_client::Client,
signer_index: u64,
validator_vk: &VerificationKeyAuth,
signing_data: CredentialSigningData,
max_attempts: usize,
) -> Result<PartialWallet, Error> {
let Some(client_url) = client.base_urls().first() else {
return Err(Error::CredentialShareObtainFailed);
};
let mut last_err = None;
for attempt in 0..max_attempts {
if attempt > 0 {
sleep(Duration::from_millis(500 * attempt as u64)).await;
}
debug!(
"attempt {} / {max_attempts} to obtain partial ticketbook credential from {client_url}",
attempt + 1,
);
match self
.obtain_partial_ticketbook_credential(
client,
signer_index,
validator_vk,
signing_data.clone(),
)
.await
{
Ok(partial_wallet) => return Ok(partial_wallet),
Err(err) => {
warn!(
"attempt {} / {max_attempts} to obtain partial ticketbook credential from {client_url} failed: {err}",
attempt + 1,
);
last_err = Some(err);
}
}
}
Err(last_err.unwrap_or(Error::CredentialShareObtainFailed))
}
// ideally this would have been generic over credential type, but we really don't need secp256k1 keys for bandwidth vouchers
pub async fn obtain_partial_ticketbook_credential(
&self,
+9 -1
View File
@@ -137,6 +137,8 @@ pub async fn obtain_aggregate_wallet(
ecash_api_clients: &[EcashApiClient],
threshold: u64,
) -> Result<WalletSignatures, Error> {
const MAX_ATTEMPTS: usize = 2;
if ecash_api_clients.len() < threshold as usize {
return Err(Error::NoValidatorsAvailable);
}
@@ -154,11 +156,12 @@ pub async fn obtain_aggregate_wallet(
);
match voucher
.obtain_partial_ticketbook_credential(
.obtain_partial_ticketbook_credential_with_retries(
&ecash_api_client.api_client,
ecash_api_client.node_id,
&ecash_api_client.verification_key,
request.clone(),
MAX_ATTEMPTS,
)
.await
{
@@ -167,6 +170,11 @@ pub async fn obtain_aggregate_wallet(
warn!("failed to obtain partial credential from API {ecash_api_client}: {err}",);
}
};
// we've got a sufficient number of shares
if wallets.len() >= threshold as usize {
break;
}
}
if wallets.len() < threshold as usize {
return Err(Error::NotEnoughShares);
+3
View File
@@ -63,6 +63,9 @@ pub enum Error {
#[error("failed to create a secp256k1 signature")]
Secp256k1SignFailure,
#[error("failed to obtain a valid credential share")]
CredentialShareObtainFailed,
}
impl From<NymAPIError> for Error {
@@ -195,9 +195,9 @@ impl ClientUnderTest {
pub(crate) async fn check_client(
dealer_details: DealerDetails,
dkg_epoch: u64,
contract_share: Option<&ContractVKShare>,
contract_share: Option<ContractVKShare>,
) -> TypedSignerResult {
let dealer_information = RawDealerInformation::new(&dealer_details, contract_share);
let dealer_information = RawDealerInformation::new(&dealer_details, contract_share.as_ref());
// 7. attempt to construct client instances out of them
let Ok(parsed_information) = dealer_information.parse() else {
+16 -12
View File
@@ -2,7 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
use crate::client_check::check_client;
use futures::stream::{FuturesUnordered, StreamExt};
use futures::stream;
use futures::stream::StreamExt;
use nym_ecash_signer_check_types::status::{SignerResult, Status};
use nym_network_defaults::NymNetworkDetails;
use nym_validator_client::QueryHttpRpcNyxdClient;
@@ -65,7 +66,7 @@ where
C: DkgQueryClient + Sync,
{
let dkg_details = dkg_details_with_client(client).await?;
check_known_dealers(dkg_details).await
check_known_dealers(dkg_details, None).await
}
pub async fn dkg_details_with_client<C>(client: &C) -> Result<DkgDetails, SignerCheckError>
@@ -109,18 +110,21 @@ where
pub async fn check_known_dealers(
dkg_details: DkgDetails,
concurrency: impl Into<Option<usize>>,
) -> Result<SignersTestResult, SignerCheckError> {
// 6. for each dealer attempt to perform the checks
let results = dkg_details
.network_dealers
.into_iter()
.map(|d| {
let share = dkg_details.submitted_shared.get(&d.assigned_index);
check_client(d, dkg_details.dkg_epoch.epoch_id, share)
})
.collect::<FuturesUnordered<_>>()
.collect::<Vec<_>>()
.await;
let epoch_id = dkg_details.dkg_epoch.epoch_id;
let submitted = dkg_details.submitted_shared;
let dealers = dkg_details.network_dealers.len();
let tasks = dkg_details.network_dealers.into_iter().map(move |d| {
let share = submitted.get(&d.assigned_index).cloned();
check_client(d, epoch_id, share)
});
let limit = concurrency.into().filter(|&n| n > 0).unwrap_or(dealers);
let results = stream::iter(tasks).buffer_unordered(limit).collect().await;
Ok(SignersTestResult {
threshold: dkg_details.threshold,
@@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "\n INSERT INTO pending_issuance\n (deposit_id, serialization_revision, pending_ticketbook_data, expiration_date)\n VALUES ($1, $2, $3, $4)\n ",
"query": "\n INSERT INTO pending_issuance\n (deposit_id, serialization_revision, pending_ticketbook_data, expiration_date, epoch_id, failure_message)\n VALUES ($1, $2, $3, $4, $5, $6)\n ",
"describe": {
"columns": [],
"parameters": {
@@ -8,10 +8,12 @@
"Int4",
"Int2",
"Bytea",
"Date"
"Date",
"Int4",
"Text"
]
},
"nullable": []
},
"hash": "2ee6b058d423a66114d8411e7c287ade31137b30407dc0254d30f60e2d0101cf"
"hash": "7c2f58e63efd85010408f812692ecad1c89d9df3ffaf4b5d00db5adfdef854c4"
}
@@ -3,7 +3,7 @@
[package]
name = "nym-node-status-api"
version = "4.6.2-rc7"
version = "4.6.2-rc10"
authors.workspace = true
edition.workspace = true
license.workspace = true
@@ -30,17 +30,21 @@ impl Storage {
deposit_id: i32,
data: &[u8],
expiration_date: Date,
epoch_id: i32,
failure_message: &str,
) -> Result<(), sqlx::Error> {
sqlx::query!(
r#"
INSERT INTO pending_issuance
(deposit_id, serialization_revision, pending_ticketbook_data, expiration_date)
VALUES ($1, $2, $3, $4)
(deposit_id, serialization_revision, pending_ticketbook_data, expiration_date, epoch_id, failure_message)
VALUES ($1, $2, $3, $4, $5, $6)
"#,
deposit_id,
serialisation_revision,
data,
expiration_date,
epoch_id,
failure_message,
)
.execute(&self.pool)
.await?;
@@ -291,137 +291,131 @@ impl HttpCache {
db: &DbPool,
min_node_version: &Version,
) -> Vec<DVpnGateway> {
match self.dvpn_gateways.get(DVPN_GATEWAYS_LIST_KEY).await {
Some(guard) => {
tracing::trace!("Fetching from cache...");
let read_lock = guard.read().await;
read_lock.clone()
}
None => {
tracing::info!("No gateways (dVPN) in cache, refreshing from DB...");
if let Some(guard) = self.dvpn_gateways.get(DVPN_GATEWAYS_LIST_KEY).await {
tracing::trace!("Fetching from cache...");
let read_lock = guard.read().await;
return read_lock.clone();
};
let gateways = self.get_gateway_list(db).await;
tracing::info!("Found {} gateways in database", gateways.len());
tracing::info!("No gateways (dVPN) in cache, refreshing from DB...");
let started_with = gateways.len();
let skimmed_nodes = match crate::db::queries::get_described_bonded_nym_nodes(db)
.await
{
Ok(records) => {
let mut nodes = HashMap::new();
for dto in records {
match SkimmedNodeV1::try_from(dto) {
Ok(skimmed_node) => {
let key =
skimmed_node.ed25519_identity_pubkey.to_base58_string();
nodes.insert(key, skimmed_node);
}
Err(err) => {
error!(
"CRITICAL: Failed to convert NymNodeDto to SkimmedNode: {err}"
);
panic!(
"Cannot convert database record to SkimmedNode - this should never happen! Error: {err}"
);
}
}
let gateways = self.get_gateway_list(db).await;
tracing::info!("Found {} gateways in database", gateways.len());
let started_with = gateways.len();
let skimmed_nodes = match crate::db::queries::get_described_bonded_nym_nodes(db).await {
Ok(records) => {
let mut nodes = HashMap::new();
for dto in records {
match SkimmedNodeV1::try_from(dto) {
Ok(skimmed_node) => {
let key = skimmed_node.ed25519_identity_pubkey.to_base58_string();
nodes.insert(key, skimmed_node);
}
nodes
}
Err(err) => {
error!("CRITICAL: Failed to query nym_nodes from database: {err}");
panic!(
"Cannot read nym_nodes table - database connection issue? Error: {err}"
);
}
};
let socks5_scores = calculate_socks5_percentiles(&gateways);
let res_gws = gateways
.iter()
.filter(|gw| gw.bonded)
.filter_map(|gw| match skimmed_nodes.get(&gw.gateway_identity_key) {
Some(skimmed_node) => Some((gw, skimmed_node)),
None => {
error!(
"CRITICAL: Gateway {} exists in gateways table but not in nym_nodes table! This should not happen.",
gw.gateway_identity_key
Err(err) => {
error!("CRITICAL: Failed to convert NymNodeDto to SkimmedNode: {err}");
panic!(
"Cannot convert database record to SkimmedNode - this should never happen! Error: {err}"
);
None
}
})
.filter_map(
|(gw, skimmed_node)| match DVpnGateway::new(gw.clone(), skimmed_node, socks5_scores.get(&gw.gateway_identity_key)) {
Ok(gw) => Some(gw),
Err(err) => {
error!(
"CRITICAL: Failed to create DVpnGateway for node_id={}, identity_key={}: {}",
skimmed_node.node_id,
skimmed_node.ed25519_identity_pubkey.to_base58_string(),
err
);
// Don't panic here as this might be due to missing fields, but log it loudly
None
}
},
)
.filter(|gw| {
let gw_version = &gw.build_information.build_version;
if let Ok(gw_version) = Version::parse(gw_version) {
&gw_version >= min_node_version
} else {
warn!("Failed to parse GW version {}", gw_version);
false
}
})
.filter(|gw| {
// gateways must have a country
if gw.location.two_letter_iso_country_code.len() == 2 {
true
} else {
warn!(
"Invalid country code: {}",
gw.location.two_letter_iso_country_code
);
false
}
})
// sort by country, then by identity key
.sorted_by_key(|item| {
(
item.location.two_letter_iso_country_code.clone(),
item.identity_key.clone(),
)
})
.collect::<Vec<_>>();
let bonded_count = gateways.iter().filter(|gw| gw.bonded).count();
tracing::info!(
"DVpn gateway filtering: {} total gateways, {} bonded, {} nym_nodes, {} final DVpn gateways",
started_with,
bonded_count,
skimmed_nodes.len(),
res_gws.len()
);
if res_gws.is_empty() && started_with > 0 {
tracing::error!(
"CRITICAL: Started with {} gateways but got 0 DVpn gateways! Min version: {}",
started_with,
min_node_version
);
} else {
tracing::info!(
"Successfully loaded {} DVpn gateways into cache",
res_gws.len()
);
self.upsert_dvpn_gateway_list(res_gws.clone()).await;
}
}
res_gws
nodes
}
Err(err) => {
error!("CRITICAL: Failed to query nym_nodes from database: {err}");
panic!("Cannot read nym_nodes table - database connection issue? Error: {err}");
}
};
let socks5_scores = calculate_socks5_percentiles(&gateways);
let mut dvpd_gateways = Vec::new();
let bonded_count = gateways.iter().filter(|gw| gw.bonded).count();
for gw in gateways {
let id = gw.gateway_identity_key.clone();
// 1. reject all gateways that are not bonded
if !gw.bonded {
continue;
}
// 2. reject all gateways with zero performance
if gw.performance == 0 {
continue;
}
// 3. get corresponding directory details
let Some(skimmed_node) = skimmed_nodes.get(&id) else {
error!(
"CRITICAL: Gateway {id} exists in gateways table but not in nym_nodes table! This should not happen",
);
continue;
};
let node_id = skimmed_node.node_id;
// 4. construct the DVpnGateway model
let dvpn_gw = match DVpnGateway::new(gw, skimmed_node, socks5_scores.get(&id)) {
Ok(gw) => gw,
Err(err) => {
error!(
"CRITICAL: Failed to create DVpnGateway for node_id={node_id}, identity_key={id}: {err}",
);
// Don't panic here as this might be due to missing fields, but log it loudly
continue;
}
};
// 5. filter out outdated nodes
let gw_version = &dvpn_gw.build_information.build_version;
if let Ok(gw_version) = Version::parse(gw_version) {
if &gw_version < min_node_version {
continue;
}
} else {
warn!("Failed to parse GW version {gw_version}");
continue;
}
// 6. filter out nodes without valid country codes
if dvpn_gw.location.two_letter_iso_country_code.len() != 2 {
warn!(
"Invalid country code: {}",
dvpn_gw.location.two_letter_iso_country_code
);
continue;
}
dvpd_gateways.push(dvpn_gw);
}
// 7. finally, sort the nodes by country, then by identity key
dvpd_gateways.sort_by_key(|item| {
(
item.location.two_letter_iso_country_code.clone(),
item.identity_key.clone(),
)
});
tracing::info!(
"DVpn gateway filtering: {started_with} total gateways, {bonded_count} bonded, {} nym_nodes, {} final DVpn gateways",
skimmed_nodes.len(),
dvpd_gateways.len()
);
if dvpd_gateways.is_empty() && started_with > 0 {
tracing::error!(
"CRITICAL: Started with {started_with} gateways but got 0 DVpn gateways! Min version: {min_node_version}",
);
} else {
tracing::info!(
"Successfully loaded {} DVpn gateways into cache",
dvpd_gateways.len()
);
self.upsert_dvpn_gateway_list(dvpd_gateways.clone()).await;
}
dvpd_gateways
}
pub async fn get_entry_dvpn_gateways(
@@ -4,6 +4,7 @@ use crate::node_scraper::helpers::scrape_and_store_description_by_node_id;
use crate::ticketbook_manager::TicketbookManager;
use crate::ticketbook_manager::state::TicketbookManagerState;
use clap::Parser;
use nym_bin_common::bin_info_owned;
use nym_credential_proxy_lib::quorum_checker::QuorumStateChecker;
use nym_credential_proxy_lib::shared_state::nyxd_client::ChainClient;
use nym_crypto::asymmetric::ed25519::PublicKey;
@@ -11,6 +12,7 @@ use nym_network_defaults::setup_env;
use nym_task::ShutdownManager;
use nym_validator_client::nyxd::NyxdClient;
use std::sync::Arc;
use tracing::info;
mod cli;
mod db;
@@ -27,6 +29,9 @@ mod utils;
async fn main() -> anyhow::Result<()> {
logging::setup_tracing_logger()?;
let bin_info = bin_info_owned!();
info!("using the following version: {bin_info}");
let args = cli::Cli::parse();
if let Some(env_file) = &args.config_env_file {
setup_env(Some(env_file));
@@ -121,11 +121,12 @@ impl TicketbookManager {
{
Err(err) => {
error!("failed to obtain aggregated wallet: {err}");
let failure_message = err.to_string();
self.state
.storage()
.insert_pending_ticketbook(&issuance_data).await.inspect_err(|err| {
.insert_pending_ticketbook(&issuance_data, epoch_id, &failure_message).await.inspect_err(|store_err| {
let deposit = issuance_data.deposit_id();
error!("could not save the recovery data for deposit {deposit}: {err}. the data will unfortunately get lost")
error!("could not save the recovery data for deposit {deposit}: {store_err}. the data will unfortunately get lost")
})?;
return Err(err.into());
}
@@ -43,6 +43,8 @@ impl TicketbookManagerStorage {
pub(crate) async fn insert_pending_ticketbook(
&self,
ticketbook: &IssuanceTicketBook,
epoch_id: EpochId,
failure_message: &str,
) -> anyhow::Result<()> {
let ser = ticketbook.pack();
let data = Zeroizing::new(ser.data);
@@ -54,6 +56,8 @@ impl TicketbookManagerStorage {
ticketbook.deposit_id() as i32,
&data,
ticketbook.expiration_date(),
epoch_id as i32,
failure_message,
)
.await?;
@@ -23,7 +23,7 @@ tracing = { workspace = true }
time = { workspace = true }
nym-validator-client = { workspace = true }
nym-validator-client = { workspace = true, features = ["http-client"] }
nym-bin-common = { workspace = true, features = ["output_format", "basic_tracing"] }
nym-network-defaults = { workspace = true }
nym-http-api-client = { workspace = true }