From ce23214d9832609f7e461de8cbf04a155693372b Mon Sep 17 00:00:00 2001 From: 2ro <17595647+2ro@users.noreply.github.com> Date: Fri, 3 Jul 2026 15:39:02 -0400 Subject: [PATCH] nostr: Connected flag tracks the fast relay-live probe, not the 30s catch-up fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symptom: after 'Save & reconnect' of the relay list, the home/onboarding UI sat on 'Connecting relays…' for ~30s even though the relays had physically reconnected over the exit in ~2-4s. Cause: on restart the service set connected=false, and the UI flag was only restored AFTER publish_identity (serial, untimed per-event sends) AND a catch-up fetch_events_from bounded by FETCH_TIMEOUT=30s. One relay slow to EOSE pinned is_connected() false for the whole window while the connection was already usable. A separate FAST probe task already detects first-relay-Connected with a 250ms poll (~2-4s) and reports relay-live to nymproc, but it did not touch the UI flag. Fix: in that fast probe, when relays first report Connected (same point that calls report_relay_live), also set svc.connected=true. The indicator now tracks the real ~2-4s relay-up signal; publish_identity + the catch-up fetch continue in the background. Tradeoff (documented in code): a relay drop between the probe store(true) and the 2s status loop taking over wouldn't flip the flag back for up to ~30s, until the post-catch-up re-check re-syncs it to reality — the same-order staleness as the old pessimistic gap, just optimistic instead; the transport watchdog still tracks real exit health independently. Hardening: publish_identity's per-event send_event_to was untimed, so a stalled relay delayed the catch-up fetch and the kind:1059 subscription that follow it (real incoming-message latency). Each publish is now wrapped in tokio::time::timeout(SEND_TIMEOUT), mirroring dispatch_dm; on timeout it warns and continues to the next event, never aborting the sequence. 
Audit: all readers of is_connected() were reviewed for the relaxed invariant (flag can now be true before the giftwrap subscription is established). gui/goblin/mod.rs and gui/goblin/onboarding.rs use it for display + repaint scheduling and to enable the claim-username button — claiming needs relays connected (which the flag now genuinely means), not the incoming kind:1059 subscription. wallet/e2e.rs uses it as a test precondition with downstream waits of 900s/2400s and relays replay stored gift wraps on subscribe, so it still converges. No reader treats is_connected() as 'safe to receive now', so no separate ui_connected flag is needed. --- src/nostr/client.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/nostr/client.rs b/src/nostr/client.rs index 3a3a778..4138002 100644 --- a/src/nostr/client.rs +++ b/src/nostr/client.rs @@ -938,6 +938,22 @@ async fn run_service(svc: Arc, wallet: Wallet) { "nostr: first relay Connected ~{}ms after connect()", connect_started.elapsed().as_millis() ); + // Flip the UI "Connected" flag on the REAL relay-up signal + // (~2-4s over the exit) instead of gating it behind + // publish_identity + the up-to-30s catch-up fetch below: those are + // receive-side housekeeping and keep running in the background, + // while the relay is already usable the moment it reaches + // Connected. Without this, one relay slow to EOSE pinned the + // indicator on "Connecting relays…" for ~30s even though the + // connection was live in ~2-4s. + // + // Accepted tradeoff: between here and the 2s status loop taking + // over, a relay DROP wouldn't flip the flag back for up to ~30s + // (until the post-catch-up re-check re-syncs it to reality) — the + // same-order staleness as the old pessimistic gap, just optimistic + // instead. The transport watchdog (nymproc) still tracks real exit + // health independently of this UI flag. 
+ svc_probe.connected.store(true, Ordering::Relaxed); // FAST relay-live report: closes nymproc's relay-readiness // window as soon as the exit is proven to carry relay traffic, // independent of the up-to-30s catch-up fetch below (a slow @@ -1281,8 +1297,15 @@ async fn publish_identity(svc: &Arc, client: &Client) { } } for event in &events { - if let Err(e) = client.send_event_to(&advertised, event).await { - warn!("nostr: publish kind {} failed: {e}", event.kind); + // Time-box each publish (mirrors dispatch_dm's SEND_TIMEOUT): this loop is + // awaited before the catch-up fetch and the kind:1059 subscription below, so + // an untimed send to a stalled relay would delay real incoming-message + // delivery. On timeout, warn and move on to the next event — never abort the + // identity sequence. + match tokio::time::timeout(SEND_TIMEOUT, client.send_event_to(&advertised, event)).await { + Ok(Ok(_)) => {} + Ok(Err(e)) => warn!("nostr: publish kind {} failed: {e}", event.kind), + Err(_) => warn!("nostr: publish kind {} timed out", event.kind), } }