// COLD-CONNECT TIMING HARNESS (Build 98 latency investigation). Not part of the // shipped test suite — it exists to MEASURE, on this machine, how long the real // Nym transport takes to go from a cold start to "transport ready" (a relay // connected+subscribed on the current tunnel generation), broken down per stage, // and to detect the exit-reselect LOOP (watchdog condemning a healthy exit // because relays were slow to connect through lossy mix-dns). // // It drives the SAME `NymWebSocketTransport` the app ships with, over the SAME // default relay set, arming the relay-consumer governance exactly like // `client.rs::run_service`, so the watchdog behaves as it does in the app. // // Run BEFORE (reproduce the old UDP mix-dns + legacy-watchdog loop) vs AFTER // (DoT-over-mixnet + robust watchdog), same binary, via env toggles: // // # BEFORE (old behavior): UDP mix-dns on + legacy watchdog // GOBLIN_DNS_UDP=1 GOBLIN_LEGACY_WATCHDOG=1 \ // cargo test --test connect_timing -- --ignored --nocapture --test-threads=1 // // # AFTER (shipped default): DoT-over-mixnet + robust watchdog // cargo test --test connect_timing -- --ignored --nocapture --test-threads=1 // // Grep the captured log for lines tagged "[timing]" and "[TIMELINE]". use std::time::{Duration, Instant}; use grim::nym::NymWebSocketTransport; use nostr_sdk::prelude::*; /// The app's default relay set (src/nostr/relays.rs). const DEFAULT_RELAYS: &[&str] = &[ "wss://relay.goblin.st", "wss://relay.damus.io", "wss://nos.lol", ]; /// Overall budget for the measured window. Long enough to observe several /// reselect cycles if the loop is present (BEFORE), short enough to keep the run /// bounded. Overridable with GOBLIN_TIMING_WINDOW_SECS. fn window() -> Duration { let secs = std::env::var("GOBLIN_TIMING_WINDOW_SECS") .ok() .and_then(|s| s.parse().ok()) .unwrap_or(180); Duration::from_secs(secs) } fn init() { let _ = rustls::crypto::ring::default_provider().install_default(); let _ = env_logger::builder() .is_test(false) .format_timestamp_millis() // absolute wall-clock ms on every line .filter_level(log::LevelFilter::Info) .filter_module("grim::nym", log::LevelFilter::Debug) .parse_default_env() .try_init(); } /// One cold-connect measurement: bring the tunnel up, dial the default relays /// with the relay-consumer governance armed (as the app does), and record the /// per-stage timeline + any exit reselects over the window. #[tokio::test(flavor = "multi_thread", worker_threads = 4)] #[ignore] async fn cold_connect_timing() { init(); let mode_dns = if std::env::var("GOBLIN_DNS_UDP").as_deref() == Ok("1") { "udp-dns(legacy)" } else { "dot-dns" }; let mode_wd = if std::env::var("GOBLIN_LEGACY_WATCHDOG").as_deref() == Ok("1") { "legacy-watchdog" } else { "robust-watchdog" }; eprintln!("[TIMELINE] === cold_connect_timing START (dns={mode_dns}, watchdog={mode_wd}) ==="); let t0 = Instant::now(); // Stage A: mixnet tunnel bootstrap (select exit + build + liveness probe). grim::nym::warm_up(); let mut tunnel_ready_ms = None; for _ in 0..480 { if grim::nym::is_ready() { tunnel_ready_ms = Some(t0.elapsed().as_millis()); break; } tokio::time::sleep(Duration::from_millis(250)).await; } let gen0 = grim::nym::tunnel_generation(); match tunnel_ready_ms { Some(ms) => eprintln!("[TIMELINE] A. tunnel READY at t+{ms}ms (gen {gen0})"), None => { eprintln!( "[TIMELINE] A. tunnel NEVER became ready within {}s — mixnet bootstrap failed on this machine", t0.elapsed().as_secs() ); panic!("mixnet never bootstrapped; cannot measure connect timing"); } } // Stage B: dial the default relays over the mixnet, exactly like run_service: // arm relay-consumer governance so the watchdog treats a relay-dead exit as // condemnable (this is what produces the loop in the BEFORE case). grim::nym::set_relay_consumer(true); let client = Client::builder() .signer(Keys::generate()) .websocket_transport(NymWebSocketTransport) .build(); for r in DEFAULT_RELAYS { let _ = client.add_relay(*r).await; } let dial_gen = grim::nym::tunnel_generation(); let connect_started = Instant::now(); client.connect().await; // Report relay-live on the current generation as soon as a relay connects, // exactly like run_service's fast-report task — this is what closes the // watchdog's readiness window in the healthy case. let mut first_relay_ms = None; let mut transport_ready_ms = None; let mut reselects = 0u64; let mut last_gen = dial_gen; let mut gen_events: Vec<(u128, u64)> = vec![(t0.elapsed().as_millis(), dial_gen)]; loop { if connect_started.elapsed() > window() { break; } let gen_now = grim::nym::tunnel_generation(); if gen_now != last_gen { reselects += 1; gen_events.push((t0.elapsed().as_millis(), gen_now)); eprintln!( "[TIMELINE] !! exit RESELECT #{reselects}: gen {last_gen} -> {gen_now} at t+{}ms (the loop)", t0.elapsed().as_millis() ); last_gen = gen_now; // Re-dial on the fresh exit like the status loop does. client.disconnect().await; for r in DEFAULT_RELAYS { let _ = client.add_relay(*r).await; } client.connect().await; } let connected = client .relays() .await .values() .any(|r| r.status() == RelayStatus::Connected); if connected { // Feed liveness on the CURRENT generation (what run_service does). grim::nym::report_relay_live(grim::nym::tunnel_generation()); if first_relay_ms.is_none() { first_relay_ms = Some(t0.elapsed().as_millis()); eprintln!( "[TIMELINE] B. first relay CONNECTED at t+{}ms (~{}ms after connect())", t0.elapsed().as_millis(), connect_started.elapsed().as_millis() ); } } else if first_relay_ms.is_some() { grim::nym::report_relay_down(grim::nym::tunnel_generation()); } if grim::nym::transport_ready() && transport_ready_ms.is_none() { transport_ready_ms = Some(t0.elapsed().as_millis()); eprintln!( "[TIMELINE] C. TRANSPORT READY at t+{}ms (relay live on gen {})", t0.elapsed().as_millis(), grim::nym::tunnel_generation() ); // Once ready, watch a little longer to confirm it STAYS ready (no loop), // then finish early rather than burn the whole window. let settle_until = Instant::now() + Duration::from_secs(20); let mut stayed = true; while Instant::now() < settle_until { tokio::time::sleep(Duration::from_millis(500)).await; if grim::nym::tunnel_generation() != last_gen { stayed = false; // a reselect during settle — loop still live break; } } if stayed { eprintln!("[TIMELINE] transport stayed ready for 20s — no loop"); break; } } tokio::time::sleep(Duration::from_millis(250)).await; } grim::nym::set_relay_consumer(false); client.disconnect().await; eprintln!("[TIMELINE] === SUMMARY (dns={mode_dns}, watchdog={mode_wd}) ==="); eprintln!( "[TIMELINE] tunnel_ready: {}", tunnel_ready_ms .map(|m| format!("{m}ms")) .unwrap_or("n/a".into()) ); eprintln!( "[TIMELINE] first_relay: {}", first_relay_ms .map(|m| format!("{m}ms")) .unwrap_or("NEVER".into()) ); eprintln!( "[TIMELINE] transport_ready: {}", transport_ready_ms .map(|m| format!("{m}ms")) .unwrap_or("NEVER".into()) ); eprintln!("[TIMELINE] exit reselects during window: {reselects} (0 = no loop)"); eprintln!("[TIMELINE] generation timeline: {gen_events:?}"); eprintln!("[TIMELINE] === cold_connect_timing END ==="); // The measurement itself shouldn't fail the suite; it's diagnostic. But a // total failure to ever connect is worth surfacing loudly. assert!( first_relay_ms.is_some(), "no relay ever connected within the window" ); } /// Prove DNS resolves END TO END over the tunnel (DoT, or DoH fallback) — no /// clearnet. Loops across exit reselects (the mixnet hands out the odd dead /// exit) until a healthy exit resolves a real relay host, then asserts success. /// Watch the log for "dot-dns: resolved" / "doh-dns: resolved". #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[ignore] async fn dns_resolve_smoke() { init(); grim::nym::warm_up(); for _ in 0..480 { if grim::nym::is_ready() { break; } tokio::time::sleep(Duration::from_millis(250)).await; } let deadline = Instant::now() + Duration::from_secs(150); let mut ok = false; while Instant::now() < deadline { if let Some(tunnel) = grim::nym::nymproc::tunnel() { for host in ["relay.damus.io", "goblin.st", "api.coingecko.com"] { let t = Instant::now(); match grim::nym::dns::resolve(&tunnel, host, 443).await { Some(addr) => { eprintln!( "[DNSPROOF] resolved {host} -> {addr} in {}ms OVER THE TUNNEL", t.elapsed().as_millis() ); ok = true; } None => eprintln!( "[DNSPROOF] {host} unresolved on this exit ({}ms) — waiting for a better exit", t.elapsed().as_millis() ), } } if ok { break; } } tokio::time::sleep(Duration::from_secs(3)).await; } assert!( ok, "DNS never resolved over the tunnel within the window (all exits bad?)" ); } /// Probe whether the Nym IPR exit policy lets us open TCP to the DoT port (853) /// through the tunnel. 443 is the control (known-open — relays + HTTPS ride it). /// Decides DoT-vs-DoH for the private DNS transport. Run: /// cargo test --test connect_timing probe_dns_ports -- --ignored --nocapture --test-threads=1 #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[ignore] async fn probe_dns_ports() { init(); grim::nym::warm_up(); let mut ready = false; for _ in 0..480 { if grim::nym::is_ready() { ready = true; break; } tokio::time::sleep(Duration::from_millis(250)).await; } assert!(ready, "tunnel never bootstrapped; cannot probe ports"); let tunnel = grim::nym::nymproc::tunnel().expect("tunnel up"); let targets = [ ("cloudflare:853 (DoT)", "1.1.1.1:853"), ("quad9:853 (DoT)", "9.9.9.9:853"), ("cloudflare:443 (control)", "1.1.1.1:443"), ]; for (label, addr) in targets { let sa: std::net::SocketAddr = addr.parse().unwrap(); let t = Instant::now(); match tokio::time::timeout(Duration::from_secs(12), tunnel.tcp_connect(sa)).await { Ok(Ok(_)) => eprintln!( "[PORTPROBE] {label} = CONNECTED in {}ms", t.elapsed().as_millis() ), Ok(Err(e)) => eprintln!( "[PORTPROBE] {label} = REFUSED/ERR after {}ms: {e}", t.elapsed().as_millis() ), Err(_) => eprintln!( "[PORTPROBE] {label} = TIMEOUT after {}ms", t.elapsed().as_millis() ), } } }