Compare commits

..

19 Commits

Author SHA1 Message Date
benedettadavico 59feaf0c07 Merge branch 'develop' into feature/wasm-tests 2023-03-16 17:36:28 +01:00
benedettadavico f20eaa875c sdk wasm tests 2023-03-16 17:35:16 +01:00
Jon Häggblad 4c2967a733 Delete stray .gitignore file 2023-03-16 15:27:45 +00:00
Jon Häggblad 2b80e5d1c9 Remove leftover file from deleted crate 2023-03-16 12:59:46 +00:00
Jon Häggblad 4e7ff53214 Contract and client support for updating gateway config (#3166)
* mixnet-contract: add update gateway config

* mixnet-contract: tests for updating gateway config

* vesting-contract: add update gateway config

* validator-client: add update gateway config

* wallet: add update_gateway_config

* common/commands: add support for setting gateway config

* Remove commented out line

* Review fixes

* Generate ts file for GatewayConfigUpdate type

* Add generated GatewayConfigUpdate.ts file
2023-03-16 13:43:56 +01:00
Jon Häggblad 9ec36e49b7 contracts: remove .gitignore with Cargo.lock in it
While developing the service-provider-directory contract I ran into
issues with the lock file being inconsistent for cosmwasm-std (1.0 vs
1.2) and was hidden due to ignoring the lock file
2023-03-16 12:28:35 +00:00
pierre dd13073037 docs(connect-android): add build note 2023-03-15 12:29:23 +01:00
Pierre Dommerc 1010df1077 refactor(wallet): ui adjustments (#3182) 2023-03-15 12:22:00 +01:00
Bogdan-Ștefan Neacşu 9eaf9cf491 Feature/fix resharing (#3139)
* Compare verified vks against current group instead of initial dealers

* Fix various dkg logs

* API auto-advance epoch even on corrupt states

* Use verified vks as ultimate truth for dealers

* Set initial dealers based of verified vk

* Extend register period even more

* Fix test

* Use shares from current epoch

* Save initial dealers only when triggering resharing

* Fix tests

* Backup the last InProgress state too

* Reset previous signers that are not initial dealers

* Add unit test for bug reproduction

* More verbose debug logging

* Handle edge case for coconut keypair removal

* Update dkg api test

* Remove dealings directly for each key

* Replacement data is saved only on the first reshare start

* More debug logging

* On failed DKG, just reset

* Clippy fix
2023-03-15 11:16:43 +00:00
pierre 8e96318478 build(connect-android): try to fix fdroid build 2023-03-15 10:25:53 +01:00
benedettadavico 6e27497f14 sdk tests 2023-03-14 18:41:08 +01:00
Jędrzej Stuczyński 54287666e8 chore: simplify mnemonic zeroize story (#3165)
* updated bip39 dependency to simplify our zeroize story

* Replaced UserPassword wrapper with Zeroizing type alias

* fixed wallet-types cosmwasm-std dependency version
2023-03-13 11:29:56 +00:00
Tommy Verrall 6de829163d Merge pull request #3173 from nymtech/feature/nym-cli-tweak
Feature/nym cli tweak
2023-03-13 13:24:13 +02:00
benedettadavico adb5ed7c30 typo fix 2023-03-13 12:14:14 +01:00
benedettadavico 2b019e57df merge develop 2023-03-13 12:12:00 +01:00
benedettadavico 30c07712e3 format 2023-03-13 12:03:38 +01:00
benedettadavico 82c92501d9 vesting stuff 2023-03-13 12:01:22 +01:00
benedettadavico c2a871a1a7 typo 2023-03-10 17:10:27 +01:00
benedettadavico dfd7bd5889 adding pledge more 2023-03-10 17:10:01 +01:00
155 changed files with 1869 additions and 4897 deletions
-1
View File
@@ -43,4 +43,3 @@ Cargo.lock
nym-connect/Cargo.lock
.parcel-cache
**/.DS_Store
cpu-cycles/libcpucycles/build
Generated
+33 -32
View File
@@ -302,22 +302,23 @@ dependencies = [
[[package]]
name = "bip39"
version = "1.0.1"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e89470017230c38e52b82b3ee3f530db1856ba1d434e3a67a3456a8a8dec5f"
checksum = "93f2635620bf0b9d4576eb7bb9a38a55df78bd1205d26fa994b25911a69f212f"
dependencies = [
"bitcoin_hashes",
"rand 0.6.5",
"rand_core 0.4.2",
"rand 0.8.5",
"rand_core 0.6.4",
"serde",
"unicode-normalization",
"zeroize",
]
[[package]]
name = "bitcoin_hashes"
version = "0.9.7"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ce18265ec2324ad075345d5814fbeed4f41f0a660055dc78840b74d19b874b1"
checksum = "90064b8dee6815a6470d60bad07bbbaee885c0e12d04177138fa3291a01b7bc4"
[[package]]
name = "bitflags"
@@ -2706,9 +2707,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.140"
version = "0.2.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
[[package]]
name = "libgit2-sys"
@@ -2845,12 +2846,6 @@ dependencies = [
"serde",
]
[[package]]
name = "maybe-uninit"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
[[package]]
name = "memchr"
version = "2.5.0"
@@ -4171,7 +4166,7 @@ dependencies = [
"instant",
"libc",
"redox_syscall",
"smallvec 1.10.0",
"smallvec",
"winapi",
]
@@ -4184,7 +4179,7 @@ dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec 1.10.0",
"smallvec",
"windows-sys 0.45.0",
]
@@ -5057,7 +5052,7 @@ dependencies = [
"pin-project-lite",
"ref-cast",
"serde",
"smallvec 1.10.0",
"smallvec",
"stable-pattern",
"state",
"time 0.3.17",
@@ -5552,15 +5547,6 @@ dependencies = [
"autocfg 1.1.0",
]
[[package]]
name = "smallvec"
version = "0.6.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97fcaeba89edba30f044a10c6a3cc39df9c3f17d7cd829dd1446cab35f890e0"
dependencies = [
"maybe-uninit",
]
[[package]]
name = "smallvec"
version = "1.10.0"
@@ -5725,7 +5711,7 @@ dependencies = [
"percent-encoding",
"rustls 0.19.1",
"sha2 0.10.6",
"smallvec 1.10.0",
"smallvec",
"sqlformat 0.1.8",
"sqlx-rt 0.5.13",
"stringprep",
@@ -5773,7 +5759,7 @@ dependencies = [
"rustls 0.20.8",
"rustls-pemfile",
"sha2 0.10.6",
"smallvec 1.10.0",
"smallvec",
"sqlformat 0.2.1",
"sqlx-rt 0.6.2",
"stringprep",
@@ -6198,6 +6184,21 @@ dependencies = [
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.25.0"
@@ -6495,7 +6496,7 @@ dependencies = [
"once_cell",
"regex",
"sharded-slab",
"smallvec 1.10.0",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
@@ -6661,11 +6662,11 @@ checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
[[package]]
name = "unicode-normalization"
version = "0.1.9"
version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09c8070a9942f5e7cfccd93f490fdebd230ee3c3c9f107cb25bad5351ef671cf"
checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921"
dependencies = [
"smallvec 0.6.14",
"tinyvec",
]
[[package]]
+1
View File
@@ -105,6 +105,7 @@ license = "Apache-2.0"
[workspace.dependencies]
async-trait = "0.1.64"
bip39 = { version = "2.0.0", features = ["zeroize"] }
cfg-if = "1.0.0"
dotenvy = "0.15.6"
lazy_static = "1.4.0"
+1 -1
View File
@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bip39 = "1.0.1"
bip39 = { workspace = true }
clap = { version = "4.0", features = ["cargo", "derive"] }
log = "0.4"
rand = "0.7.3"
-3
View File
@@ -1,3 +0,0 @@
/target
**/*.rs.bk
Cargo.lock
View File
@@ -37,7 +37,7 @@ nym-execute = { path = "../../execute" }
# at some point it might be possible to make it wasm-compatible
# perhaps after https://github.com/cosmos/cosmos-rust/pull/97 is resolved (and tendermint-rs is updated)
async-trait = { workspace = true, optional = true }
bip39 = { version = "1", features = ["rand"], optional = true }
bip39 = { workspace = true, features = ["rand"], optional = true }
nym-config = { path = "../../config", optional = true }
cosmrs = { git = "https://github.com/neacsu/cosmos-rust", branch = "neacsu/feegrant_support", features = ["rpc", "bip32", "cosmwasm"], optional = true}
cw3 = { version = "0.13.4", optional = true }
@@ -47,7 +47,7 @@ flate2 = { version = "1.0.20", optional = true }
sha2 = { version = "0.9.5", optional = true }
itertools = { version = "0.10", optional = true }
cosmwasm-std = { version = "1.0.0", optional = true }
zeroize = { version = "1.5.7", optional = true }
zeroize = { version = "1.5.7", optional = true, features = ["zeroize_derive"] }
[dev-dependencies]
ts-rs = "6.1.2"
@@ -9,6 +9,7 @@ use crate::nyxd::{Fee, NyxdClient, SigningCosmWasmClient};
use async_trait::async_trait;
use cosmrs::AccountId;
use nym_contracts_common::signing::MessageSignature;
use nym_mixnet_contract_common::gateway::GatewayConfigUpdate;
use nym_mixnet_contract_common::mixnode::{MixNodeConfigUpdate, MixNodeCostParams};
use nym_mixnet_contract_common::reward_params::{IntervalRewardingParamsUpdate, Performance};
use nym_mixnet_contract_common::{
@@ -498,6 +499,36 @@ pub trait MixnetSigningClient {
.await
}
/// Executes `MixnetExecuteMsg::UpdateGatewayConfig` against the mixnet contract.
///
/// `new_config` is embedded in the message as-is, `fee` is forwarded to the
/// contract execution and no funds are attached.
async fn update_gateway_config(
    &self,
    new_config: GatewayConfigUpdate,
    fee: Option<Fee>,
) -> Result<ExecuteResult, NyxdError> {
    let msg = MixnetExecuteMsg::UpdateGatewayConfig { new_config };
    self.execute_mixnet_contract(fee, msg, vec![]).await
}
/// Executes `MixnetExecuteMsg::UpdateGatewayConfigOnBehalf` against the mixnet contract.
///
/// `owner` is converted to its string form before being placed in the message,
/// `fee` is forwarded to the contract execution and no funds are attached.
async fn update_gateway_config_on_behalf(
    &self,
    owner: AccountId,
    new_config: GatewayConfigUpdate,
    fee: Option<Fee>,
) -> Result<ExecuteResult, NyxdError> {
    let owner = owner.to_string();
    let msg = MixnetExecuteMsg::UpdateGatewayConfigOnBehalf { new_config, owner };
    self.execute_mixnet_contract(fee, msg, vec![]).await
}
// delegation-related:
async fn delegate_to_mixnode(
@@ -7,6 +7,7 @@ use crate::nyxd::error::NyxdError;
use crate::nyxd::{Coin, Fee, NyxdClient};
use async_trait::async_trait;
use nym_contracts_common::signing::MessageSignature;
use nym_mixnet_contract_common::gateway::GatewayConfigUpdate;
use nym_mixnet_contract_common::mixnode::{MixNodeConfigUpdate, MixNodeCostParams};
use nym_mixnet_contract_common::{Gateway, MixId, MixNode};
use nym_vesting_contract_common::messages::{
@@ -35,6 +36,12 @@ pub trait VestingSigningClient {
fee: Option<Fee>,
) -> Result<ExecuteResult, NyxdError>;
/// Requests a gateway configuration update through the vesting contract.
///
/// `new_config` carries the replacement configuration values and `fee` is the
/// optional fee used for the contract execution.
async fn vesting_update_gateway_config(
&self,
new_config: GatewayConfigUpdate,
fee: Option<Fee>,
) -> Result<ExecuteResult, NyxdError>;
async fn update_mixnet_address(
&self,
address: &str,
@@ -185,6 +192,19 @@ impl<C: SigningCosmWasmClient + Sync + Send + Clone> VestingSigningClient for Ny
.await
}
/// Executes `VestingExecuteMsg::UpdateGatewayConfig` against the vesting contract.
///
/// `new_config` is embedded in the message as-is, `fee` is forwarded to the
/// contract execution and no funds are attached.
async fn vesting_update_gateway_config(
    &self,
    new_config: GatewayConfigUpdate,
    fee: Option<Fee>,
) -> Result<ExecuteResult, NyxdError> {
    let msg = VestingExecuteMsg::UpdateGatewayConfig { new_config };
    self.execute_vesting_contract(fee, msg, vec![]).await
}
async fn update_mixnet_address(
&self,
address: &str,
@@ -8,7 +8,7 @@ use cosmrs::crypto::PublicKey;
use cosmrs::tx::SignDoc;
use cosmrs::{tx, AccountId};
use nym_config::defaults;
use zeroize::Zeroize;
use zeroize::{Zeroize, ZeroizeOnDrop};
/// Derivation information required to derive a keypair and an address from a mnemonic.
#[derive(Debug, Clone)]
@@ -41,7 +41,7 @@ impl AccountData {
type Secp256k1Keypair = (SigningKey, PublicKey);
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Zeroize, ZeroizeOnDrop)]
pub struct DirectSecp256k1HdWallet {
/// Base secret
secret: bip39::Mnemonic,
@@ -54,30 +54,10 @@ pub struct DirectSecp256k1HdWallet {
// that would include the secret key which is a dyn EcdsaSigner and hence not Sync making the wallet
// not Sync and if used on the signing client in an async trait, it wouldn't be Send
/// Derivation instructions
#[zeroize(skip)]
accounts: Vec<Secp256k1Derivation>,
}
impl Zeroize for DirectSecp256k1HdWallet {
fn zeroize(&mut self) {
// in ideal world, Mnemonic would have had zeroize defined on it (there's an almost year old PR that introduces it)
// and the memory would have been filled with zeroes.
//
// we really don't want to keep our real mnemonic in memory, so let's do the semi-nasty thing
// of overwriting it with a fresh mnemonic that was never used before
//
// note: this function can only fail on an invalid word count, which clearly is not the case here
self.secret = bip39::Mnemonic::generate(self.secret.word_count()).unwrap();
self.seed.zeroize();
// there's nothing secret about derivation paths
}
}
impl Drop for DirectSecp256k1HdWallet {
fn drop(&mut self) {
self.zeroize()
}
}
impl DirectSecp256k1HdWallet {
pub fn builder(prefix: &str) -> DirectSecp256k1HdWalletBuilder {
DirectSecp256k1HdWalletBuilder::new(prefix)
+1 -1
View File
@@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
base64 = "0.13.0"
bip39 = "1.0.1"
bip39 = { workspace = true }
bs58 = "0.4"
comfy-table = "6.0.0"
cfg-if = "1.0.0"
@@ -6,9 +6,11 @@ use clap::{Args, Subcommand};
pub mod rewards;
pub mod delegate_to_mixnode;
pub mod pledge_more;
pub mod query_for_delegations;
pub mod undelegate_from_mixnode;
pub mod vesting_delegate_to_mixnode;
pub mod vesting_pledge_more;
pub mod vesting_undelegate_from_mixnode;
#[derive(Debug, Args)]
@@ -32,4 +34,8 @@ pub enum MixnetDelegatorsCommands {
DelegateVesting(vesting_delegate_to_mixnode::Args),
/// Undelegate from a mixnode (when originally using locked tokens)
UndelegateVesting(vesting_undelegate_from_mixnode::Args),
/// Pledge more
PledgeMore(pledge_more::Args),
/// Pledge more with locked tokens
PledgeMoreVesting(vesting_pledge_more::Args),
}
@@ -0,0 +1,29 @@
// Copyright 2021 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use crate::context::SigningClient;
use clap::Parser;
use log::info;
use nym_mixnet_contract_common::Coin;
use validator_client::nyxd::traits::MixnetSigningClient;
// Command-line arguments consumed by `pledge_more`.
#[derive(Debug, Parser)]
pub struct Args {
// Amount to add to the pledge; `pledge_more` pairs this number with the
// chain's base mix denomination when building the coin.
#[clap(long)]
pub amount: u128,
}
/// Increases the pledge of the signing client by `args.amount`.
///
/// The amount is expressed in the base mix denomination reported by the
/// client's chain details. The execution result is logged at `info` level.
///
/// # Panics
///
/// Panics if the pledge transaction fails.
pub async fn pledge_more(args: Args, client: SigningClient) {
    let base_denom = client.current_chain_details().mix_denom.base.as_str();

    info!("Starting to pledge more");

    let additional_pledge = Coin::new(args.amount, base_denom);
    let outcome = client.pledge_more(additional_pledge.into(), None).await;
    let res = outcome.expect("failed to pledge more!");

    info!("pledging more: {:?}", res);
}
@@ -0,0 +1,30 @@
// Copyright 2021 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use clap::Parser;
use log::info;
use nym_mixnet_contract_common::Coin;
use validator_client::nyxd::VestingSigningClient;
use crate::context::SigningClient;
// Command-line arguments consumed by `vesting_pledge_more`.
#[derive(Debug, Parser)]
pub struct Args {
// Amount to add to the pledge; `vesting_pledge_more` pairs this number with
// the chain's base mix denomination when building the coin.
#[clap(long)]
pub amount: u128,
}
/// Increases the pledge of the signing client by `args.amount` via the vesting client API.
///
/// The amount is expressed in the base mix denomination reported by the
/// client's chain details. The execution result is logged at `info` level.
///
/// # Panics
///
/// Panics if the pledge transaction fails.
pub async fn vesting_pledge_more(args: Args, client: SigningClient) {
    let base_denom = client.current_chain_details().mix_denom.base.as_str();

    info!("Starting vesting pledge more");

    let additional_pledge = Coin::new(args.amount, base_denom);
    let outcome = client
        .vesting_pledge_more(additional_pledge.into(), None)
        .await;
    let res = outcome.expect("failed to pledge more!");

    info!("vesting pledge more: {:?}", res);
}
@@ -5,6 +5,7 @@ use clap::{Args, Subcommand};
pub mod bond_gateway;
pub mod gateway_bonding_sign_payload;
pub mod settings;
pub mod unbond_gateway;
pub mod vesting_bond_gateway;
pub mod vesting_unbond_gateway;
@@ -18,14 +19,16 @@ pub struct MixnetOperatorsGateway {
#[derive(Debug, Subcommand)]
pub enum MixnetOperatorsGatewayCommands {
/// Manage your gateway settings stored in the directory
Settings(settings::MixnetOperatorsGatewaySettings),
/// Bond to a gateway
Bond(bond_gateway::Args),
/// Unbound from a gateway
Unbound(unbond_gateway::Args),
/// Unbond from a gateway
Unbond(unbond_gateway::Args),
/// Bond to a gateway with locked tokens
VestingBond(vesting_bond_gateway::Args),
/// Unbound from a gateway (when originally using locked tokens)
VestingUnbound(vesting_unbond_gateway::Args),
/// Unbond from a gateway (when originally using locked tokens)
VestingUnbond(vesting_unbond_gateway::Args),
/// Create base58-encoded payload required for producing valid bonding signature.
CreateGatewayBondingSignPayload(gateway_bonding_sign_payload::Args),
}
@@ -0,0 +1,22 @@
// Copyright 2023 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use clap::{Args, Subcommand};
pub mod update_config;
pub mod vesting_update_config;
// Argument group for the gateway `settings` command: it holds nothing but the
// selected settings subcommand, and a subcommand is mandatory.
#[derive(Debug, Args)]
#[clap(args_conflicts_with_subcommands = true, subcommand_required = true)]
pub struct MixnetOperatorsGatewaySettings {
// The settings subcommand to run.
#[clap(subcommand)]
pub command: MixnetOperatorsGatewaySettingsCommands,
}
// Subcommands available under the gateway settings command; each variant
// carries the arguments declared by the module of the same name.
#[derive(Debug, Subcommand)]
pub enum MixnetOperatorsGatewaySettingsCommands {
/// Update gateway configuration
UpdateConfig(update_config::Args),
/// Update gateway configuration for a gateway bonded with locked tokens
VestingUpdateConfig(vesting_update_config::Args),
}
@@ -0,0 +1,60 @@
// Copyright 2023 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use crate::context::SigningClient;
use clap::Parser;
use log::info;
use nym_mixnet_contract_common::GatewayConfigUpdate;
use validator_client::nyxd::traits::{MixnetQueryClient, MixnetSigningClient};
// Command-line arguments consumed by `update_config`.
//
// Every flag is optional: `update_config` substitutes the value currently
// stored for the gateway whenever a flag is left out.
#[derive(Debug, Parser)]
pub struct Args {
// New `host` value for the gateway.
#[clap(long)]
pub host: Option<String>,

// New `mix_port` value for the gateway.
#[clap(long)]
pub mix_port: Option<u16>,

// New `clients_port` value for the gateway.
#[clap(long)]
pub clients_port: Option<u16>,

// New `location` value for the gateway.
#[clap(long)]
pub location: Option<String>,

// New `version` value for the gateway.
#[clap(long)]
pub version: Option<String>,
}
pub async fn update_config(args: Args, client: SigningClient) {
info!("Update gateway config!");
let current_details = match client
.get_owned_gateway(client.address())
.await
.expect("failed to query the chain for gateway details")
.gateway
{
Some(details) => details,
None => {
log::warn!("this operator does not own a gateway to update");
return;
}
};
let update = GatewayConfigUpdate {
host: args.host.unwrap_or(current_details.gateway.host),
mix_port: args.mix_port.unwrap_or(current_details.gateway.mix_port),
clients_port: args
.clients_port
.unwrap_or(current_details.gateway.clients_port),
location: args.location.unwrap_or(current_details.gateway.location),
version: args.version.unwrap_or(current_details.gateway.version),
};
let res = client
.update_gateway_config(update, None)
.await
.expect("updating gateway config");
info!("gateway config updated: {:?}", res)
}
@@ -0,0 +1,61 @@
// Copyright 2023 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use crate::context::SigningClient;
use clap::Parser;
use log::info;
use nym_mixnet_contract_common::GatewayConfigUpdate;
use validator_client::nyxd::traits::MixnetQueryClient;
use validator_client::nyxd::VestingSigningClient;
// Command-line arguments consumed by `vesting_update_config`.
//
// Every flag is optional: `vesting_update_config` substitutes the value
// currently stored for the gateway whenever a flag is left out.
#[derive(Debug, Parser)]
pub struct Args {
// New `host` value for the gateway.
#[clap(long)]
pub host: Option<String>,

// New `mix_port` value for the gateway.
#[clap(long)]
pub mix_port: Option<u16>,

// New `clients_port` value for the gateway.
#[clap(long)]
pub clients_port: Option<u16>,

// New `location` value for the gateway.
#[clap(long)]
pub location: Option<String>,

// New `version` value for the gateway.
#[clap(long)]
pub version: Option<String>,
}
pub async fn vesting_update_config(client: SigningClient, args: Args) {
info!("Update vesting gateway config!");
let current_details = match client
.get_owned_gateway(client.address())
.await
.expect("failed to query the chain for gateway details")
.gateway
{
Some(details) => details,
None => {
log::warn!("this operator does not own a gateway to update");
return;
}
};
let update = GatewayConfigUpdate {
host: args.host.unwrap_or(current_details.gateway.host),
mix_port: args.mix_port.unwrap_or(current_details.gateway.mix_port),
clients_port: args
.clients_port
.unwrap_or(current_details.gateway.clients_port),
location: args.location.unwrap_or(current_details.gateway.location),
version: args.version.unwrap_or(current_details.gateway.version),
};
let res = client
.vesting_update_gateway_config(update, None)
.await
.expect("updating vesting gateway config");
info!("gateway config updated: {:?}", res)
}
@@ -29,12 +29,12 @@ pub enum MixnetOperatorsMixnodeCommands {
Settings(settings::MixnetOperatorsMixnodeSettings),
/// Bond to a mixnode
Bond(bond_mixnode::Args),
/// Unbound from a mixnode
Unbound(unbond_mixnode::Args),
/// Unbond from a mixnode
Unbond(unbond_mixnode::Args),
/// Bond to a mixnode with locked tokens
BondVesting(vesting_bond_mixnode::Args),
/// Unbound from a mixnode (when originally using locked tokens)
UnboundVesting(vesting_unbond_mixnode::Args),
/// Unbond from a mixnode (when originally using locked tokens)
UnbondVesting(vesting_unbond_mixnode::Args),
/// Create base58-encoded payload required for producing valid bonding signature.
CreateMixnodeBondingSignPayload(mixnode_bonding_sign_payload::Args),
}
@@ -21,7 +21,7 @@ pub const TOTAL_DEALINGS: usize = 2 + 2 + 1;
#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub struct InitialReplacementData {
pub initial_dealers: Vec<Addr>,
pub initial_height: Option<u64>,
pub initial_height: u64,
}
#[derive(
@@ -1,6 +1,7 @@
// Copyright 2022 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use crate::gateway::GatewayConfigUpdate;
use crate::mixnode::{MixNodeConfigUpdate, MixNodeCostParams};
use crate::reward_params::{IntervalRewardParams, IntervalRewardingParamsUpdate};
use crate::rewarding::RewardDistribution;
@@ -42,6 +43,7 @@ pub enum MixnetEventType {
ReconcilePendingEvents,
PendingIntervalConfigUpdate,
IntervalConfigUpdate,
GatewayConfigUpdate,
}
impl From<MixnetEventType> for String {
@@ -86,6 +88,7 @@ impl ToString for MixnetEventType {
MixnetEventType::PendingIntervalConfigUpdate => "pending_interval_config_update",
MixnetEventType::IntervalConfigUpdate => "interval_config_update",
MixnetEventType::DelegationOnUnbonding => "delegation_on_unbonding_node",
MixnetEventType::GatewayConfigUpdate => "gateway_config_update",
};
format!("{EVENT_VERSION_PREFIX}{event_name}")
@@ -122,6 +125,7 @@ pub const OLD_REWARDING_VALIDATOR_ADDRESS_KEY: &str = "old_rewarding_validator_a
pub const NEW_REWARDING_VALIDATOR_ADDRESS_KEY: &str = "new_rewarding_validator_address";
pub const UPDATED_MIXNODE_CONFIG_KEY: &str = "updated_mixnode_config";
pub const UPDATED_GATEWAY_CONFIG_KEY: &str = "updated_gateway_config";
pub const UPDATED_MIXNODE_COST_PARAMS_KEY: &str = "updated_mixnode_cost_params";
// rewarding
@@ -382,6 +386,17 @@ pub fn new_mixnode_config_update_event(
.add_attribute(UPDATED_MIXNODE_CONFIG_KEY, update.to_inline_json())
}
/// Builds the `MixnetEventType::GatewayConfigUpdate` event.
///
/// The event carries the owner under `OWNER_KEY`, the proxy under `PROXY_KEY`
/// (passed as an optional attribute) and the inline JSON form of `update`
/// under `UPDATED_GATEWAY_CONFIG_KEY`.
pub fn new_gateway_config_update_event(
    owner: &Addr,
    proxy: &Option<Addr>,
    update: &GatewayConfigUpdate,
) -> Event {
    let serialised_update = update.to_inline_json();
    let maybe_proxy = proxy.as_ref();

    Event::new(MixnetEventType::GatewayConfigUpdate)
        .add_attribute(OWNER_KEY, owner)
        .add_optional_attribute(PROXY_KEY, maybe_proxy)
        .add_attribute(UPDATED_GATEWAY_CONFIG_KEY, serialised_update)
}
pub fn new_mixnode_pending_cost_params_update_event(
mix_id: MixId,
owner: &Addr,
@@ -112,6 +112,26 @@ impl Display for GatewayBond {
}
}
// Gateway configuration values submitted with a gateway config update.
//
// With the `generate-ts` feature enabled a TypeScript definition is exported
// to the path given in the `ts(export_to = ...)` attribute below.
#[cfg_attr(feature = "generate-ts", derive(ts_rs::TS))]
#[cfg_attr(
feature = "generate-ts",
ts(export_to = "ts-packages/types/src/types/rust/GatewayConfigUpdate.ts")
)]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize, JsonSchema)]
pub struct GatewayConfigUpdate {
// presumably the network host/address of the gateway — TODO confirm
pub host: String,
// presumably the port used for mix traffic — TODO confirm
pub mix_port: u16,
// presumably the port clients connect to — TODO confirm
pub clients_port: u16,
// free-form location string
pub location: String,
// free-form version string
pub version: String,
}
impl GatewayConfigUpdate {
    /// Serialises the update into a JSON string.
    ///
    /// Serialisation errors are not propagated; the literal
    /// `"serialisation failure"` is returned in their place.
    pub fn to_inline_json(&self) -> String {
        match serde_json::to_string(self) {
            Ok(json) => json,
            Err(_) => "serialisation failure".into(),
        }
    }
}
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, JsonSchema)]
pub struct PagedGatewayResponse {
pub nodes: Vec<GatewayBond>,
@@ -27,7 +27,8 @@ pub use delegation::{
PagedMixNodeDelegationsResponse,
};
pub use gateway::{
Gateway, GatewayBond, GatewayBondResponse, GatewayOwnershipResponse, PagedGatewayResponse,
Gateway, GatewayBond, GatewayBondResponse, GatewayConfigUpdate, GatewayOwnershipResponse,
PagedGatewayResponse,
};
pub use interval::{
CurrentIntervalResponse, EpochState, EpochStatus, Interval, NumberOfPendingEventsResponse,
@@ -3,6 +3,7 @@
use crate::delegation::OwnerProxySubKey;
use crate::error::MixnetContractError;
use crate::gateway::GatewayConfigUpdate;
use crate::helpers::IntoBaseDecimal;
use crate::mixnode::{MixNodeConfigUpdate, MixNodeCostParams};
use crate::reward_params::{
@@ -199,6 +200,13 @@ pub enum ExecuteMsg {
UnbondGatewayOnBehalf {
owner: String,
},
UpdateGatewayConfig {
new_config: GatewayConfigUpdate,
},
UpdateGatewayConfigOnBehalf {
new_config: GatewayConfigUpdate,
owner: String,
},
// delegation-related:
DelegateToMixnode {
@@ -313,6 +321,10 @@ impl ExecuteMsg {
}
ExecuteMsg::UnbondGateway { .. } => "unbonding gateway".into(),
ExecuteMsg::UnbondGatewayOnBehalf { .. } => "unbonding gateway on behalf".into(),
ExecuteMsg::UpdateGatewayConfig { .. } => "updating gateway configuration".into(),
ExecuteMsg::UpdateGatewayConfigOnBehalf { .. } => {
"updating gateway configuration on behalf".into()
}
ExecuteMsg::DelegateToMixnode { mix_id } => format!("delegating to mixnode {mix_id}"),
ExecuteMsg::DelegateToMixnodeOnBehalf { mix_id, .. } => {
format!("delegating to mixnode {mix_id} on behalf")
@@ -17,6 +17,7 @@ pub const VESTING_MIXNODE_BONDING_EVENT_TYPE: &str = "vesting_mixnode_bonding";
pub const VESTING_PLEDGE_MORE_EVENT_TYPE: &str = "vesting_pledge_more";
pub const VESTING_MIXNODE_UNBONDING_EVENT_TYPE: &str = "vesting_mixnode_unbonding";
pub const VESTING_UPDATE_MIXNODE_CONFIG_EVENT_TYPE: &str = "vesting_update_mixnode_config";
pub const VESTING_UPDATE_GATEWAY_CONFIG_EVENT_TYPE: &str = "vesting_update_gateway_config";
pub const VESTING_UPDATE_MIXNODE_COST_PARAMS_EVENT_TYPE: &str =
"vesting_update_mixnode_cost_params";
@@ -121,6 +122,10 @@ pub fn new_vesting_update_mixnode_config_event() -> Event {
Event::new(VESTING_UPDATE_MIXNODE_CONFIG_EVENT_TYPE)
}
/// Creates an event of type `VESTING_UPDATE_GATEWAY_CONFIG_EVENT_TYPE`; no attributes are added here.
pub fn new_vesting_update_gateway_config_event() -> Event {
Event::new(VESTING_UPDATE_GATEWAY_CONFIG_EVENT_TYPE)
}
pub fn new_vesting_update_mixnode_cost_params_event() -> Event {
Event::new(VESTING_UPDATE_MIXNODE_COST_PARAMS_EVENT_TYPE)
}
@@ -1,6 +1,7 @@
use contracts_common::signing::MessageSignature;
use cosmwasm_std::{Coin, Timestamp};
use mixnet_contract_common::{
gateway::GatewayConfigUpdate,
mixnode::{MixNodeConfigUpdate, MixNodeCostParams},
Gateway, IdentityKey, MixId, MixNode,
};
@@ -140,6 +141,9 @@ pub enum ExecuteMsg {
owner: String,
amount: Coin,
},
UpdateGatewayConfig {
new_config: GatewayConfigUpdate,
},
TransferOwnership {
to_address: String,
},
@@ -179,6 +183,7 @@ impl ExecuteMsg {
ExecuteMsg::BondGateway { .. } => "VestingExecuteMsg::BondGateway",
ExecuteMsg::UnbondGateway { .. } => "VestingExecuteMsg::UnbondGateway",
ExecuteMsg::TrackUnbondGateway { .. } => "VestingExecuteMsg::TrackUnbondGateway",
ExecuteMsg::UpdateGatewayConfig { .. } => "VestingExecuteMsg::UpdateGatewayConfig",
ExecuteMsg::TransferOwnership { .. } => "VestingExecuteMsg::TransferOwnership",
ExecuteMsg::UpdateStakingAddress { .. } => "VestingExecuteMsg::UpdateStakingAddress",
ExecuteMsg::UpdateLockedPledgeCap { .. } => "VestingExecuteMsg::UpdateLockedPledgeCap",
-1
View File
@@ -1 +0,0 @@
Cargo.lock
@@ -22,7 +22,7 @@ fn verify_dealer(deps: DepsMut<'_>, dealer: &Addr, resharing: bool) -> Result<()
let state = STATE.load(deps.storage)?;
let height = if resharing {
INITIAL_REPLACEMENT_DATA.load(deps.storage)?.initial_height
Some(INITIAL_REPLACEMENT_DATA.load(deps.storage)?.initial_height)
} else {
None
};
@@ -105,7 +105,7 @@ pub(crate) mod tests {
deps.as_mut().storage,
&InitialReplacementData {
initial_dealers: vec![details1.address, details2.address, details3.address],
initial_height: Some(1),
initial_height: 1,
},
)
.unwrap();
@@ -126,7 +126,7 @@ pub(crate) mod tests {
INITIAL_REPLACEMENT_DATA
.update::<_, ContractError>(deps.as_mut().storage, |mut data| {
data.initial_height = Some(2);
data.initial_height = 2;
Ok(data)
})
.unwrap();
@@ -110,7 +110,7 @@ pub(crate) mod tests {
deps.as_mut().storage,
&InitialReplacementData {
initial_dealers: vec![],
initial_height: None,
initial_height: 1,
},
)
.unwrap();
@@ -7,6 +7,7 @@ use crate::epoch_state::storage::{CURRENT_EPOCH, INITIAL_REPLACEMENT_DATA, THRES
use crate::epoch_state::utils::check_epoch_state;
use crate::error::ContractError;
use crate::state::STATE;
use crate::verification_key_shares::storage::verified_dealers;
use cosmwasm_std::{Addr, Deps, DepsMut, Env, Order, Response, Storage};
use nym_coconut_dkg_common::types::{Epoch, EpochState, InitialReplacementData};
@@ -19,7 +20,13 @@ fn reset_epoch_state(storage: &mut dyn Storage) -> Result<(), ContractError> {
for dealer_addr in dealers {
let details = current_dealers().load(storage, &dealer_addr)?;
for dealings in DEALINGS_BYTES {
dealings.remove(storage, &details.address);
let dealing_keys: Vec<_> = dealings
.keys(storage, None, None, Order::Ascending)
.flatten()
.collect();
for key in dealing_keys {
dealings.remove(storage, &key);
}
}
current_dealers().remove(storage, &dealer_addr)?;
past_dealers().save(storage, &dealer_addr, &details)?;
@@ -46,15 +53,9 @@ fn dealers_still_active(
}
fn dealers_eq_members(deps: &DepsMut<'_>) -> Result<bool, ContractError> {
let dealers_still_active = dealers_still_active(
&deps.as_ref(),
current_dealers()
.keys(deps.storage, None, None, Order::Ascending)
.flatten(),
)?;
let all_dealers = current_dealers()
.keys(deps.storage, None, None, Order::Ascending)
.count();
let verified_dealers = verified_dealers(deps.storage)?;
let all_dealers = verified_dealers.len();
let dealers_still_active = dealers_still_active(&deps.as_ref(), verified_dealers.into_iter())?;
let group_members = STATE
.load(deps.storage)?
.group_addr
@@ -66,7 +67,11 @@ fn dealers_eq_members(deps: &DepsMut<'_>) -> Result<bool, ContractError> {
fn replacement_threshold_surpassed(deps: &DepsMut<'_>) -> Result<bool, ContractError> {
let threshold = THRESHOLD.load(deps.storage)? as usize;
let initial_dealers = INITIAL_REPLACEMENT_DATA.load(deps.storage)?.initial_dealers;
let initial_dealers = verified_dealers(deps.storage)?;
if initial_dealers.is_empty() {
// possibly failed DKG, just reset and start again
return Ok(true);
}
let initial_dealer_count = initial_dealers.len();
let replacement_threshold = threshold - (initial_dealers.len() + 2 - 1) / 2 + 1;
let removed_dealer_count =
@@ -90,24 +95,23 @@ pub(crate) fn advance_epoch_state(deps: DepsMut<'_>, env: Env) -> Result<Respons
let next_epoch = if let Some(state) = current_epoch.state.next() {
// We are during DKG process
let mut new_state = state;
if let EpochState::DealingExchange { resharing } = state {
if let EpochState::DealingExchange { .. } = state {
let current_dealers = current_dealers()
.keys(deps.storage, None, None, Order::Ascending)
.collect::<Result<Vec<Addr>, _>>()?;
if current_dealers.is_empty() {
// If no dealer registered yet, we just stay in the same state until there's at least one
let group_members =
STATE
.load(deps.storage)?
.group_addr
.list_members(&deps.querier, None, None)?;
if current_dealers.len() < group_members.len() {
// If not all group members registered yet, we just stay in the same state until
// they either register or they get kicked out of the group
new_state = current_epoch.state;
} else {
// note: ceiling in integer division can be achieved via q = (x + y - 1) / y;
let threshold = (2 * current_dealers.len() as u64 + 3 - 1) / 3;
THRESHOLD.save(deps.storage, &threshold)?;
if !resharing {
let replacement_data = InitialReplacementData {
initial_dealers: current_dealers,
initial_height: None,
};
INITIAL_REPLACEMENT_DATA.save(deps.storage, &replacement_data)?;
}
}
};
Epoch::new(
@@ -129,13 +133,23 @@ pub(crate) fn advance_epoch_state(deps: DepsMut<'_>, env: Env) -> Result<Respons
// Dealer set changed, we need to redo DKG...
let state = if replacement_threshold_surpassed(&deps)? {
// ... in reset mode
INITIAL_REPLACEMENT_DATA.remove(deps.storage);
EpochState::default()
} else {
// ... in reshare mode
INITIAL_REPLACEMENT_DATA.update::<_, ContractError>(deps.storage, |mut data| {
data.initial_height = Some(env.block.height);
Ok(data)
})?;
if INITIAL_REPLACEMENT_DATA.may_load(deps.storage)?.is_some() {
INITIAL_REPLACEMENT_DATA.update::<_, ContractError>(deps.storage, |mut data| {
data.initial_height = env.block.height;
Ok(data)
})?;
} else {
let replacement_data = InitialReplacementData {
initial_dealers: verified_dealers(deps.storage)?,
initial_height: env.block.height,
};
INITIAL_REPLACEMENT_DATA.save(deps.storage, &replacement_data)?;
}
EpochState::PublicKeySubmission { resharing: true }
};
reset_epoch_state(deps.storage)?;
@@ -158,10 +172,8 @@ pub(crate) fn try_surpassed_threshold(
check_epoch_state(deps.storage, EpochState::InProgress)?;
let threshold = THRESHOLD.load(deps.storage)?;
let dealers = current_dealers()
.keys(deps.storage, None, None, Order::Ascending)
.flatten();
if dealers_still_active(&deps.as_ref(), dealers)? < threshold as usize {
let dealers = verified_dealers(deps.storage)?;
if dealers_still_active(&deps.as_ref(), dealers.into_iter())? < threshold as usize {
reset_epoch_state(deps.storage)?;
CURRENT_EPOCH.update::<_, ContractError>(deps.storage, |epoch| {
Ok(Epoch::new(
@@ -180,8 +192,9 @@ pub(crate) fn try_surpassed_threshold(
pub(crate) mod tests {
use super::*;
use crate::error::ContractError::EarlyEpochStateAdvancement;
use crate::support::tests::fixtures::dealer_details_fixture;
use crate::support::tests::fixtures::{dealer_details_fixture, vk_share_fixture};
use crate::support::tests::helpers::{init_contract, GROUP_MEMBERS};
use crate::verification_key_shares::storage::vk_shares;
use cosmwasm_std::testing::mock_env;
use cosmwasm_std::Addr;
use cw4::Member;
@@ -204,11 +217,15 @@ pub(crate) mod tests {
for n in [10, 25, 50, 100] {
let dealers: Vec<_> = (0..n).map(dealer_details_fixture).collect();
let shares: Vec<_> = (0..n).map(|idx| vk_share_fixture(&format!("owner{}", idx), 0)).collect();
let initial_dealers = dealers.iter().map(|d| d.address.clone()).collect();
let data = InitialReplacementData {
initial_dealers,
initial_height: None,
initial_height: 1,
};
for share in shares {
vk_shares().save(deps.as_mut().storage, (&share.owner, 0), &share).unwrap();
}
for f in [two_thirds, three_fourths, ninty_pc] {
let threshold = f(n);
THRESHOLD.save(deps.as_mut().storage, &threshold).unwrap();
@@ -247,39 +264,39 @@ pub(crate) mod tests {
assert!(dealers_eq_members(&deps.as_mut()).unwrap());
let details = dealer_details_fixture(1);
let different_details = dealer_details_fixture(2);
current_dealers()
.save(deps.as_mut().storage, &details.address, &details)
let share = vk_share_fixture("owner2", 0);
let different_share = vk_share_fixture("owner4", 0);
vk_shares()
.save(deps.as_mut().storage, (&share.owner, 0), &share)
.unwrap();
assert!(!dealers_eq_members(&deps.as_mut()).unwrap());
current_dealers()
.remove(deps.as_mut().storage, &details.address)
vk_shares()
.remove(deps.as_mut().storage, (&share.owner, 0))
.unwrap();
GROUP_MEMBERS.lock().unwrap().push((
Member {
addr: "owner1".to_string(),
addr: "owner2".to_string(),
weight: 10,
},
1,
));
assert!(!dealers_eq_members(&deps.as_mut()).unwrap());
current_dealers()
vk_shares()
.save(
deps.as_mut().storage,
&different_details.address,
&different_details,
(&different_share.owner, 0),
&different_share,
)
.unwrap();
assert!(!dealers_eq_members(&deps.as_mut()).unwrap());
current_dealers()
.remove(deps.as_mut().storage, &different_details.address)
vk_shares()
.remove(deps.as_mut().storage, (&different_share.owner, 0))
.unwrap();
current_dealers()
.save(deps.as_mut().storage, &details.address, &details)
vk_shares()
.save(deps.as_mut().storage, (&share.owner, 0), &share)
.unwrap();
assert!(dealers_eq_members(&deps.as_mut()).unwrap());
}
@@ -407,6 +424,12 @@ pub(crate) mod tests {
);
// setup dealer details
let all_shares: [_; 4] = std::array::from_fn(|i| vk_share_fixture(&format!("owner{}", i + 1), 0));
for share in all_shares.iter() {
vk_shares()
.save(deps.as_mut().storage, (&share.owner, 0), share)
.unwrap();
}
let all_details: [_; 4] = std::array::from_fn(|i| dealer_details_fixture(i as u64 + 1));
for details in all_details.iter() {
current_dealers()
@@ -431,12 +454,6 @@ pub(crate) mod tests {
.time
.plus_seconds(epoch.time_configuration.dealing_exchange_time_secs)
);
let replacement_data = INITIAL_REPLACEMENT_DATA.load(&deps.storage).unwrap();
let expected_replacement_data = InitialReplacementData {
initial_dealers: all_details.iter().map(|d| d.address.clone()).collect(),
initial_height: None,
};
assert_eq!(replacement_data, expected_replacement_data);
env.block.time = env
.block
@@ -588,8 +605,14 @@ pub(crate) mod tests {
);
assert_eq!(curr_epoch, expected_epoch);
assert!(THRESHOLD.may_load(&deps.storage).unwrap().is_none());
let replacement_data = INITIAL_REPLACEMENT_DATA.load(&deps.storage).unwrap();
let expected_replacement_data = InitialReplacementData {
initial_dealers: all_details.iter().map(|d| d.address.clone()).collect(),
initial_height: 12345,
};
assert_eq!(replacement_data, expected_replacement_data);
let all_details: [_; 2] = std::array::from_fn(|i| dealer_details_fixture(i as u64 + 2));
let all_details: [_; 4] = std::array::from_fn(|i| dealer_details_fixture(i as u64 + 2));
for details in all_details.iter() {
past_dealers().remove(deps.as_mut().storage, &details.address).unwrap();
current_dealers()
@@ -607,6 +630,17 @@ pub(crate) mod tests {
advance_epoch_state(deps.as_mut(), env.clone()).unwrap();
}
let all_shares: [_; 4] = std::array::from_fn(|i| {
let mut share = vk_share_fixture(&format!("owner{}", i + 1), 1);
share.verified = i % 2 == 0;
share
});
for share in all_shares.iter() {
vk_shares()
.save(deps.as_mut().storage, (&share.owner, 0), share)
.unwrap();
}
// Group changed even more, surpassing threshold, so re-run dkg in reset mode
*GROUP_MEMBERS.lock().unwrap().last_mut().unwrap() = (
Member {
@@ -623,7 +657,7 @@ pub(crate) mod tests {
advance_epoch_state(deps.as_mut(), env.clone()).unwrap();
let curr_epoch = CURRENT_EPOCH.load(deps.as_mut().storage).unwrap();
let expected_epoch = Epoch::new(
EpochState::PublicKeySubmission { resharing: false },
EpochState::PublicKeySubmission { resharing: true },
prev_epoch.epoch_id + 1,
prev_epoch.time_configuration,
env.block.time,
@@ -672,12 +706,25 @@ pub(crate) mod tests {
}
);
let all_shares: [_; 3] = std::array::from_fn(|i| vk_share_fixture(&format!("owner{}", i + 1), 0));
for share in all_shares.iter() {
vk_shares()
.save(deps.as_mut().storage, (&share.owner, 0), share)
.unwrap();
}
let all_details: [_; 3] = std::array::from_fn(|i| dealer_details_fixture(i as u64 + 1));
for details in all_details.iter() {
current_dealers()
.save(deps.as_mut().storage, &details.address, details)
.unwrap();
}
let all_shares: [_; 3] = std::array::from_fn(|i| vk_share_fixture(&format!("owner{}", i + 1), 0));
for share in all_shares.iter() {
vk_shares()
.save(deps.as_mut().storage, (&share.owner, share.epoch_id), share)
.unwrap();
}
for times in [
time_configuration.public_key_submission_time_secs,
@@ -4,7 +4,9 @@
// SPDX-License-Identifier: Apache-2.0
use crate::constants::{VK_SHARES_EPOCH_ID_IDX_NAMESPACE, VK_SHARES_PK_NAMESPACE};
use cosmwasm_std::Addr;
use crate::epoch_state::storage::CURRENT_EPOCH;
use crate::error::ContractError;
use cosmwasm_std::{Addr, Order, Storage};
use cw_storage_plus::{Index, IndexList, IndexedMap, MultiIndex};
use nym_coconut_dkg_common::types::EpochId;
use nym_coconut_dkg_common::verification_key::ContractVKShare;
@@ -35,3 +37,21 @@ pub(crate) fn vk_shares<'a>() -> IndexedMap<'a, VKShareKey<'a>, ContractVKShare,
};
IndexedMap::new(VK_SHARES_PK_NAMESPACE, indexes)
}
/// Collects the owners of all verification key shares that are stored for the
/// current epoch and are flagged as verified.
///
/// The epoch id is taken from `CURRENT_EPOCH`; a failure to load it is the only
/// error reported by this function.
pub(crate) fn verified_dealers(storage: &dyn Storage) -> Result<Vec<Addr>, ContractError> {
    let current_epoch_id = CURRENT_EPOCH.load(storage)?.epoch_id;

    let mut owners = Vec::new();
    // NOTE(review): `flatten` silently skips entries that failed to load rather
    // than reporting them - confirm that this is the intended behaviour.
    for (_, share) in vk_shares()
        .idx
        .epoch_id
        .prefix(current_epoch_id)
        .range(storage, None, None, Order::Ascending)
        .flatten()
    {
        if share.verified {
            owners.push(share.owner);
        }
    }
    Ok(owners)
}
+8
View File
@@ -318,6 +318,14 @@ pub fn execute(
ExecuteMsg::UnbondGatewayOnBehalf { owner } => {
crate::gateways::transactions::try_remove_gateway_on_behalf(deps, info, owner)
}
ExecuteMsg::UpdateGatewayConfig { new_config } => {
crate::gateways::transactions::try_update_gateway_config(deps, info, new_config)
}
ExecuteMsg::UpdateGatewayConfigOnBehalf { new_config, owner } => {
crate::gateways::transactions::try_update_gateway_config_on_behalf(
deps, info, new_config, owner,
)
}
// delegation-related:
ExecuteMsg::DelegateToMixnode { mix_id } => {
+20
View File
@@ -0,0 +1,20 @@
// Copyright 2023 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use super::storage;
use cosmwasm_std::{Addr, Storage};
use mixnet_contract_common::{error::MixnetContractError, GatewayBond};
/// Looks up the gateway bond indexed under the provided `owner` address.
///
/// # Errors
///
/// Returns `MixnetContractError::NoAssociatedGatewayBond` if the owner index
/// has no entry for `owner`, or propagates the error of the storage lookup.
pub(crate) fn must_get_gateway_bond_by_owner(
    store: &dyn Storage,
    owner: &Addr,
) -> Result<GatewayBond, MixnetContractError> {
    Ok(storage::gateways()
        .idx
        .owner
        .item(store, owner.clone())?
        // build the error (and clone the address) only when the bond is actually missing
        .ok_or_else(|| MixnetContractError::NoAssociatedGatewayBond {
            owner: owner.clone(),
        })?
        .1)
}
+1
View File
@@ -1,6 +1,7 @@
// Copyright 2021 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
pub mod helpers;
pub mod queries;
pub mod signature_helpers;
pub mod storage;
+148 -2
View File
@@ -1,16 +1,20 @@
// Copyright 2021-2022 - Nym Technologies SA <contact@nymtech.net>
// SPDX-License-Identifier: Apache-2.0
use super::helpers::must_get_gateway_bond_by_owner;
use super::storage;
use crate::gateways::signature_helpers::verify_gateway_bonding_signature;
use crate::mixnet_contract_settings::storage as mixnet_params_storage;
use crate::signing::storage as signing_storage;
use crate::support::helpers::{
ensure_no_existing_bond, ensure_sent_by_vesting_contract, validate_pledge,
ensure_no_existing_bond, ensure_proxy_match, ensure_sent_by_vesting_contract, validate_pledge,
};
use cosmwasm_std::{wasm_execute, Addr, BankMsg, Coin, DepsMut, Env, MessageInfo, Response};
use mixnet_contract_common::error::MixnetContractError;
use mixnet_contract_common::events::{new_gateway_bonding_event, new_gateway_unbonding_event};
use mixnet_contract_common::events::{
new_gateway_bonding_event, new_gateway_config_update_event, new_gateway_unbonding_event,
};
use mixnet_contract_common::gateway::GatewayConfigUpdate;
use mixnet_contract_common::{Gateway, GatewayBond};
use nym_contracts_common::signing::MessageSignature;
use vesting_contract_common::messages::ExecuteMsg as VestingContractExecuteMsg;
@@ -189,6 +193,56 @@ pub(crate) fn _try_remove_gateway(
)))
}
/// Updates the configuration of the gateway bonded by the sender of the message.
///
/// The sender (`info.sender`) is used as the bond owner and no proxy is passed
/// along, i.e. this is the entry point for owners acting directly.
pub(crate) fn try_update_gateway_config(
deps: DepsMut<'_>,
info: MessageInfo,
new_config: GatewayConfigUpdate,
) -> Result<Response, MixnetContractError> {
let owner = info.sender;
_try_update_gateway_config(deps, new_config, owner, None)
}
/// Updates the configuration of the gateway bonded by `owner`, with the sender
/// of the message acting as the proxy.
///
/// # Errors
///
/// Fails if `ensure_sent_by_vesting_contract` rejects the sender, if `owner`
/// is not a valid address, or if the underlying update fails.
pub(crate) fn try_update_gateway_config_on_behalf(
    deps: DepsMut<'_>,
    info: MessageInfo,
    new_config: GatewayConfigUpdate,
    owner: String,
) -> Result<Response, MixnetContractError> {
    ensure_sent_by_vesting_contract(&info, deps.storage)?;

    let owner = deps.api.addr_validate(&owner)?;
    // the sender has been checked above, so it is recorded as the proxy
    let proxy = info.sender;
    _try_update_gateway_config(deps, new_config, owner, Some(proxy))
}
/// Shared implementation of the gateway config update.
///
/// Loads the bond owned by `owner`, checks that the provided `proxy` matches
/// the proxy stored on the bond, overwrites the `host`, `mix_port`,
/// `clients_port`, `location` and `version` of the bonded gateway with the
/// values from `new_config` and stores the result under the same identity.
///
/// # Errors
///
/// Fails if `owner` has no gateway bond, if the proxies do not match, or if
/// the bond could not be replaced in storage.
pub(crate) fn _try_update_gateway_config(
    deps: DepsMut<'_>,
    new_config: GatewayConfigUpdate,
    owner: Addr,
    proxy: Option<Addr>,
) -> Result<Response, MixnetContractError> {
    let existing_bond = must_get_gateway_bond_by_owner(deps.storage, &owner)?;
    ensure_proxy_match(&proxy, &existing_bond.proxy)?;

    // the event borrows the config, so it is created before the fields are moved out of it
    let cfg_update_event = new_gateway_config_update_event(&owner, &proxy, &new_config);

    // `replace` takes both the old and the new value, hence the clone
    let mut updated_bond = existing_bond.clone();
    updated_bond.gateway.host = new_config.host;
    updated_bond.gateway.mix_port = new_config.mix_port;
    updated_bond.gateway.clients_port = new_config.clients_port;
    updated_bond.gateway.location = new_config.location;
    updated_bond.gateway.version = new_config.version;

    storage::gateways().replace(
        deps.storage,
        existing_bond.identity(),
        Some(&updated_bond),
        Some(&existing_bond),
    )?;

    Ok(Response::new().add_event(cfg_update_event))
}
#[cfg(test)]
pub mod tests {
use super::*;
@@ -196,6 +250,7 @@ pub mod tests {
use crate::gateways::queries;
use crate::gateways::transactions::{
try_add_gateway, try_add_gateway_on_behalf, try_remove_gateway_on_behalf,
try_update_gateway_config, try_update_gateway_config_on_behalf,
};
use crate::interval::pending_events;
use crate::mixnet_contract_settings::storage::minimum_gateway_pledge;
@@ -207,6 +262,7 @@ pub mod tests {
use cosmwasm_std::{Addr, BankMsg, Response, Uint128};
use mixnet_contract_common::error::MixnetContractError;
use mixnet_contract_common::events::new_gateway_unbonding_event;
use mixnet_contract_common::gateway::GatewayConfigUpdate;
use mixnet_contract_common::ExecuteMsg;
#[test]
@@ -485,4 +541,94 @@ pub mod tests {
}
)
}
// Covers the gateway config update flow for a gateway bonded without a proxy:
// updating a missing bond fails, an update on behalf is rejected because of the
// proxy mismatch, and a direct update by the owner is persisted.
#[test]
fn update_gateway_config() {
let mut test = TestSetup::new();
let owner = "alice";
let info = mock_info(owner, &[]);
let update = GatewayConfigUpdate {
host: "1.1.1.1:1234".to_string(),
mix_port: 1234,
clients_port: 1235,
location: "home".to_string(),
version: "v1.2.3".to_string(),
};
// try updating a non existing gateway bond
let res = try_update_gateway_config(test.deps_mut(), info.clone(), update.clone());
assert_eq!(
res,
Err(MixnetContractError::NoAssociatedGatewayBond {
owner: Addr::unchecked(owner)
})
);
test.add_dummy_gateway(owner, None);
let vesting_contract = test.vesting_contract();
// attempted to update on behalf with invalid proxy (current is `None`)
let res = try_update_gateway_config_on_behalf(
test.deps_mut(),
mock_info(vesting_contract.as_ref(), &[]),
update.clone(),
owner.to_string(),
);
assert_eq!(
res,
Err(MixnetContractError::ProxyMismatch {
existing: "None".to_string(),
incoming: vesting_contract.into_string()
})
);
// "normal" update succeeds
let res = try_update_gateway_config(test.deps_mut(), info, update.clone());
assert!(res.is_ok());
// and the config has actually been updated
let bond =
must_get_gateway_bond_by_owner(test.deps().storage, &Addr::unchecked(owner)).unwrap();
assert_eq!(bond.gateway.host, update.host);
assert_eq!(bond.gateway.mix_port, update.mix_port);
assert_eq!(bond.gateway.clients_port, update.clients_port);
assert_eq!(bond.gateway.location, update.location);
assert_eq!(bond.gateway.version, update.version);
}
// An update on behalf that is sent by an address other than the vesting contract
// must be rejected with `SenderIsNotVestingContract`, even though that address
// is the one passed to `add_dummy_gateway_with_illegal_proxy` for the bond.
#[test]
fn updating_gateway_config_with_illegal_proxy() {
let mut test = TestSetup::new();
let illegal_proxy = Addr::unchecked("not-vesting-contract");
let vesting_contract = test.vesting_contract();
let owner = "alice";
test.add_dummy_gateway_with_illegal_proxy(owner, None, illegal_proxy.clone());
let update = GatewayConfigUpdate {
host: "1.1.1.1:1234".to_string(),
mix_port: 1234,
clients_port: 1235,
location: "at home".to_string(),
version: "v1.2.3".to_string(),
};
// the message is sent by the illegal proxy itself rather than by the vesting contract
let res = try_update_gateway_config_on_behalf(
test.deps_mut(),
mock_info(illegal_proxy.as_ref(), &[]),
update,
owner.to_string(),
)
.unwrap_err();
assert_eq!(
res,
MixnetContractError::SenderIsNotVestingContract {
received: illegal_proxy,
vesting_contract
}
)
}
}
@@ -295,7 +295,7 @@ pub(crate) fn try_update_mixnode_config(
}
pub(crate) fn try_update_mixnode_config_on_behalf(
deps: DepsMut,
deps: DepsMut<'_>,
info: MessageInfo,
new_config: MixNodeConfigUpdate,
owner: String,
@@ -308,7 +308,7 @@ pub(crate) fn try_update_mixnode_config_on_behalf(
}
pub(crate) fn _try_update_mixnode_config(
deps: DepsMut,
deps: DepsMut<'_>,
new_config: MixNodeConfigUpdate,
owner: Addr,
proxy: Option<Addr>,
+1 -1
View File
@@ -20,7 +20,7 @@ pub mod test_helpers {
perform_pending_epoch_actions, perform_pending_interval_actions, try_begin_epoch_transition,
};
use crate::interval::{pending_events, storage as interval_storage};
use crate::mixnet_contract_settings::storage as mixnet_params_storage;
use crate::mixnet_contract_settings::storage::{self as mixnet_params_storage};
use crate::mixnet_contract_settings::storage::{
minimum_gateway_pledge, minimum_mixnode_pledge, rewarding_denom,
rewarding_validator_address,
+13
View File
@@ -14,6 +14,7 @@ use cosmwasm_std::{
QueryResponse, Response, StdError, StdResult, Timestamp, Uint128,
};
use cw_storage_plus::Bound;
use mixnet_contract_common::gateway::GatewayConfigUpdate;
use mixnet_contract_common::mixnode::{MixNodeConfigUpdate, MixNodeCostParams};
use mixnet_contract_common::{Gateway, MixId, MixNode};
use semver::Version;
@@ -219,6 +220,9 @@ pub fn execute(
ExecuteMsg::TrackUnbondGateway { owner, amount } => {
try_track_unbond_gateway(&owner, amount, info, deps)
}
ExecuteMsg::UpdateGatewayConfig { new_config } => {
try_update_gateway_config(new_config, info, deps)
}
ExecuteMsg::TransferOwnership { to_address } => {
try_transfer_ownership(to_address, info, deps)
}
@@ -300,6 +304,15 @@ pub fn try_update_mixnode_config(
account.try_update_mixnode_config(new_config, deps.storage)
}
/// Handles `ExecuteMsg::UpdateGatewayConfig`: resolves the vesting account of
/// the message sender and delegates the gateway config update to it.
///
/// Fails if `account_from_address` cannot resolve an account for the sender or
/// if the account-level update fails.
pub fn try_update_gateway_config(
new_config: GatewayConfigUpdate,
info: MessageInfo,
deps: DepsMut,
) -> Result<Response, ContractError> {
let account = account_from_address(info.sender.as_str(), deps.storage, deps.api)?;
account.try_update_gateway_config(new_config, deps.storage)
}
pub fn try_update_mixnode_cost_params(
new_costs: MixNodeCostParams,
info: MessageInfo,
@@ -2,6 +2,7 @@ use crate::errors::ContractError;
use contracts_common::signing::MessageSignature;
use cosmwasm_std::{Coin, Env, Response, Storage};
use mixnet_contract_common::{
gateway::GatewayConfigUpdate,
mixnode::{MixNodeConfigUpdate, MixNodeCostParams},
Gateway, MixNode,
};
@@ -64,4 +65,10 @@ pub trait GatewayBondingAccount {
amount: Coin,
storage: &mut dyn Storage,
) -> Result<(), ContractError>;
/// Requests an update of the gateway configuration on behalf of this account,
/// returning the `Response` to be emitted by the contract.
///
/// `storage` is the contract storage made available to the implementation.
fn try_update_gateway_config(
&self,
new_config: GatewayConfigUpdate,
storage: &mut dyn Storage,
) -> Result<Response, ContractError>;
}
@@ -4,9 +4,12 @@ use crate::storage::MIXNET_CONTRACT_ADDRESS;
use crate::traits::GatewayBondingAccount;
use contracts_common::signing::MessageSignature;
use cosmwasm_std::{wasm_execute, Coin, Env, Response, Storage, Uint128};
use mixnet_contract_common::{ExecuteMsg as MixnetExecuteMsg, Gateway};
use mixnet_contract_common::{
gateway::GatewayConfigUpdate, ExecuteMsg as MixnetExecuteMsg, Gateway,
};
use vesting_contract_common::events::{
new_vesting_gateway_bonding_event, new_vesting_gateway_unbonding_event,
new_vesting_update_gateway_config_event,
};
use super::Account;
@@ -78,4 +81,22 @@ impl GatewayBondingAccount for Account {
self.remove_gateway_pledge(storage)?;
Ok(())
}
/// Wraps `new_config` into an `UpdateGatewayConfigOnBehalf` message for the
/// owner of this account and schedules it for execution on the mixnet contract
/// whose address is stored under `MIXNET_CONTRACT_ADDRESS`.
///
/// No funds are attached to the message. The returned response additionally
/// carries the vesting "update gateway config" event.
fn try_update_gateway_config(
    &self,
    new_config: GatewayConfigUpdate,
    storage: &mut dyn Storage,
) -> Result<Response, ContractError> {
    let owner = self.owner_address().into_string();
    let on_behalf_msg = MixnetExecuteMsg::UpdateGatewayConfigOnBehalf { new_config, owner };

    let mixnet_contract = MIXNET_CONTRACT_ADDRESS.load(storage)?;
    let forwarded_msg = wasm_execute(mixnet_contract, &on_behalf_msg, vec![])?;

    let response = Response::new()
        .add_message(forwarded_msg)
        .add_event(new_vesting_update_gateway_config_event());
    Ok(response)
}
}
-12
View File
@@ -1,12 +0,0 @@
[package]
name = "cpu-cycles"
version = "0.1.0"
edition = "2021"
build = "build.rs"
links = "cpucycles"
[dependencies]
libc = "0.2.140"
[build-dependencies]
cfg-if = "1"
-65
View File
@@ -1,65 +0,0 @@
use std::{env, path::PathBuf, process::Command};
/// Runs `command` to completion, panicking with the captured stderr if it exits
/// unsuccessfully, or with the I/O error if it could not be spawned at all.
fn run_or_panic(command: &mut Command) {
    match command.output() {
        Ok(output) => {
            if !output.status.success() {
                // stderr of an external tool is not guaranteed to be valid UTF-8,
                // so it must not be reinterpreted with `from_utf8_unchecked`
                panic!("{:?}", String::from_utf8_lossy(&output.stderr))
            }
        }
        Err(e) => panic!("{e}"),
    }
}

/// Build script: configures, builds and installs the bundled `libcpucycles`
/// into `OUT_DIR`, tells cargo to link against the resulting static library
/// and finally cleans the build artefacts out of the source tree.
///
/// Any failing step aborts the build with a panic.
fn main() {
    let out_dir = env::var("OUT_DIR").expect("OUT_DIR is always set by cargo for build scripts");
    let out_path = PathBuf::from(&out_dir);
    let source_path = PathBuf::from("libcpucycles")
        .canonicalize()
        .expect("cannot canonicalize path");

    // NOTE(review): build scripts are compiled for the host, so these `cfg`s look
    // at the host architecture rather than the compilation target - confirm that
    // this is intended.
    cfg_if::cfg_if! {
        if #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "mips", target_arch = "powerpc", target_arch = "powerpc64", target_arch = "arm")))] {
            panic!("Unsupported architecture - {}!", env::var("CARGO_CFG_TARGET_ARCH").unwrap(), )
        }
    };

    run_or_panic(
        Command::new("./configure")
            .current_dir(&source_path)
            .arg(format!("--prefix={out_dir}")),
    );
    run_or_panic(Command::new("make").current_dir(&source_path).arg("install"));

    println!(
        "cargo:rustc-link-search=native={}",
        out_path.join("lib").to_str().unwrap()
    );
    println!("cargo:rustc-link-lib=static=cpucycles");

    run_or_panic(Command::new("make").current_dir(source_path).arg("clean"));
}
-8
View File
@@ -1,8 +0,0 @@
default:
cd build && $(MAKE)
install:
cd build && $(MAKE) install
clean:
cd build && $(MAKE) clean
-69
View File
@@ -1,69 +0,0 @@
#!/usr/bin/env python3
import os
import datetime
import markdown
def load(fn):
    """Open the file at path `fn` in text mode and return its whole content."""
    with open(fn) as handle:
        content = handle.read()
    return content
style = load('autogen/html-style')
sitetitle = load('autogen/html-title')
files = []
with open('autogen/html-files') as f:
for line in f:
line = line.strip()
line = line.split(':')
if len(line) != 3: continue
files += [line]
for md,html,pagetitle in files:
fnmd = 'doc/%s.md' % md
fnhtml = 'doc/html/%s.html' % html
output = ''
x = load(fnmd)
x = markdown.markdown(x,extensions=['markdown.extensions.extra','markdown.extensions.tables'])
mtime = datetime.datetime.utcfromtimestamp(os.path.getmtime(fnmd)).strftime('%Y.%m.%d')
output += '<html>\n<head>\n'
output += style
output += '<title>\n'
output += pagetitle
output += '</title>\n'
output += '</head>\n'
output += '<body>\n'
output += '<div class=headline>\n'
output += sitetitle
output += '</div>\n'
output += '<div class=nav>\n'
for submd,subhtml,subpagetitle in files:
if subhtml == html:
output += '<div class="navt here">'
output += pagetitle+'\n'
else:
output += '<div class="navt away">'
output += '<a href=%s.html>%s</a>\n' % (subhtml,subpagetitle)
output += '</div>'
output += '</div>\n'
output += '<div class=main>\n'
output += x
output += '<hr><font size=1><b>Version:</b>\n'
output += 'This is version %s of the "%s" web page.\n' % (mtime,pagetitle)
output += '</font>\n'
output += '</div>\n'
output += '</body>\n'
output += '</html>\n'
if not os.path.exists(fnhtml) or output != load(fnhtml):
with open(fnhtml+'.new','w') as f:
f.write(output)
os.chmod(fnhtml+'.new',0o444)
os.rename(fnhtml+'.new',fnhtml)
@@ -1,7 +0,0 @@
readme:index:Intro
download:download:Download
install:install:Install
api:api:API
counters:counters:Counters
selection:selection:Selection
security:security:Security
@@ -1,32 +0,0 @@
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
@@ -1 +0,0 @@
libcpucycles
-3
View File
@@ -1,3 +0,0 @@
#!/bin/sh
pandoc --standalone --to man --metadata title=cpucycles --metadata section=3 < doc/api.md > doc/man/cpucycles.3
@@ -1,93 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <cpucycles.h>
#define TIMINGS 63
static long long t[TIMINGS+1];
// Converts the TIMINGS+1 readings in t[] into TIMINGS successive differences
// (in place), picks a median of those differences and prints the median
// followed by the signed offset of every difference from it.
static void t_print(void)
{
  long long median = 0;
  long long a,b;

  for (a = 0;a < TIMINGS;++a)
    t[a] = t[a+1]-t[a];

  // a value is a median if fewer than half of the entries are strictly below it
  // and fewer than half are strictly above it; the first such entry wins
  for (b = 0;b < TIMINGS;++b) {
    long long below = 0;
    long long above = 0;
    for (a = 0;a < TIMINGS;++a) {
      if (t[a] < t[b]) ++below;
      else if (t[a] > t[b]) ++above;
    }
    if (below*2 < TIMINGS && above*2 < TIMINGS) {
      median = t[b];
      break;
    }
  }

  printf(" %lld ",median);
  for (a = 0;a < TIMINGS;++a)
    printf("%+lld",t[a]-median);
  printf("\n");
  fflush(stdout);
}
// Returns the current gettimeofday() time expressed in microseconds.
static long long microseconds(void)
{
  struct timeval now;

  gettimeofday(&now,(struct timezone *) 0);
  // widen the seconds before scaling so the multiplication is done in long long
  return (long long) now.tv_sec * 1000000 + now.tv_usec;
}
static volatile int v;
// Prints the counter frequency and implementation name reported by the library,
// the median difference between back-to-back cpucycles() readings, and, for
// loop counts from 1024 to 1048576 (doubling), a lower/upper estimate of the
// observed counter frequency derived from gettimeofday().
static void measure_cpucycles(void)
{
long long loops,i,j;
printf("cpucycles persecond %lld\n",cpucycles_persecond());
printf("cpucycles implementation %s\n",cpucycles_implementation());
// TIMINGS+1 consecutive readings; t_print() turns them into TIMINGS differences
for (i = 0;i <= TIMINGS;++i)
t[i] = cpucycles();
printf("cpucycles median"); t_print();
for (loops = 1024;loops <= 1048576;loops *= 2) {
long long t00,t01,t10,t11;
long long m0,m1;
double ratiobelow,ratioabove;
// each wall-clock reading is bracketed by two counter readings:
// t00 <= m0 <= t01 before the busy loop and t10 <= m1 <= t11 after it
t00 = cpucycles();
m0 = microseconds();
t01 = cpucycles();
// busy loop; stores to the volatile v on every iteration
for (j = 0;j < loops;++j) v = 0;
t10 = cpucycles();
m1 = microseconds();
t11 = cpucycles();
// discard samples where the counter went backwards
if (t01 < t00) continue;
if (t10 < t01) continue;
if (t11 < t10) continue;
// discard samples that are too short; this also keeps m1-m0-1 below positive
if (m1 <= m0+2) continue;
// lower estimate: inner cycle span over the elapsed time plus one microsecond;
// upper estimate: outer cycle span over the elapsed time minus one microsecond
ratiobelow = floor((1000000.0*(t10-t01))/(m1+1-m0));
ratioabove = ceil((1000000.0*(t11-t00))/(m1-m0-1));
printf("cpucycles observed persecond %.0lf...%.0lf with %lld loops %lld microseconds\n",ratiobelow,ratioabove,loops,m1-m0);
}
}
// Entry point: calls cpucycles_tracesetup(), prints the library version and
// runs the measurements. The command-line arguments are not used.
int main(int argc,char **argv)
{
cpucycles_tracesetup();
printf("cpucycles version %s\n",cpucycles_version());
measure_cpucycles();
return 0;
}
@@ -1,2 +0,0 @@
gcc -Wall -fPIC -fwrapv -O -fvisibility=hidden
clang -Wall -fPIC -fwrapv -Qunused-arguments -O -fvisibility=hidden
-309
View File
@@ -1,309 +0,0 @@
#!/usr/bin/env python3
import os
import shutil
import sys
import platform
import subprocess
import tempfile
prefix = '/usr/local'
clean = True
linktype = 'so'
host = platform.machine()
host = ''.join(c for c in host if c in '_0123456789abcdefghijklmnopqrstuvwxyz')
if host == 'x86_64': host = 'amd64'
if host == 'i386': host = 'x86'
if host == 'i686': host = 'x86'
if host.startswith('armv8') or host.startswith('aarch64'): host = 'arm64'
if host.startswith('arm'): host = 'arm32'
if host.startswith('riscv64'): host = 'riscv64'
if host.startswith('riscv'): host = 'riscv32'
if host.startswith('mips64'): host = 'mips64'
if host.startswith('mips'): host = 'mips32'
if host.startswith('powerpc64') or host.startswith('ppc64'): host = 'ppc64'
if host.startswith('powerpc') or host.startswith('ppc'): host = 'ppc32'
if host.startswith('sparcv9') or host.startswith('sun4u'): host = 'sparc64'
if host.startswith('sparc') or host.startswith('sun'): host = 'sparc32'
makefile = ''
for arg in sys.argv[1:]:
if arg.startswith('--prefix='):
prefix = arg[9:]
continue
if arg.startswith('--host='):
host = arg[7:]
host = host.split('-')[0]
continue
if arg == '--clean':
clean = True
continue
if arg == '--noclean':
clean = False
continue
raise ValueError('unrecognized argument %s' % arg)
echoargs = './configure'
echoargs += ' --prefix=%s' % prefix
echoargs += ' --host=%s' % host
if clean: echoargs += ' --clean'
if not clean: echoargs += ' --noclean'
print(echoargs)
if prefix[0] != '/':
raise ValueError('prefix %s is not an absolute path' % prefix)
rpath = None
# XXX: rpath = '%s/lib' % prefix
if clean:
shutil.rmtree('build/%s' % host,ignore_errors=True)
def dirlinksym(dir,source,target):
    """Create (or replace) the symlink `dir/source` so that it points at `target`.

    The link is first created inside a temporary directory under `dir` and then
    renamed onto its final name.
    """
    destination = '%s/%s' % (dir,source)
    with tempfile.TemporaryDirectory(dir=dir) as scratch:
        staged = '%s/symlink' % scratch
        os.symlink(target,staged)
        os.rename(staged,destination)
os.makedirs('build/%s' % host,exist_ok=True)
os.makedirs('build/%s/package/bin' % host,exist_ok=True)
os.makedirs('build/%s/package/lib' % host,exist_ok=True)
os.makedirs('build/%s/package/include' % host,exist_ok=True)
if clean:
os.symlink('../..','build/%s/src' % host)
# ----- build scripts
os.makedirs('build/%s/scripts'%host,exist_ok=True)
dirlinksym('build/%s/scripts'%host,'install','../src/scripts-build/install')
# ----- compilers
def compilerversion(c):
    """Return the output of running `c --version`, or None if `c` is unusable.

    `c` is a command line (compiler plus flags) that is split on whitespace.
    stderr is merged into stdout. None is returned when the command cannot be
    started or when it exits with a non-zero status.
    """
    try:
        p = subprocess.Popen(c.split()+['--version'],stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True)
        out,err = p.communicate()
    except (OSError,ValueError,subprocess.SubprocessError):
        # the compiler is missing or cannot be executed; unlike a bare `except`
        # this lets KeyboardInterrupt/SystemExit through
        return None
    # explicit checks instead of `assert`, which is stripped under `python -O`
    if err or p.returncode:
        return None
    return out
firstcompiler = None
with open('compilers/default') as f:
for c in f.readlines():
c = c.strip()
cv = compilerversion(c)
if cv == None:
print('skipping default compiler %s' % c)
continue
print('using default compiler %s' % c)
firstcompiler = c
break
if firstcompiler is None:
raise ValueError('did not find a working compiler')
with open('build/%s/scripts/compiledefault' % host,'w') as f:
f.write('#!/bin/sh\n')
f.write('\n')
f.write('dir="$1"; shift\n')
f.write('base="$1"; shift\n')
f.write('ext="$1"; shift\n')
f.write('\n')
f.write('cd "$dir" && \\\n')
f.write('%s \\\n' % firstcompiler)
f.write(' "$@" \\\n')
f.write(' -c "$base.$ext"\n')
os.chmod('build/%s/scripts/compiledefault' % host,0o755)
# ----- libcpucycles
os.makedirs('build/%s/cpucycles' % host,exist_ok=True)
os.makedirs('build/%s/package/man/man3' % host,exist_ok=True)
dirlinksym('build/%s/cpucycles'%host,'cpucycles.h','../src/cpucycles/cpucycles.h')
dirlinksym('build/%s/cpucycles'%host,'cpucycles_internal.h','../src/cpucycles/cpucycles_internal.h')
shutil.copy2('cpucycles/cpucycles.h','build/%s/package/include/cpucycles.h'%host)
shutil.copy2('doc/man/cpucycles.3','build/%s/package/man/man3/cpucycles.3'%host)
with open('build/%s/cpucycles/compile-ticks' % host,'w') as f:
f.write('#!/bin/sh\n')
f.write('arch="$1"; shift\n')
f.write('x="$1"; shift\n')
f.write('for source in try-"$arch"-"$x".c try-default-zero.c\n')
f.write('do\n')
f.write(' cp "$source" "$arch"-"$x".c\n')
f.write(' %s \\\n' % firstcompiler)
f.write(' -Dticks=cpucycles_ticks_"$arch"_"$x" \\\n')
f.write(' -Dticks_setup=cpucycles_ticks_"$arch"_"$x"_setup \\\n')
f.write(' -c "$arch"-"$x".c\n')
f.write(' case $? in\n')
f.write(' 0) break ;;\n')
f.write(' 111) exit 111 ;;\n')
f.write(' *) echo "skipping option that did not compile" ;;\n')
f.write(' esac\n')
f.write('done\n')
os.chmod('build/%s/cpucycles/compile-ticks' % host,0o755)
cpucyclesoptions = []
cpucyclesofiles = []
with open('cpucycles/options') as f:
for line in f:
line = line.strip()
if line == '': continue
if line[0] == '#': continue
base = line.split()[0]
if not os.path.exists('cpucycles/%s.c' % base): continue
cpucycles = base.split('-')
if len(cpucycles) != 2: continue
if cpucycles[0] not in (host,'default'): continue
cpucyclesoptions += [cpucycles]
cpucyclesoptions += [['default','zero']] # must be last
for cpucycles in cpucyclesoptions:
base = '-'.join(cpucycles)
cpucyclesofiles += ['cpucycles/%s.o' % base]
dirlinksym('build/%s/cpucycles'%host,'try-%s.c'%base,'../src/cpucycles/%s.c'%base)
M = 'cpucycles/%s.o: cpucycles/try-%s.c cpucycles/try-default-zero.c\n' % (base,base)
M += '\tcd cpucycles && ./compile-ticks %s %s\n' % tuple(cpucycles)
M += '\n'
makefile = M + makefile
for fn in sorted(os.listdir('cpucycles')):
if not fn.endswith('.c'): continue
if '-' in fn: continue
base = fn[:-2]
cpucyclesofiles += ['cpucycles/%s.o' % base]
dirlinksym('build/%s/cpucycles'%host,fn,'../src/cpucycles/%s'%fn)
M = 'cpucycles/%s.o: cpucycles/%s.c\n' % (base,base)
M += '\tscripts/compiledefault cpucycles %s c\n' % base
M += '\n'
makefile = M + makefile
with open('build/%s/cpucycles/options.inc' % host,'w') as f:
f.write('#define NUMOPTIONS %d\n' % len(cpucyclesoptions))
f.write('#define DEFAULTOPTION (NUMOPTIONS-1)\n')
f.write('\n')
for cpucycles in cpucyclesoptions:
f.write('extern long long cpucycles_ticks_%s_%s_setup(void);\n' % (cpucycles[0],cpucycles[1]))
f.write('extern long long cpucycles_ticks_%s_%s(void);\n' % (cpucycles[0],cpucycles[1]))
f.write('\n')
f.write('static struct {\n')
f.write(' const char *implementation;\n')
f.write(' long long (*ticks_setup)(void);\n')
f.write(' long long (*ticks)(void);\n')
f.write('} options[NUMOPTIONS] = {\n')
for cpucycles in cpucyclesoptions:
f.write('{ "%s-%s", cpucycles_ticks_%s_%s_setup, cpucycles_ticks_%s_%s },\n' % (cpucycles[0],cpucycles[1],cpucycles[0],cpucycles[1],cpucycles[0],cpucycles[1]))
f.write('} ;\n')
dirlinksym('build/%s/scripts'%host,'staticlib','../src/scripts-build/staticlib')
M = 'package/lib/libcpucycles.a: scripts/staticlib %s\n' % ' '.join(cpucyclesofiles)
M += '\tscripts/staticlib %s\n' % ' '.join(cpucyclesofiles)
M += '\n'
makefile = M + makefile
with open('build/%s/scripts/sharedlib' % host,'w') as f:
f.write('#!/bin/sh\n')
f.write('\n')
f.write('%s -shared \\\n' % firstcompiler)
if rpath:
f.write(' -Wl,-rpath=%s \\\n' % rpath)
f.write(' -Wl,-soname,libcpucycles.so.1 \\\n')
f.write(' -o package/lib/libcpucycles.so.1 \\\n')
f.write(' "$@"\n')
f.write('chmod 644 package/lib/libcpucycles.so.1\n')
os.chmod('build/%s/scripts/sharedlib' % host,0o755)
M = 'package/lib/libcpucycles.so.1: scripts/sharedlib %s\n' % ' '.join(cpucyclesofiles)
M += '\tscripts/sharedlib %s\n' % ' '.join(cpucyclesofiles)
M += '\n'
makefile = M + makefile
M = 'package/lib/libcpucycles.so: package/lib/libcpucycles.so.1\n'
M += '\trm -f package/lib/libcpucycles.so\n'
M += '\tln -s libcpucycles.so.1 package/lib/libcpucycles.so\n'
M += '\n'
makefile = M + makefile
# ----- command
os.makedirs('build/%s/command'%host)
for c in sorted(os.listdir('command')):
dirlinksym('build/%s/command'%host,c,'../src/command/%s'%c)
dirlinksym('build/%s/command'%host,'bin','../package/bin')
dirlinksym('build/%s/command'%host,'lib','../package/lib')
dirlinksym('build/%s/command'%host,'include','../package/include')
with open('build/%s/command/link' % host,'w') as f:
f.write('#!/bin/sh\n')
f.write('target="$1"; shift\n')
f.write('%s \\\n' % firstcompiler)
f.write(' -o "$target" "$@"\n')
os.chmod('build/%s/command/link' % host,0o755)
commands = []
for fn in sorted(os.listdir('command')):
if not fn.endswith('.c'): continue
libs = ['libcpucycles']
base = fn[:-2]
M = 'command/%s.o: command/%s.c\n' % (base,base)
M += '\tscripts/compiledefault command %s c -I include\n' % base
M += '\n'
makefile = M + makefile
M = 'package/bin/%s: command/%s.o%s\n' % (base,base,''.join(' package/lib/%s.%s' % (x,linktype) for x in libs))
M += '\tcd command && ./link bin/%s %s.o%s -lm -lrt\n' % (base,base,''.join(' lib/%s.%s' % (x,linktype) for x in libs))
M += '\n'
makefile = M + makefile
commands += ['package/bin/%s' % base]
M = 'commands: %s\n' % ' '.join(commands)
M += '\n'
makefile = M + makefile
# ----- make install
M = 'install: scripts/install default\n'
M += '\tscripts/install %s\n' % prefix
M += '\n'
makefile = M + makefile
# ----- make default
M = 'default: package/lib/libcpucycles.a package/lib/libcpucycles.so package/lib/libcpucycles.so.1 \\\n'
M += 'commands\n'
M += '\n'
makefile = M + makefile
with open('build/%s/Makefile' % host,'w') as f:
f.write(makefile)
# ----- build/0, build/Makefile
dirlinksym('build','0',host)
with open('build/Makefile','w') as f:
f.write('default:\n')
f.write('\tcd %s && $(MAKE)\n' % host)
f.write('\n')
f.write('install:\n')
f.write('\tcd %s && $(MAKE) install\n' % host)
f.write('\n')
f.write('clean:\n')
f.write('\trm -r %s\n' % host)
@@ -1,53 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/amd64rdpmc.c
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "cpucycles_internal.h"
static struct perf_event_attr attr;
static int fdperf = -1;
static struct perf_event_mmap_page *buf = 0;
// Read the cycle counter with RDPMC, using the perf_event page `buf` that
// ticks_setup() maps. buf starts out as a null pointer, so this must not be
// relied on before ticks_setup() has run.
long long ticks(void)
{
  long long result;
  unsigned int seq;
  long long index;
  long long offset;
  do {
    // snapshot buf->lock; the loop repeats if it has changed by the time
    // index and offset have been read and the counter sampled
    seq = buf->lock;
    asm volatile("" ::: "memory"); // empty asm with memory clobber: compiler barrier
    index = buf->index;
    offset = buf->offset;
    // rdpmc is given index-1 in ecx; the edx half is shifted up and ORed
    // into rax to form a single 64-bit result
    asm volatile("rdpmc;shlq $32,%%rdx;orq %%rdx,%%rax"
      : "=a"(result) : "c"(index-1) : "%rdx");
    asm volatile("" ::: "memory");
  } while (buf->lock != seq);
  result += offset;
  result &= 0xffffffffffff; // keep only the low 48 bits
  return result;
}
// Open a perf_event cycle counter for this thread, map its metadata page
// into `buf`, and probe ticks() under the crash guard.
//
// Returns cpucycles_CYCLECOUNTER when RDPMC through the mapped page works,
// cpucycles_SKIP otherwise. The event is opened only once; later calls reuse
// the existing descriptor and mapping and merely repeat the probe.
long long ticks_setup(void)
{
  if (fdperf == -1) {
    void *page;

    attr.type = PERF_TYPE_HARDWARE;
    // tell the kernel which layout of the attribute structure we were built with
    attr.size = sizeof(struct perf_event_attr);
    attr.config = PERF_COUNT_HW_CPU_CYCLES;
    attr.exclude_kernel = 1;
    fdperf = syscall(__NR_perf_event_open,&attr,0,-1,-1,0);
    if (fdperf == -1) return cpucycles_SKIP;

    page = mmap(NULL,sysconf(_SC_PAGESIZE),PROT_READ,MAP_SHARED,fdperf,0);
    if (page == MAP_FAILED) {
      // do not leave buf pointing at MAP_FAILED and do not leak the
      // descriptor; resetting fdperf lets a later call try again
      close(fdperf);
      fdperf = -1;
      return cpucycles_SKIP;
    }
    buf = page;
  }
  if (!cpucycles_works(ticks)) return cpucycles_SKIP;
  return cpucycles_CYCLECOUNTER;
}
@@ -1,22 +0,0 @@
// version 20230105
// public domain
// djb
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include "cpucycles_internal.h"
// Sample the time-stamp counter through the compiler's __rdtsc() intrinsic.
long long ticks(void)
{
  unsigned long long stamp = __rdtsc();
  return stamp;
}
// Probe ticks() under the crash guard. A surviving probe is reported as
// cpucycles_MAYBECYCLECOUNTER, anything else as cpucycles_SKIP.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_MAYBECYCLECOUNTER : cpucycles_SKIP;
}
@@ -1,20 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/amd64tscfreq.c
#include "cpucycles_internal.h"
// Read the time-stamp counter with hand-written inline assembly instead of
// the compiler intrinsic.
long long ticks(void)
{
  unsigned long long result;
  // .byte 15;.byte 49 emits the two opcode bytes 0f 31 directly
  // (NOTE(review): this is the RDTSC encoding — confirm against the ISA manual);
  // the rdx half is then shifted up and ORed into rax
  asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
    : "=a"(result) :: "%rdx");
  return result;
}
// Report cpucycles_MAYBECYCLECOUNTER when ticks() can be executed without
// raising one of the guarded signals, cpucycles_SKIP otherwise.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = cpucycles_MAYBECYCLECOUNTER;
  return verdict;
}
@@ -1,27 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/cortex.c
#include "cpucycles_internal.h"
// Read the 32-bit counter exposed through coprocessor register
// p15, c9, c13, 0 and return it zero-extended.
long long ticks(void)
{
  unsigned int result;
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(result));
  // widen through an unsigned type so the value is never sign-extended
  return (unsigned long long) result;
}
// Write the three coprocessor registers that switch the counter on.
// NOTE(review): by the ARMv7-A encoding these look like PMCR, PMCNTENSET and
// PMOVSR — confirm against the architecture manual.
//
// The signature matches the long long (*)(void) callback type expected by
// cpucycles_works(), which is the only caller; the return value carries no
// information and is always 0.
static long long enable(void)
{
  asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17));
  asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
  asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f));
  return 0;
}
// Try to enable the counter and then to read it, each under the crash guard.
// Only when both survive is the counter reported, as cpucycles_EXTEND32
// because ticks() delivers 32-bit values.
long long ticks_setup(void)
{
  if (cpucycles_works(enable) && cpucycles_works(ticks))
    return cpucycles_EXTEND32;
  return cpucycles_SKIP;
}
@@ -1,19 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/armv8.c
#include "cpucycles_internal.h"
// Read the PMCCNTR_EL0 system register into a 64-bit value.
long long ticks(void)
{
  long long result;
  asm volatile("mrs %0, PMCCNTR_EL0" : "=r" (result));
  return result;
}
// A successful guarded read of the register is reported as a true cycle
// counter; a crash during the read means the option is skipped.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_CYCLECOUNTER : cpucycles_SKIP;
}
@@ -1,19 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/vct.c
#include "cpucycles_internal.h"
// Read the CNTVCT_EL0 system register (the "virtual count" timer) into a
// 64-bit value.
long long ticks(void)
{
  long long result;
  asm volatile("mrs %0, CNTVCT_EL0" : "=r" (result));
  return result;
}
// The timer does not count CPU cycles directly, so a working read is
// reported as cpucycles_FINDMULTIPLIER and the caller estimates the scale.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = cpucycles_FINDMULTIPLIER;
  return verdict;
}
@@ -1,25 +0,0 @@
// version 20230115
// public domain
// djb
// 20230115 djb: cpucycles_version()
// 20230114 djb: improve punctuation
#ifndef cpucycles_h
#define cpucycles_h
#ifdef __cplusplus
extern "C" {
#endif
// Function pointer, not a function: call as cpucycles(). Returns an estimate
// of the number of CPU cycles since an unspecified time in the past. The
// first call selects a counter and updates the pointer.
extern long long (*cpucycles)(void) __attribute__((visibility("default")));
// Name of the counter in use, e.g. "amd64-pmc".
extern const char *cpucycles_implementation(void) __attribute__((visibility("default")));
// Library version as a string, e.g. "20230115".
extern const char *cpucycles_version(void) __attribute__((visibility("default")));
// Estimate of the number of CPU cycles per second.
extern long long cpucycles_persecond(void) __attribute__((visibility("default")));
// Request that counter selection print one trace line per option.
extern void cpucycles_tracesetup(void) __attribute__((visibility("default")));
#ifdef __cplusplus
}
#endif
#endif
@@ -1,20 +0,0 @@
// version 20230105
// public domain
// djb
#ifndef cpucycles_internal_h
#define cpucycles_internal_h
// Selects a counter, repoints cpucycles at it and returns a first reading.
extern long long cpucycles_init(void);
// Wall-clock time in microseconds, taken from gettimeofday().
extern long long cpucycles_microseconds(void);
// Calls the given function once with crash signals trapped; returns 1 if the
// call returned normally and 0 if it raised SIGILL, SIGFPE, SIGBUS or SIGSEGV.
extern int cpucycles_works(long long (*)(void));
// return values from ticks_setup():
#define cpucycles_SKIP (0) // option unusable
#define cpucycles_CYCLECOUNTER (-1) // ticks are cycles, used unscaled
#define cpucycles_MAYBECYCLECOUNTER (-2) // used unscaled, ranked below CYCLECOUNTER
#define cpucycles_FINDMULTIPLIER (-3) // cycles per tick must be estimated
#define cpucycles_EXTEND32 (-32) // 32-bit ticks, extended to 64 bits
// and positive values mean known ticks/second
#endif
@@ -1,15 +0,0 @@
// version 20230105
// public domain
// djb
#include "cpucycles_internal.h"
// This counter is always available. The positive return value is its known
// rate: one million ticks per second, i.e. microseconds.
long long ticks_setup(void)
{
  const long long ticks_per_second = 1000000;
  return ticks_per_second;
}
// Current tick value: the shared microsecond clock.
long long ticks(void)
{
  long long now_us = cpucycles_microseconds();
  return now_us;
}
@@ -1,17 +0,0 @@
// version 20230105
// public domain
// djb
#include <mach/mach_time.h>
#include "cpucycles_internal.h"
// Current value of mach_absolute_time(), converted to long long.
long long ticks(void)
{
  unsigned long long now = mach_absolute_time();
  return now;
}
// The tick rate is not declared here, so a working clock is reported as
// cpucycles_FINDMULTIPLIER and the caller measures cycles per tick.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_FINDMULTIPLIER : cpucycles_SKIP;
}
@@ -1,23 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/monotonic.c
#include <time.h>
#include <sys/time.h>
// Always available. The positive return value is the known rate of ticks():
// one billion per second, i.e. nanoseconds.
long long ticks_setup(void)
{
  const long long ticks_per_second = 1000000000;
  return ticks_per_second;
}
// CLOCK_MONOTONIC reading expressed in nanoseconds.
long long ticks(void)
{
  struct timespec now;

  clock_gettime(CLOCK_MONOTONIC,&now);
  // widen the seconds before multiplying so the product is computed in 64 bits
  return (long long) now.tv_sec * 1000000000 + now.tv_nsec;
}
@@ -1,101 +0,0 @@
// version 20230106
// public domain
// djb
// adapted from supercop/cpucycles/perfevent.c
// 20230106 djb: read() into int64_t instead of long long
// 20230106 djb: add comment on RUNNING/ENABLED
/*
This code intentionally avoids dividing by the
PERF_FORMAT_TOTAL_TIME_RUNNING/ENABLED ratio.
The motivation for that ratio is as follows:
* A typical CPU has a limited number of performance-monitoring
counters active at once. For example, there are 8 "programmable"
counters on Intel Skylake.
* "perf stat" allows the user to enable more counters. The OS kernel
periodically (e.g., every millisecond) changes the limited number of
active hardware counters to a new subset of the enabled counters, and
"perf stat" reports PERF_FORMAT_TOTAL_TIME_RUNNING/ENABLED for each
counter, the fraction of time spent with that counter running.
For long-running programs, dividing the hardware counter by
RUNNING/ENABLED usually produces a reasonable estimate of what the count
would have been without competition from other counters.
A fixable problem with this multiplexing of counters is that the kernel
appears to simply cycle through counters, so unlucky programs can
trigger moiré effects. The fix is to select random subsets of counters.
A more fundamental problem is that cpucycles() has to be usable for
timing short subroutines, including subroutines so short that the OS has
no opportunity to change from one selection of counters to another. Say
RUNNING is 0; should cpucycles() then divide by 0?
If a caller runs cpucycles(), X(), cpucycles(), X(), etc., and the cycle
counter happens to be enabled for only 80% of the runs of X(), then
simply computing the median difference of adjacent cycle counts, with no
scaling, will filter out the zeros and correctly compute the cost of X.
Averages won't (without scaling), but averages have other problems, such
as being heavily influenced by interrupts. (Omitting kernel time from
perf results does not remove the influence of interrupts on caches.)
Given the importance of cycle counting, it is better to have cycle
counters always running. For example, on Skylake, Intel provides the 8
"programmable" counters on top of a separate cycle counter ("fixed
counter 1"), so there is no good reason for the kernel to waste a
"programmable" counter on a cycle counter, there is no good reason to
turn the cycle counter off, and there is no good reason for RUNNING to
be below ENABLED for the cycle counter.
Of course, applications that use just one performance counter at a time
don't have to worry about kernels getting this wrong, and don't have to
worry about the possibility of getting noisy or invalid results on CPUs
that have heavier constraints on the number of simultaneous counters.
*/
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include "cpucycles_internal.h"
// perf_event descriptor opened by ticks_setup(); -1 while not open.
static int fddev = -1;

// Read the current 64-bit counter value from the perf_event descriptor.
// Returns 0 whenever the read does not deliver a complete value, including
// when read() fails: read() reports failure as -1, which must be compared as
// a signed quantity, since converting it to size_t would make it look larger
// than sizeof result.
long long ticks(void)
{
  int64_t result;
  ssize_t got = read(fddev,&result,sizeof result);

  if (got != (ssize_t) sizeof result) return 0;
  return result;
}
// Open a user-space-only hardware cycle counter through perf_event_open on
// first use, reset and enable it, and report cpucycles_MAYBECYCLECOUNTER.
// Returns cpucycles_SKIP if the event cannot be opened. Once a descriptor
// exists, later calls return immediately.
long long ticks_setup(void)
{
  static struct perf_event_attr attr;

  if (fddev != -1) return cpucycles_MAYBECYCLECOUNTER;

  memset(&attr,0,sizeof attr);
  attr.type = PERF_TYPE_HARDWARE;
  attr.size = sizeof(struct perf_event_attr);
  attr.config = PERF_COUNT_HW_CPU_CYCLES;
  attr.disabled = 1; // created stopped; enabled explicitly below
  attr.exclude_kernel = 1;
  attr.exclude_hv = 1;

  fddev = syscall(__NR_perf_event_open,&attr,0,-1,-1,0);
  if (fddev == -1) return cpucycles_SKIP;

  ioctl(fddev,PERF_EVENT_IOC_RESET,0);
  ioctl(fddev,PERF_EVENT_IOC_ENABLE,0);
  return cpucycles_MAYBECYCLECOUNTER;
}
@@ -1,15 +0,0 @@
// version 20230105
// public domain
// djb
#include "cpucycles_internal.h"
// Placeholder option: always tells the selector to skip it.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  return verdict;
}
// Placeholder counter: the reading is always zero.
long long ticks(void)
{
  const long long nothing = 0;
  return nothing;
}
@@ -1,33 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/mips.c
// mips32 release 2 instruction rdhwr
// 7c02103b: read hwr#2 (cycle count) into $2
// 7c02183b: read hwr#3 (cycle-count multiplier) into $2
#include "cpucycles_internal.h"
static unsigned int multiplier = 0;
// Read hardware register 3 (the cycle-count multiplier, per the file header)
// into the file-level `multiplier` and return it. The instruction is emitted
// as a raw .long and leaves its result in $2, which is then moved to the
// output operand.
static long long multiplier_set(void)
{
  asm volatile(".long 0x7c02183b; move %0,$2" : "=r"(multiplier) : : "$2");
  return multiplier;
}
// Read hardware register 2 (the cycle count, per the file header) and scale
// it by `multiplier`. The product is computed in unsigned int, so it wraps at
// the width of unsigned int before being widened.
long long ticks(void)
{
  unsigned int result;
  asm volatile(".long 0x7c02103b; move %0,$2" : "=r"(result) :: "$2");
  result *= multiplier;
  return (unsigned long long) result;
}
// Fetch the multiplier and then read the counter, each under the crash
// guard. If both survive the counter is reported as cpucycles_EXTEND32.
long long ticks_setup(void)
{
  if (cpucycles_works(multiplier_set) && cpucycles_works(ticks))
    return cpucycles_EXTEND32;
  return cpucycles_SKIP;
}
-19
View File
@@ -1,19 +0,0 @@
amd64-pmc
amd64-tsc
amd64-tscasm
arm32-cortex
arm64-pmc
arm64-vct
mips64-cc
ppc32-mftb
ppc64-mftb
riscv32-rdcycle
riscv64-rdcycle
s390x-stckf
sparc64-rdtick
x86-tsc
x86-tscasm
default-perfevent
default-mach
default-monotonic
default-gettimeofday
@@ -1,30 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/powerpccpuinfo.c
#include "cpucycles_internal.h"
// Assemble a 64-bit time-base value from two 32-bit reads.
long long ticks(void)
{
  unsigned int high, low, newhigh;
  unsigned long long result;
  // read upper, lower, upper again; repeat until both upper reads agree, so
  // the lower half is never paired with a stale upper half
  do {
    asm volatile(
      "mftbu %0; mftb %1; mftbu %2"
      : "=r" (high), "=r" (low), "=r" (newhigh)
    );
  } while (newhigh != high);
  result = high;
  result <<= 32;
  result |= low;
  return result;
}
// A working time-base read is reported as cpucycles_FINDMULTIPLIER, leaving
// the cycles-per-tick ratio to be estimated by the caller.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_FINDMULTIPLIER : cpucycles_SKIP;
}
@@ -1,30 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/powerpccpuinfo.c
#include "cpucycles_internal.h"
// Assemble a 64-bit time-base value from an upper and a lower 32-bit read.
long long ticks(void)
{
  unsigned int high, low, newhigh;
  unsigned long long result;
  // the upper half is read before and after the lower half; the loop repeats
  // while the two upper reads differ
  do {
    asm volatile(
      "mftbu %0; mftb %1; mftbu %2"
      : "=r" (high), "=r" (low), "=r" (newhigh)
    );
  } while (newhigh != high);
  result = high;
  result <<= 32;
  result |= low;
  return result;
}
// Probe the time-base read under the crash guard; on success ask the caller
// to determine the multiplier (cpucycles_FINDMULTIPLIER).
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = cpucycles_FINDMULTIPLIER;
  return verdict;
}
@@ -1,39 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/riscv.c
// which has code from djb and Romain Dolbeau
#include "cpucycles_internal.h"
#ifndef __riscv_xlen
#error this code is only for riscv platforms
#endif
#if __riscv_xlen != 32
#error this code is only for riscv32 platforms
#endif
// Build a 64-bit cycle count from the 32-bit rdcycleh/rdcycle pair.
long long ticks(void)
{
  unsigned int low, high, newhigh;
  unsigned long long result;
  // upper half, lower half, upper half again; branch back to the local label
  // (made unique by %=) while the two upper reads differ
  asm volatile( "start%=:\n"
    "rdcycleh %0\n"
    "rdcycle %1\n"
    "rdcycleh %2\n"
    "bne %0, %2, start%=\n"
    : "=r"(high), "=r"(low), "=r"(newhigh));
  result = high;
  result <<= 32;
  result |= low;
  return result;
}
// If the rdcycle sequence executes without a trapped signal, report a true
// cycle counter; otherwise skip this option.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_CYCLECOUNTER : cpucycles_SKIP;
}
@@ -1,29 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/riscv.c
// which has code from djb and Romain Dolbeau
#include "cpucycles_internal.h"
#ifndef __riscv_xlen
#error this code is only for riscv platforms
#endif
#if __riscv_xlen != 64
#error this code is only for riscv64 platforms
#endif
// Read the cycle counter with a single rdcycle into a 64-bit register.
long long ticks(void)
{
  long long result;
  asm volatile("rdcycle %0" : "=r" (result));
  return result;
}
// Guarded probe of rdcycle: cpucycles_CYCLECOUNTER on success,
// cpucycles_SKIP if the instruction raised a trapped signal.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = cpucycles_CYCLECOUNTER;
  return verdict;
}
@@ -1,20 +0,0 @@
// version 20230106
// public domain
// djb
// adapted from sparc64-rdtick.c
#include "cpucycles_internal.h"
// Store the clock value with stckf. The instruction writes through the
// address of `result` (hence the "memory" clobber) and changes the condition
// code (hence "cc").
long long ticks(void)
{
  long long result;
  asm volatile("stckf 0(%0)" :: "a"(&result) : "memory","cc");
  return result;
}
// When stckf can be executed, return the clock's known rate as a positive
// ticks-per-second value; otherwise skip this option.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = 4096000000; // manual says 2^12 per microsecond
  return verdict;
}
@@ -1,24 +0,0 @@
// version 20230105
// public domain
// djb
// adapted from supercop/cpucycles/sparccpuinfo.c
#include "cpucycles_internal.h"
#if defined(__sparcv8) || defined(__sparcv8plus)
#error this code is only for sparc64 platforms
#endif
// Read the %tick register into a 64-bit value.
long long ticks(void)
{
  long long result;
  asm volatile("rd %%tick,%0" : "=r" (result));
  return result;
}
// Report cpucycles_CYCLECOUNTER when the %tick read survives the crash
// guard, cpucycles_SKIP when it does not.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_CYCLECOUNTER : cpucycles_SKIP;
}
-420
View File
@@ -1,420 +0,0 @@
// version 20230115
// public domain
// djb
// includes some pieces adapted from supercop
// 20230115 djb: cpucycles_version()
// 20230106 djb: support "cpu MHz static" (ibm z15)
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
#include <signal.h>
#include <setjmp.h>
#include "cpucycles.h"
#include "cpucycles_internal.h"
// Nonzero once tracing of the counter selection has been requested.
// Static storage, so it starts out as zero.
static int tracesetup;

// Ask cpucycles_init() to print one line per option describing what it
// measured. Only has an effect if called before the first use of cpucycles.
void cpucycles_tracesetup(void)
{
  tracesetup = 1;
}
static jmp_buf crash_jmp;
static void crash(int s)
{
siglongjmp(crash_jmp,0);
}
int cpucycles_works(long long (*ticks)(void))
{
volatile int result = 0;
struct sigaction old_sigill;
struct sigaction old_sigfpe;
struct sigaction old_sigbus;
struct sigaction old_sigsegv;
struct sigaction crash_action;
memset(&crash_action,0,sizeof crash_action);
crash_action.sa_handler = crash;
sigaction(SIGILL,0,&old_sigill);
sigaction(SIGFPE,0,&old_sigfpe);
sigaction(SIGBUS,0,&old_sigbus);
sigaction(SIGSEGV,0,&old_sigsegv);
if (!sigsetjmp(crash_jmp,1)) {
sigaction(SIGILL,&crash_action,0);
sigaction(SIGFPE,&crash_action,0);
sigaction(SIGBUS,&crash_action,0);
sigaction(SIGSEGV,&crash_action,0);
ticks();
result = 1;
}
sigaction(SIGILL,&old_sigill,0);
sigaction(SIGFPE,&old_sigfpe,0);
sigaction(SIGBUS,&old_sigbus,0);
sigaction(SIGSEGV,&old_sigsegv,0);
return result;
}
// Parse a single floating-point number from the start of the file at `path`.
// Returns 1 and stores the number in *value on success, 0 otherwise.
static int osfreq_readfile(const char *path,double *value)
{
  FILE *f = fopen(path,"r");
  int got;

  if (!f) return 0;
  got = fscanf(f,"%lf",value);
  fclose(f);
  return got > 0;
}

// Scan `f` line by line for the first line matching `pattern`, which must
// contain exactly one %lf conversion. Returns the parsed number, or 0 if the
// end of input is reached without a match.
static double osfreq_scanlines(FILE *f,const char *pattern)
{
  double value;
  int s;

  for (;;) {
    s = fscanf(f,pattern,&value);
    if (s > 0) return value;
    if (s == 0) s = fscanf(f,"%*[^\n]\n"); // no match: discard the rest of the line
    if (s < 0) return 0;
  }
}

// Estimate the CPU frequency in cycles per second.
// Sources are tried in a fixed order and the first usable one wins:
// /etc/cpucyclespersecond, cpufreq and clock_tick files under /sys,
// three different line formats in /proc/cpuinfo, the output of sysctl,
// lsattr and psrinfo, and finally the cpucyclespersecond environment
// variable. If nothing yields a number, 2399987654 is returned.
static double osfreq(void)
{
  static const char *const cpuinfo_patterns[] = {
    "cpu MHz : %lf",
    "clock : %lf",
    "cpu MHz static : %lf",
  };
  double hz;
  FILE *pipe;
  const char *env;
  unsigned int i;
  int got;

  if (osfreq_readfile("/etc/cpucyclespersecond",&hz))
    return hz;
  // the two cpufreq values are scaled by 1000
  if (osfreq_readfile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed",&hz))
    return 1000.0 * hz;
  if (osfreq_readfile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq",&hz))
    return 1000.0 * hz;
  if (osfreq_readfile("/sys/devices/system/cpu/cpu0/clock_tick",&hz))
    return hz;

  // /proc/cpuinfo is reopened for every pattern so each scan starts at the top
  for (i = 0;i < sizeof cpuinfo_patterns / sizeof cpuinfo_patterns[0];++i) {
    FILE *cpuinfo = fopen("/proc/cpuinfo","r");
    if (!cpuinfo) continue;
    hz = osfreq_scanlines(cpuinfo,cpuinfo_patterns[i]);
    fclose(cpuinfo);
    if (hz) return 1000000.0 * hz;
  }

  pipe = popen("sysctl hw.cpufrequency 2>/dev/null","r");
  if (pipe) {
    got = fscanf(pipe,"hw.cpufrequency: %lf",&hz);
    pclose(pipe);
    if (got > 0 && hz > 0) return hz;
  }

  pipe = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r");
  if (pipe) {
    got = fscanf(pipe,"frequency %lf",&hz);
    pclose(pipe);
    if (got > 0) return hz;
  }

  pipe = popen("/usr/sbin/psrinfo -v 2>/dev/null","r");
  if (pipe) {
    hz = osfreq_scanlines(pipe," The %*s processor operates at %lf MHz");
    pclose(pipe);
    if (hz) return 1000000.0 * hz;
  }

  env = getenv("cpucyclespersecond");
  if (env && sscanf(env,"%lf",&hz) > 0)
    return hz;

  return 2399987654.0;
}
static long long persecond = 0;
static const char *implementation = "none";
long long (*cpucycles)(void) = cpucycles_init;
const char *cpucycles_implementation(void)
{
cpucycles();
return implementation;
}
long long cpucycles_persecond(void)
{
cpucycles();
return persecond;
}
// Library version as a date string.
const char *cpucycles_version(void)
{
  static const char version[] = "20230115";
  return version;
}
// ----- cycle counter scaled from ticks
static double cpucycles_scaled_scaling = 0;
static long long cpucycles_scaled_offset = 0;
static long long (*cpucycles_scaled_from)(void) = 0;
// Counter built on a tick source that does not run at the CPU frequency:
// ticks elapsed since the recorded offset, multiplied by the scaling factor.
// The multiplication is done in double and converted back on return.
static long long cpucycles_scaled(void)
{
  long long elapsed = cpucycles_scaled_from();
  elapsed -= cpucycles_scaled_offset;
  return elapsed*cpucycles_scaled_scaling;
}
// ----- cycle counter extended from 32-bit ticks
static long long (*cpucycles_extend32_from)(void) = 0;
static uint32_t cpucycles_extend32_prev_ticks;
static long long cpucycles_extend32_prev_us;
static long long cpucycles_extend32_prev_cycles;
// Record the first checkpoint for the 32-bit extension: the current tick
// value, the current time in microseconds, and an extended count of zero.
static void cpucycles_extend32_setup(void)
{
  cpucycles_extend32_prev_ticks = cpucycles_extend32_from();
  cpucycles_extend32_prev_us = cpucycles_microseconds();
  cpucycles_extend32_prev_cycles = 0;
}
// Return a 64-bit count derived from a 32-bit tick source.
// prev_ticks and prev_us describe the last checkpoint; prev_cycles is the
// extended count at that checkpoint.
static long long cpucycles_extend32(void)
{
  long long (*ticks)(void) = cpucycles_extend32_from;
  uint32_t new_ticks = ticks();
  // difference of two uint32_t values, so it wraps modulo 2^32 where int is 32 bits
  unsigned long long delta_ticks = new_ticks-cpucycles_extend32_prev_ticks;
  long long new_us = cpucycles_microseconds();
  long long delta_us = new_us-cpucycles_extend32_prev_us;
  // assume that number of cycles cannot increase by 2^32 in 2ms
  // under 1ms since the checkpoint: answer relative to it without moving it
  if (delta_us < 1000)
    return cpucycles_extend32_prev_cycles+delta_ticks;
  // otherwise this reading becomes the new checkpoint
  cpucycles_extend32_prev_ticks = new_ticks;
  cpucycles_extend32_prev_us = new_us;
  if (delta_us >= 2000) {
    // long gap: the counter may have wrapped; add multiples of 2^32 until
    // delta_ticks is within 2^31 of the count expected from elapsed time
    long long target = (delta_us*0.000001)*persecond;
    while (delta_ticks+2147483648ULL < target)
      delta_ticks += 4294967296ULL;
  }
  return cpucycles_extend32_prev_cycles += delta_ticks;
}
// ----- estimating cycles per tick
// Current gettimeofday() time as a single count of microseconds.
long long cpucycles_microseconds(void)
{
  struct timeval now;

  gettimeofday(&now,(struct timezone *) 0);
  // widen the seconds before multiplying so the product is computed in 64 bits
  return (long long) now.tv_sec * 1000000 + now.tv_usec;
}
// Estimate how many CPU cycles correspond to one tick of `ticks`.
// Samples the tick source and the microsecond clock until at least 10000
// microseconds and at least 1000 ticks have elapsed, then converts the
// elapsed time to cycles using persecond and divides by the elapsed ticks.
// Returns 0 if the tick source did not advance.
static double estimate_cyclespertick(long long (*ticks)(void))
{
  const long long start_ticks = ticks();
  const long long start_us = cpucycles_microseconds();
  long long now_ticks,now_us;

  for (;;) {
    now_ticks = ticks();
    now_us = cpucycles_microseconds();
    if (now_us-start_us >= 10000 && now_ticks-start_ticks >= 1000) break;
  }

  if (now_ticks <= start_ticks) return 0;
  return (persecond * 0.000001 * (double) (now_us-start_us)) / (double) (now_ticks-start_ticks);
}
// ----- selecting an option
#include "options.inc"
#define CALLS 1000
#define ESTIMATES 3
// Initial target of the cpucycles pointer. Sets up every option from
// options.inc, measures the ones that work, picks the one with the best
// (smallest) precision score, repoints cpucycles at it and returns a first
// reading from the selected counter.
long long cpucycles_init(void)
{
  long long precision[NUMOPTIONS]; // score per option; 0 means unusable
  double scaling[NUMOPTIONS]; // cycles per tick; 0 means unknown
  int only32[NUMOPTIONS]; // 1 if ticks are 32 bits wide and need extending
  long long bestprecision;
  long long bestopt;
  long long opt;
  persecond = osfreq();
  for (opt = 0;opt < NUMOPTIONS;++opt) {
    long long freq = options[opt].ticks_setup();
    long long tries;
    precision[opt] = 0;
    scaling[opt] = 0;
    only32[opt] = 0;
    // translate the ticks_setup() verdict into a scaling factor
    if (freq > 0) {
      // known ticks per second
      scaling[opt] = persecond*1.0/freq;
    } else if (freq == cpucycles_CYCLECOUNTER) {
      scaling[opt] = 1.0;
    } else if (freq == cpucycles_EXTEND32) {
      only32[opt] = 1;
      scaling[opt] = 1.0;
    } else if (freq == cpucycles_MAYBECYCLECOUNTER) {
      scaling[opt] = 1.0;
    } else if (freq == cpucycles_FINDMULTIPLIER) {
      // look for a multiplier of the form integer/denom, denom = 1,2,4,...,1024,
      // that all ESTIMATES measurements agree on to within 0.1
      int ok = 0;
      double denom;
      long long loop;
      for (denom = 1;denom <= 1024;denom += denom) {
        double est[ESTIMATES];
        for (loop = 0;loop < ESTIMATES;++loop)
          est[loop] = denom*estimate_cyclespertick(options[opt].ticks);
        // round the first estimate to the nearest integer
        scaling[opt] = (double) (long long) est[0];
        if (scaling[opt] < est[0]-0.5) scaling[opt] += 1;
        if (scaling[opt] > est[0]+0.5) scaling[opt] -= 1;
        ok = 1;
        for (loop = 0;loop < ESTIMATES;++loop) {
          if (est[loop]-scaling[opt] > 0.1) ok = 0;
          if (scaling[opt]-est[loop] > 0.1) ok = 0;
        }
        if (ok) {
          scaling[opt] /= denom;
          break;
        }
        scaling[opt] = 0;
      }
      if (!ok) continue;
    } else {
      // cpucycles_SKIP or an unrecognized value: leave precision at 0
      continue;
    }
    // measure the counter: up to 10 attempts at CALLS+1 back-to-back readings
    for (tries = 0;tries < 10;++tries) {
      long long t[CALLS+1];
      long long ok = 1;
      long long i;
      if (scaling[opt] == 1.0) {
        for (i = 0;i <= CALLS;++i)
          t[i] = options[opt].ticks();
      } else {
        double scalingopt = scaling[opt];
        long long offset = options[opt].ticks();
        for (i = 0;i <= CALLS;++i)
          t[i] = (options[opt].ticks()-offset)*scalingopt;
      }
      // reject the attempt if the readings ever go backwards...
      for (i = 0;i < CALLS;++i)
        if (t[i] > t[i+1])
          ok = 0;
      // ...or if the first and last readings are equal
      if (t[0] == t[CALLS])
        ok = 0;
      if (ok) {
        // precision = smallest positive step between adjacent readings
        long long smallestdiff = 0;
        for (i = 0;i < CALLS;++i) {
          long long diff = t[i+1]-t[i];
          if (diff <= 0) continue;
          if (smallestdiff == 0 || diff < smallestdiff)
            smallestdiff = diff;
        }
        precision[opt] = smallestdiff;
        // tilt selection towards more robust counters
        if (freq != cpucycles_CYCLECOUNTER && freq != cpucycles_EXTEND32)
          precision[opt] += 100;
        if (freq > 0)
          precision[opt] += 100;
        break;
      }
      // otherwise keep trying
      // since !ok can be caused by overflow
      // or by core swap
    }
  }
  if (tracesetup) {
    for (opt = 0;opt < NUMOPTIONS;++opt)
      printf("cpucycles tracesetup %lld %s precision %lld scaling %lf only32 %d\n"
        ,opt,options[opt].implementation,precision[opt],scaling[opt],only32[opt]);
  }
  // choose the usable option with the smallest score; DEFAULTOPTION if none is usable
  bestopt = DEFAULTOPTION;
  bestprecision = 0;
  for (opt = 0;opt < NUMOPTIONS;++opt)
    if (precision[opt] > 0)
      if (!bestprecision || precision[opt] < bestprecision) {
        bestopt = opt;
        bestprecision = precision[opt];
      }
  implementation = options[bestopt].implementation;
  // install the chosen counter: directly, through the 32-bit extension,
  // or through the scaling wrapper
  if (scaling[bestopt] == 1.0) {
    if (only32[bestopt]) {
      cpucycles_extend32_from = options[bestopt].ticks;
      cpucycles_extend32_setup();
      cpucycles = cpucycles_extend32;
    } else {
      cpucycles = options[bestopt].ticks;
    }
  } else {
    cpucycles_scaled_scaling = scaling[bestopt];
    cpucycles_scaled_from = options[bestopt].ticks;
    cpucycles_scaled_offset = cpucycles_scaled_from();
    cpucycles = cpucycles_scaled;
  }
  return cpucycles();
}
@@ -1,22 +0,0 @@
// version 20230105
// public domain
// djb
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#include "cpucycles_internal.h"
// Time-stamp counter reading obtained from the __rdtsc() intrinsic.
long long ticks(void)
{
  unsigned long long stamp = __rdtsc();
  return stamp;
}
// cpucycles_MAYBECYCLECOUNTER if the intrinsic can be executed without a
// trapped signal, cpucycles_SKIP otherwise.
long long ticks_setup(void)
{
  long long verdict = cpucycles_SKIP;
  if (cpucycles_works(ticks)) verdict = cpucycles_MAYBECYCLECOUNTER;
  return verdict;
}
@@ -1,22 +0,0 @@
// version 20230105
// public domain
// djb
#include "cpucycles_internal.h"
#ifndef __i386__
#error this code is only for 32-bit x86 platforms
#endif
// Read the time-stamp counter with hand-encoded inline assembly on 32-bit x86.
long long ticks(void)
{
  long long result;
  // .byte 15;.byte 49 emits the opcode bytes 0f 31 directly; the "=A"
  // constraint takes the 64-bit result from the edx:eax register pair
  asm volatile(".byte 15;.byte 49" : "=A" (result));
  return result;
}
// Guarded probe of the inline-assembly counter read: a surviving probe
// yields cpucycles_MAYBECYCLECOUNTER, a trapped signal yields cpucycles_SKIP.
long long ticks_setup(void)
{
  return cpucycles_works(ticks) ? cpucycles_MAYBECYCLECOUNTER : cpucycles_SKIP;
}
-47
View File
@@ -1,47 +0,0 @@
### NAME
cpucycles - count CPU cycles
### SYNOPSIS
#include <cpucycles.h>
long long count = cpucycles();
long long persecond = cpucycles_persecond();
const char *implementation = cpucycles_implementation();
const char *version = cpucycles_version();
Link with `-lcpucycles`. Old systems may also need `-lrt`.
### DESCRIPTION
`cpucycles()` returns an estimate for the number of CPU cycles that have
occurred since an unspecified time in the past (perhaps system boot,
perhaps program startup).
Accessing true cycle counters can be difficult on some CPUs and
operating systems. `cpucycles()` does its best to produce accurate
results, but selects a low-precision counter if the only other option is
failure.
`cpucycles_persecond()` returns an estimate for the number of CPU cycles
per second. This estimate comes from `/etc/cpucyclespersecond` if that
file exists, otherwise from various OS mechanisms, otherwise from the
`cpucyclespersecond` environment variable if that is set, otherwise
2399987654.
`cpucycles_implementation()` returns the name of the counter in use:
e.g., `"amd64-pmc"`.
`cpucycles_version()` returns the `libcpucycles` version number as a
string: e.g., `"20230115"`. Results of `cpucycles_implementation()`
should be interpreted relative to `cpucycles_version()`.
`cpucycles` is actually a function pointer. The first call to
`cpucycles()` or `cpucycles_persecond()` or `cpucycles_implementation()`
selects one of the available counters and updates the `cpucycles`
pointer accordingly. Subsequent calls to `cpucycles()` are thread-safe.
### SEE ALSO
**gettimeofday**(2), **clock_gettime**(2)
-447
View File
@@ -1,447 +0,0 @@
Currently libcpucycles supports the following cycle counters. Some
cycle counters are actually other forms of counters that libcpucycles
scales to imitate a cycle counter. There is
[separate documentation](selection.html)
for how libcpucycles makes a choice of cycle counter. See also
[security considerations](security.html) regarding enabling or disabling
counters and regarding Turbo Boost.
`amd64-pmc`: Requires a 64-bit Intel/AMD platform. Requires the Linux
perf_event interface. Accesses a cycle counter through RDPMC. Requires
`/proc/sys/kernel/perf_event_paranoid` to be at most 2 for user-level
RDPMC access. This counter runs at the clock frequency of the CPU core.
`amd64-tsc`, `amd64-tscasm`: Requires a 64-bit Intel/AMD platform.
Requires RDTSC to be enabled, which it is by default. Uses RDTSC to
access the CPU's time-stamp counter. On current CPUs, this is an
off-core clock rather than a cycle counter, but it is typically a very
fast off-core clock, making it adequate for seeing cycle counts if
overclocking and underclocking are disabled. The difference between
`tsc` and `tscasm` is that `tsc` uses the compiler's `__rdtsc()` while
`tscasm` uses inline assembly.
`arm32-cortex`: Requires a 32-bit ARMv7-A platform. Uses
`mrc p15, 0, %0, c9, c13, 0` to read the cycle counter. Requires user
access to the cycle counter, which is not enabled by default but can be
enabled under Linux via
[a kernel module](https://github.com/thoughtpolice/enable_arm_pmu).
This counter is natively 32 bits, but libcpucycles watches how the
counter and `gettimeofday` increase to compute a 64-bit extension of the
counter.
`arm64-pmc`: Requires a 64-bit ARMv8-A platform. Uses
`mrs %0, PMCCNTR_EL0` to read the cycle counter. Requires user access
to the cycle counter, which is not enabled by default but can be enabled
under Linux via
[a kernel module](https://github.com/rdolbeau/enable_arm_pmu).
`arm64-vct`: Requires a 64-bit ARMv8-A platform. Uses
`mrs %0, CNTVCT_EL0` to read a "virtual count" timer. This is an
off-core clock, typically running at 24MHz. Results are scaled by
libcpucycles.
`mips64-cc`: Requires a 64-bit MIPS platform. (Maybe the same code would
also work as `mips32-cc`, but this has not been tested yet.) Uses RDHWR
to read the hardware cycle counter (hardware register 2 times a constant
scale factor in hardware register 3). This counter is natively 32 bits,
but libcpucycles watches how the counter and `gettimeofday` increase to
compute a 64-bit extension of the counter.
`ppc32-mftb`: Requires a 32-bit PowerPC platform. Uses `mftb` and
`mftbu` to read the "time base". This is an off-core clock, typically
running at 24MHz.
`ppc64-mftb`: Requires a 64-bit PowerPC platform. Uses `mftb` and
`mftbu` to read the "time base". This is an off-core clock, typically
running at 24MHz.
`riscv32-rdcycle`: Requires a 32-bit RISC-V platform. Uses `rdcycle`
and `rdcycleh` to read a cycle counter.
`riscv64-rdcycle`: Requires a 64-bit RISC-V platform. Uses `rdcycle`
to read a cycle counter.
`s390x-stckf`: Requires a 64-bit z/Architecture platform. Uses `stckf`
to read the TOD clock, which is documented to run at 4096MHz. On the
z15, this looks like a doubling of an off-core 2048MHz clock. Results
are scaled by libcpucycles.
`sparc64-rdtick`: Requires a 64-bit SPARC platform. Uses `rd %tick`
to read a cycle counter.
`x86-tsc`, `x86-tscasm`: Same as `amd64-tsc` and `amd64-tscasm`, but
for 32-bit Intel/AMD platforms instead of 64-bit Intel/AMD platforms.
`default-gettimeofday`: Reasonably portable. Resolution is limited to 1
microsecond. Results are scaled by libcpucycles.
`default-mach`: Requires an OS with `mach_absolute_time()`. Typically
runs at 24MHz. Results are scaled by libcpucycles.
`default-monotonic`: Requires `CLOCK_MONOTONIC`. Reasonably portable,
although might fail on older systems where `default-gettimeofday` works.
Resolution is limited to 1 nanosecond. Can be almost as good as a cycle
counter, or orders of magnitude worse, depending on the OS and CPU.
Results are scaled by libcpucycles.
`default-perfevent`: Requires the Linux `perf_event` interface, and a
CPU where `perf_event` supports `PERF_COUNT_HW_CPU_CYCLES`. Similar
variations in quality to `default-monotonic`, without the 1-nanosecond
limitation.
`default-zero`: The horrifying last resort if nothing else works.
## Examples
These are examples of `cpucycles-info` output on various machines. The
machines named `gcc*` are from the
[GCC Compile Farm](https://gcc.gnu.org/wiki/CompileFarm).
A `median` line saying, e.g., `47 +47+28+0+2-5+0+2-5...` means that the
differences between adjacent cycle counts were 47+47, 47+28, 47+0, 47+2,
47-5, 47+0, 47+2, 47-5, etc., with median difference 47. The first few
differences are typically larger because of cache effects.
`pi3aplus`,
Broadcom BCM2837B0:
```
cpucycles version 20230105
cpucycles tracesetup 0 arm64-pmc precision 9 scaling 1.000000 only32 0
cpucycles tracesetup 1 arm64-vct precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 189 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 272 scaling 1.400000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 1600 scaling 1400.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1400000000
cpucycles implementation arm64-pmc
cpucycles median 10 +10+8+3+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 1032000000...4224666667 with 1024 loops 4 microseconds
cpucycles observed persecond 1286000000...1756000000 with 2048 loops 7 microseconds
cpucycles observed persecond 1368266666...1598000000 with 4096 loops 14 microseconds
cpucycles observed persecond 1366700000...1473428572 with 8192 loops 29 microseconds
cpucycles observed persecond 1366100000...1417534483 with 16384 loops 59 microseconds
cpucycles observed persecond 1332739837...1357132232 with 32768 loops 122 microseconds
cpucycles observed persecond 1354483471...1366945834 with 65536 loops 241 microseconds
cpucycles observed persecond 1385684989...1392195330 with 131072 loops 472 microseconds
cpucycles observed persecond 1347223021...1350328528 with 262144 loops 972 microseconds
cpucycles observed persecond 1375460125...1377069853 with 524288 loops 1905 microseconds
cpucycles observed persecond 1376527697...1377335961 with 1048576 loops 3808 microseconds
```
`bblack`,
TI Sitara XAM3359AZCZ100:
```
cpucycles version 20230105
cpucycles tracesetup 0 arm32-cortex precision 8 scaling 1.000000 only32 1
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 1283 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 1200 scaling 1000.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1000000000
cpucycles implementation arm32-cortex
cpucycles median 1260 +1506+62+31+7+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+13+7+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 622181818...2101888889 with 1024 loops 10 microseconds
cpucycles observed persecond 806133333...1492615385 with 2048 loops 14 microseconds
cpucycles observed persecond 879880000...1232565218 with 4096 loops 24 microseconds
cpucycles observed persecond 939577777...1130581396 with 8192 loops 44 microseconds
cpucycles observed persecond 956954022...1050047059 with 16384 loops 86 microseconds
cpucycles observed persecond 982878542...1020685715 with 32768 loops 246 microseconds
cpucycles observed persecond 988105105...1012217523 with 65536 loops 332 microseconds
cpucycles observed persecond 993752077...1007159723 with 131072 loops 721 microseconds
cpucycles observed persecond 995364296...1004009448 with 262144 loops 1377 microseconds
cpucycles observed persecond 998216306...1001821536 with 524288 loops 2685 microseconds
cpucycles observed persecond 998991848...1000914196 with 1048576 loops 5397 microseconds
```
`hiphop`,
Intel Xeon E3-1220 v3:
```
cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 40 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 124 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 124 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 160 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 272 scaling 3.100000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 3300 scaling 3100.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3100000000
cpucycles implementation amd64-pmc
cpucycles median 44 +38+23+23+23-4+0-4+0-4+0-4+0+10-4-2+1-4+1-4+1+17+1-4+1-4+1-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4
cpucycles observed persecond 2066500000...4235000000 with 8192 loops 3 microseconds
cpucycles observed persecond 2760833333...4200250000 with 16384 loops 5 microseconds
cpucycles observed persecond 2743416666...3313100000 with 32768 loops 11 microseconds
cpucycles observed persecond 2986227272...3295000000 with 65536 loops 21 microseconds
cpucycles observed persecond 3052069767...3206073171 with 131072 loops 42 microseconds
cpucycles observed persecond 3050395348...3125523810 with 262144 loops 85 microseconds
cpucycles observed persecond 3085123529...3123059524 with 524288 loops 169 microseconds
cpucycles observed persecond 3084561764...3103434912 with 1048576 loops 339 microseconds
```
`nucnuc`,
Intel Pentium N3700:
```
cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 26 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 120 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 120 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 427 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 320 scaling 1.600000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 1800 scaling 1600.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1600000000
cpucycles implementation amd64-pmc
cpucycles median 66 +12+12+14+14-1-1+0-1+0-1+0-1+0+1-1+0-1+0-1+0-2+0-1+0-1+0-1+0-2+0-1+0-1+0-1+0-2+0-1+0-1+1-1+0-2-1-1+0-1+0-1+0-2+0-1+2+0-1+0-1+0+0-1
cpucycles observed persecond 1060500000...2325000000 with 2048 loops 3 microseconds
cpucycles observed persecond 1387166666...2208250000 with 4096 loops 5 microseconds
cpucycles observed persecond 1376083333...1705500000 with 8192 loops 11 microseconds
cpucycles observed persecond 1495727272...1671800000 with 16384 loops 21 microseconds
cpucycles observed persecond 1563428571...1655100000 with 32768 loops 41 microseconds
cpucycles observed persecond 1580807228...1626234568 with 65536 loops 82 microseconds
cpucycles observed persecond 1589539393...1612619632 with 131072 loops 164 microseconds
cpucycles observed persecond 1598841463...1610230062 with 262144 loops 327 microseconds
cpucycles observed persecond 1564336810...1569988042 with 524288 loops 670 microseconds
cpucycles observed persecond 1599759725...1602608098 with 1048576 loops 1310 microseconds
```
`saber214`,
AMD FX-8350:
```
cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 167 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 168 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 376 scaling 4.013452 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 4213 scaling 4013.452000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 4013452000
cpucycles implementation amd64-tsc
cpucycles median 77 +87-2+21+7+4+1+0+2-2-7-4+0+1+4-2+3+1-2-2+5-6+2+2+2+2+1-1-1+0-4+0-1-1-1-2+3-1-1+2-2+0+0+2+0+0+2-2-2+1-1-2+2-5+2+0+2+0+1+0+3-2-1-1
cpucycles observed persecond 2767500000...5759000000 with 4096 loops 3 microseconds
cpucycles observed persecond 3426000000...4893800000 with 8192 loops 6 microseconds
cpucycles observed persecond 3724076923...4446363637 with 16384 loops 12 microseconds
cpucycles observed persecond 3977833333...4363318182 with 32768 loops 23 microseconds
cpucycles observed persecond 3984854166...4168739131 with 65536 loops 47 microseconds
cpucycles observed persecond 3981709923...4048193799 with 131072 loops 130 microseconds
cpucycles observed persecond 3982716417...4026914573 with 262144 loops 200 microseconds
cpucycles observed persecond 4001637602...4025136987 with 524288 loops 366 microseconds
cpucycles observed persecond 4007411111...4018600248 with 1048576 loops 809 microseconds
```
`gcc14`,
Intel Xeon E5-2620 v3,
Debian testing (bookworm),
Linux kernel 6.0.0-6-amd64:
```
cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 41 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 148 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 148 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 159 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 289 scaling 3.200000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 3400 scaling 3200.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3200000000
cpucycles implementation amd64-pmc
cpucycles median 47 +47+28+0+2-5+0+2-5+16+2-5+0+2-5+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0
cpucycles observed persecond 1653800000...2819333334 with 8192 loops 4 microseconds
cpucycles observed persecond 1832111111...2389285715 with 16384 loops 8 microseconds
cpucycles observed persecond 1936058823...2207200000 with 32768 loops 16 microseconds
cpucycles observed persecond 2052843750...2196200000 with 65536 loops 31 microseconds
cpucycles observed persecond 2050750000...2120048388 with 131072 loops 63 microseconds
cpucycles observed persecond 2081896825...2117048388 with 262144 loops 125 microseconds
cpucycles observed persecond 2089478087...2107044177 with 524288 loops 250 microseconds
cpucycles observed persecond 2093343313...2102124249 with 1048576 loops 500 microseconds
```
`gcc23`,
Cavium Octeon II V0.1,
Debian 8.11,
Linux kernel 4.1.4:
```
cpucycles version 20230105
cpucycles tracesetup 0 mips64-cc precision 24 scaling 1.000000 only32 1
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 46702 scaling 2.399988 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 45799 scaling 2399.987654 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2399987654
cpucycles implementation mips64-cc
cpucycles median 2177 +828+17+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 641900000...1845125000 with 1024 loops 9 microseconds
cpucycles observed persecond 745357142...1352083334 with 2048 loops 13 microseconds
cpucycles observed persecond 809826086...1162333334 with 4096 loops 22 microseconds
cpucycles observed persecond 897717948...1104405406 with 8192 loops 38 microseconds
cpucycles observed persecond 957467532...1059986667 with 16384 loops 76 microseconds
cpucycles observed persecond 973102189...1029777778 with 32768 loops 136 microseconds
cpucycles observed persecond 986518656...1015830828 with 65536 loops 267 microseconds
cpucycles observed persecond 993452830...1008166667 with 131072 loops 529 microseconds
cpucycles observed persecond 996036966...1003403609 with 262144 loops 1054 microseconds
cpucycles observed persecond 984706378...1001682630 with 524288 loops 2131 microseconds
cpucycles observed persecond 992585292...1001178580 with 1048576 loops 4296 microseconds
```
`gcc45`,
AMD Athlon II X4 640,
Debian 8.11,
Linux kernel 3.16.0-11-686-pae:
```
cpucycles version 20230105
cpucycles tracesetup 0 x86-tsc precision 199 scaling 1.000000 only32 0
cpucycles tracesetup 1 x86-tscasm precision 199 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 170 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 941 scaling 3.000000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 3200 scaling 3000.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3000000000
cpucycles implementation default-perfevent
cpucycles median 72 +12+0+0+0+0+0+0+0+5+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0
cpucycles observed persecond 541500000...1812000000 with 1024 loops 3 microseconds
cpucycles observed persecond 712333333...1212250000 with 2048 loops 5 microseconds
cpucycles observed persecond 1193285714...1733600000 with 4096 loops 6 microseconds
cpucycles observed persecond 1689176470...1804562500 with 8192 loops 33 microseconds
cpucycles observed persecond 1713074626...1770600000 with 16384 loops 66 microseconds
cpucycles observed persecond 1765107692...1795140625 with 32768 loops 129 microseconds
cpucycles observed persecond 1785369649...1800603922 with 65536 loops 256 microseconds
cpucycles observed persecond 1781377862...1796288462 with 131072 loops 261 microseconds
cpucycles observed persecond 1772647398...1778247827 with 262144 loops 691 microseconds
cpucycles observed persecond 1789670493...1794149598 with 524288 loops 870 microseconds
cpucycles observed persecond 1860276211...1861561332 with 1048576 loops 3156 microseconds
```
`gcc92`,
SiFive Freedom U740,
Ubuntu 22.04,
Linux kernel 5.15.0-1014-generic:
```
cpucycles version 20230105
cpucycles tracesetup 0 riscv64-rdcycle precision 8 scaling 1.000000 only32 0
cpucycles tracesetup 1 default-perfevent precision 3024 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 2599 scaling 2.399988 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 2599 scaling 2399.987654 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2399987654
cpucycles implementation riscv64-rdcycle
cpucycles median 8 +33+27+1+1+1+1+0+0+0+22+0+0+0+0+0+0+0+628+0+0+0+7+0+0+0+145+0+0+0+0+0+0+0+22+0+0+0+0+0+0+0+158+0+0+0+0+0+0+0+22+0+0+0+0+0+0+0+22+0+0+0+0+0
cpucycles observed persecond 530250000...1978000000 with 1024 loops 3 microseconds
cpucycles observed persecond 831000000...1915666667 with 2048 loops 4 microseconds
cpucycles observed persecond 1055750000...1689500000 with 4096 loops 7 microseconds
cpucycles observed persecond 1045562500...1305428572 with 8192 loops 15 microseconds
cpucycles observed persecond 1102700000...1236357143 with 16384 loops 29 microseconds
cpucycles observed persecond 1176053571...1247444445 with 32768 loops 55 microseconds
cpucycles observed persecond 1173321428...1209127273 with 65536 loops 111 microseconds
cpucycles observed persecond 1187805429...1205210046 with 131072 loops 220 microseconds
cpucycles observed persecond 1192415909...1201157535 with 262144 loops 439 microseconds
cpucycles observed persecond 1194694760...1199247717 with 524288 loops 877 microseconds
cpucycles observed persecond 1194656004...1197023034 with 1048576 loops 1781 microseconds
```
`gcc103`,
Apple M1 (Icestorm-M1 + Firestorm-M1),
Debian unstable (bookworm),
Linux kernel 6.0.0-rc5-asahi-00001-gc62bd3fe430f:
```
cpucycles version 20230105
cpucycles tracesetup 0 arm64-pmc precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 1 arm64-vct precision 186 scaling 86.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 285 scaling 2.064000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 2264 scaling 2064.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2064000000
cpucycles implementation arm64-vct
cpucycles median 0 +0+86+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0
cpucycles observed persecond 1784500000...3655000000 with 8192 loops 3 microseconds
cpucycles observed persecond 1773750000...2393666667 with 16384 loops 7 microseconds
cpucycles observed persecond 1897733333...2222769231 with 32768 loops 14 microseconds
cpucycles observed persecond 1951310344...2114962963 with 65536 loops 28 microseconds
cpucycles observed persecond 2024071428...2107000000 with 131072 loops 55 microseconds
cpucycles observed persecond 2041531531...2082935780 with 262144 loops 110 microseconds
cpucycles observed persecond 2051158371...2071461188 with 524288 loops 220 microseconds
cpucycles observed persecond 2058539682...2068309795 with 1048576 loops 440 microseconds
```
`gcc112` (`gcc2-power8`),
IBM POWER8E,
CentOS 7.9 AltArch,
Linux kernel 3.10.0-1127.13.1.el7.ppc64le:
```
cpucycles version 20230105
cpucycles tracesetup 0 ppc64-mftb precision 251 scaling 7.207031 only32 0
cpucycles tracesetup 1 default-perfevent precision 295 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 536 scaling 3.690000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 3890 scaling 3690.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3690000000
cpucycles implementation ppc64-mftb
cpucycles median 195 +2969-8+14+0-8+7-8-7+7+6-7-1+0-1+0+7+7-15+7-1-7+6+0+0-8+0+6+0-8+7+0+7-8-8-7-1+7-8+7+0-8+0+14-8-7+6+0-8+7+7-15+0-1+0-1+14+0-15+14+0-1+7+0
cpucycles observed persecond 2603750000...5510000000 with 2048 loops 3 microseconds
cpucycles observed persecond 3430500000...6052250000 with 4096 loops 5 microseconds
cpucycles observed persecond 3411333333...4457500000 with 8192 loops 11 microseconds
cpucycles observed persecond 3548695652...4060333334 with 16384 loops 22 microseconds
cpucycles observed persecond 3624977777...3876534884 with 32768 loops 44 microseconds
cpucycles observed persecond 3621855555...3745363637 with 65536 loops 89 microseconds
cpucycles observed persecond 3660157303...3722227273 with 131072 loops 177 microseconds
cpucycles observed persecond 3680471751...3711622160 with 262144 loops 353 microseconds
cpucycles observed persecond 3685321074...3700886525 with 524288 loops 706 microseconds
cpucycles observed persecond 3687745930...3695537208 with 1048576 loops 1412 microseconds
```
`gcc202`,
UltraSparc T5,
Debian unstable (bookworm),
Linux kernel 5.19.0-2-sparc64-smp:
```
cpucycles version 20230105
cpucycles tracesetup 0 sparc64-rdtick precision 65 scaling 1.000000 only32 0
cpucycles tracesetup 1 default-perfevent precision 386 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 442 scaling 3.599910 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 3799 scaling 3599.910000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3599910000
cpucycles implementation sparc64-rdtick
cpucycles median 73 +24+0+24+24+24+24+24+24+0+1+24+0+1+24+0+1+24+0+0+1+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 2751500000...4258250000 with 4096 loops 5 microseconds
cpucycles observed persecond 3289200000...4206875000 with 8192 loops 9 microseconds
cpucycles observed persecond 3454789473...3900823530 with 16384 loops 18 microseconds
cpucycles observed persecond 3452026315...3659888889 with 32768 loops 37 microseconds
cpucycles observed persecond 3543770270...3650916667 with 65536 loops 73 microseconds
cpucycles observed persecond 3567299319...3620662069 with 131072 loops 146 microseconds
cpucycles observed persecond 3591373287...3618220690 with 262144 loops 291 microseconds
cpucycles observed persecond 3597353344...3610774527 with 524288 loops 582 microseconds
cpucycles observed persecond 3595899403...3603058071 with 1048576 loops 1172 microseconds
```
IBM z15:
```
cpucycles version 20230106
cpucycles tracesetup 0 s390x-stckf precision 250 scaling 1.269531 only32 0
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 272 scaling 5.200000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 5400 scaling 5200.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 5200000000
cpucycles implementation s390x-stckf
cpucycles median 48 +87+8+0-2+0+0+38-2+0+1-3+1+28+0+3-3+1+0+28+0-2+3+0-2+36+0+0+0+1+0+28+0-2+0+3-2+35+1+0-2+0+3+28+0-2+0+0-2+3+25+3+0-2+0+1+35+1+0+0-2+0+28+0
cpucycles observed persecond 4948941176...5627733334 with 8192 loops 16 microseconds
cpucycles observed persecond 4104125000...5515666667 with 16384 loops 7 microseconds
cpucycles observed persecond 5047076923...5987818182 with 32768 loops 12 microseconds
cpucycles observed persecond 5044846153...5475708334 with 65536 loops 25 microseconds
cpucycles observed persecond 5141313725...5357428572 with 131072 loops 50 microseconds
cpucycles observed persecond 5150892156...5257250000 with 262144 loops 101 microseconds
cpucycles observed persecond 5183421568...5236549505 with 524288 loops 203 microseconds
cpucycles observed persecond 5190282555...5216582717 with 1048576 loops 406 microseconds
```
-30
View File
@@ -1,30 +0,0 @@
To download and unpack the latest version of libcpucycles:
wget -m https://cpucycles.cr.yp.to/libcpucycles-latest-version.txt
version=$(cat cpucycles.cr.yp.to/libcpucycles-latest-version.txt)
wget -m https://cpucycles.cr.yp.to/libcpucycles-$version.tar.gz
tar -xzf cpucycles.cr.yp.to/libcpucycles-$version.tar.gz
cd libcpucycles-$version
Then [install](install.html).
### Archives and changelog (reverse chronological)
[`libcpucycles-20230115.tar.gz`](libcpucycles-20230115.tar.gz) [browse](libcpucycles-20230115.html)
Update actual `cpucycles_version` behavior to match documentation.
[`libcpucycles-20230110.tar.gz`](libcpucycles-20230110.tar.gz) [browse](libcpucycles-20230110.html)
`doc/api.md`: Document `cpucycles_version()`.
Add `s390x-stckf` counter.
`cpucycles/default-perfevent.c`: Read into `int64_t` instead of `long long`.
Add comment explaining issues with `PERF_FORMAT_TOTAL_TIME_RUNNING`.
`configure`: Improve `uname` handling.
`doc/api.md`: Update description of default frequency.
[`libcpucycles-20230105.tar.gz`](libcpucycles-20230105.tar.gz) [browse](libcpucycles-20230105.html)
-91
View File
@@ -1,91 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
API</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt here">API
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<h3>NAME</h3>
<p>cpucycles - count CPU cycles</p>
<h3>SYNOPSIS</h3>
<pre><code>#include &lt;cpucycles.h&gt;
long long count = cpucycles();
long long persecond = cpucycles_persecond();
const char *implementation = cpucycles_implementation();
const char *version = cpucycles_version();
</code></pre>
<p>Link with <code>-lcpucycles</code>. Old systems may also need <code>-lrt</code>.</p>
<h3>DESCRIPTION</h3>
<p><code>cpucycles()</code> returns an estimate for the number of CPU cycles that have
occurred since an unspecified time in the past (perhaps system boot,
perhaps program startup).</p>
<p>Accessing true cycle counters can be difficult on some CPUs and
operating systems. <code>cpucycles()</code> does its best to produce accurate
results, but selects a low-precision counter if the only other option is
failure.</p>
<p><code>cpucycles_persecond()</code> returns an estimate for the number of CPU cycles
per second. This estimate comes from <code>/etc/cpucyclespersecond</code> if that
file exists, otherwise from various OS mechanisms, otherwise from the
<code>cpucyclespersecond</code> environment variable if that is set, otherwise
2399987654.</p>
<p><code>cpucycles_implementation()</code> returns the name of the counter in use:
e.g., <code>"amd64-pmc"</code>.</p>
<p><code>cpucycles_version()</code> returns the <code>libcpucycles</code> version number as a
string: e.g., <code>"20230115"</code>. Results of <code>cpucycles_implementation()</code>
should be interpreted relative to <code>cpucycles_version()</code>.</p>
<p><code>cpucycles</code> is actually a function pointer. The first call to
<code>cpucycles()</code> or <code>cpucycles_persecond()</code> or <code>cpucycles_implementation()</code>
selects one of the available counters and updates the <code>cpucycles</code>
pointer accordingly. Subsequent calls to <code>cpucycles()</code> are thread-safe.</p>
<h3>SEE ALSO</h3>
<p><strong>gettimeofday</strong>(2), <strong>clock_gettime</strong>(2)</p><hr><font size=1><b>Version:</b>
This is version 2023.01.15 of the "API" web page.
</font>
</div>
</body>
</html>
@@ -1,456 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Counters</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt here">Counters
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<p>Currently libcpucycles supports the following cycle counters. Some
cycle counters are actually other forms of counters that libcpucycles
scales to imitate a cycle counter. There is
<a href="selection.html">separate documentation</a>
for how libcpucycles makes a choice of cycle counter. See also
<a href="security.html">security considerations</a> regarding enabling or disabling
counters and regarding Turbo Boost.</p>
<p><code>amd64-pmc</code>: Requires a 64-bit Intel/AMD platform. Requires the Linux
perf_event interface. Accesses a cycle counter through RDPMC. Requires
<code>/proc/sys/kernel/perf_event_paranoid</code> to be at most 2 for user-level
RDPMC access. This counter runs at the clock frequency of the CPU core.</p>
<p><code>amd64-tsc</code>, <code>amd64-tscasm</code>: Requires a 64-bit Intel/AMD platform.
Requires RDTSC to be enabled, which it is by default. Uses RDTSC to
access the CPU's time-stamp counter. On current CPUs, this is an
off-core clock rather than a cycle counter, but it is typically a very
fast off-core clock, making it adequate for seeing cycle counts if
overclocking and underclocking are disabled. The difference between
<code>tsc</code> and <code>tscasm</code> is that <code>tsc</code> uses the compiler's <code>__rdtsc()</code> while
<code>tscasm</code> uses inline assembly.</p>
<p><code>arm32-cortex</code>: Requires a 32-bit ARMv7-A platform. Uses
<code>mrc p15, 0, %0, c9, c13, 0</code> to read the cycle counter. Requires user
access to the cycle counter, which is not enabled by default but can be
enabled under Linux via
<a href="https://github.com/thoughtpolice/enable_arm_pmu">a kernel module</a>.
This counter is natively 32 bits, but libcpucycles watches how the
counter and <code>gettimeofday</code> increase to compute a 64-bit extension of the
counter.</p>
<p><code>arm64-pmc</code>: Requires a 64-bit ARMv8-A platform. Uses
<code>mrs %0, PMCCNTR_EL0</code> to read the cycle counter. Requires user access
to the cycle counter, which is not enabled by default but can be enabled
under Linux via
<a href="https://github.com/rdolbeau/enable_arm_pmu">a kernel module</a>.</p>
<p><code>arm64-vct</code>: Requires a 64-bit ARMv8-A platform. Uses
<code>mrs %0, CNTVCT_EL0</code> to read a "virtual count" timer. This is an
off-core clock, typically running at 24MHz. Results are scaled by
libcpucycles.</p>
<p><code>mips64-cc</code>: Requires a 64-bit MIPS platform. (Maybe the same code would
also work as <code>mips32-cc</code>, but this has not been tested yet.) Uses RDHWR
to read the hardware cycle counter (hardware register 2 times a constant
scale factor in hardware register 3). This counter is natively 32 bits,
but libcpucycles watches how the counter and <code>gettimeofday</code> increase to
compute a 64-bit extension of the counter.</p>
<p><code>ppc32-mftb</code>: Requires a 32-bit PowerPC platform. Uses <code>mftb</code> and
<code>mftbu</code> to read the "time base". This is an off-core clock, typically
running at 24MHz.</p>
<p><code>ppc64-mftb</code>: Requires a 64-bit PowerPC platform. Uses <code>mftb</code> and
<code>mftbu</code> to read the "time base". This is an off-core clock, typically
running at 24MHz.</p>
<p><code>riscv32-rdcycle</code>: Requires a 32-bit RISC-V platform. Uses <code>rdcycle</code>
and <code>rdcycleh</code> to read a cycle counter.</p>
<p><code>riscv64-rdcycle</code>: Requires a 64-bit RISC-V platform. Uses <code>rdcycle</code>
to read a cycle counter.</p>
<p><code>s390x-stckf</code>: Requires a 64-bit z/Architecture platform. Uses <code>stckf</code>
to read the TOD clock, which is documented to run at 4096MHz. On the
z15, this looks like a doubling of an off-core 2048MHz clock. Results
are scaled by libcpucycles.</p>
<p><code>sparc64-rdtick</code>: Requires a 64-bit SPARC platform. Uses <code>rd %tick</code>
to read a cycle counter.</p>
<p><code>x86-tsc</code>, <code>x86-tscasm</code>: Same as <code>amd64-tsc</code> and <code>amd64-tscasm</code>, but
for 32-bit Intel/AMD platforms instead of 64-bit Intel/AMD platforms.</p>
<p><code>default-gettimeofday</code>: Reasonably portable. Resolution is limited to 1
microsecond. Results are scaled by libcpucycles.</p>
<p><code>default-mach</code>: Requires an OS with <code>mach_absolute_time()</code>. Typically
runs at 24MHz. Results are scaled by libcpucycles.</p>
<p><code>default-monotonic</code>: Requires <code>CLOCK_MONOTONIC</code>. Reasonably portable,
although might fail on older systems where <code>default-gettimeofday</code> works.
Resolution is limited to 1 nanosecond. Can be almost as good as a cycle
counter, or orders of magnitude worse, depending on the OS and CPU.
Results are scaled by libcpucycles.</p>
<p><code>default-perfevent</code>: Requires the Linux <code>perf_event</code> interface, and a
CPU where <code>perf_event</code> supports <code>PERF_COUNT_HW_CPU_CYCLES</code>. Similar
variations in quality to <code>default-monotonic</code>, without the 1-nanosecond
limitation.</p>
<p><code>default-zero</code>: The horrifying last resort if nothing else works.</p>
<h2>Examples</h2>
<p>These are examples of <code>cpucycles-info</code> output on various machines. The
machines named <code>gcc*</code> are from the
<a href="https://gcc.gnu.org/wiki/CompileFarm">GCC Compile Farm</a>.</p>
<p>A <code>median</code> line saying, e.g., <code>47 +47+28+0+2-5+0+2-5...</code> means that the
differences between adjacent cycle counts were 47+47, 47+28, 47+0, 47+2,
47-5, 47+0, 47+2, 47-5, etc., with median difference 47. The first few
differences are typically larger because of cache effects.</p>
<p><code>pi3aplus</code>,
Broadcom BCM2837B0:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 arm64-pmc precision 9 scaling 1.000000 only32 0
cpucycles tracesetup 1 arm64-vct precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 189 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 272 scaling 1.400000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 1600 scaling 1400.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1400000000
cpucycles implementation arm64-pmc
cpucycles median 10 +10+8+3+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 1032000000...4224666667 with 1024 loops 4 microseconds
cpucycles observed persecond 1286000000...1756000000 with 2048 loops 7 microseconds
cpucycles observed persecond 1368266666...1598000000 with 4096 loops 14 microseconds
cpucycles observed persecond 1366700000...1473428572 with 8192 loops 29 microseconds
cpucycles observed persecond 1366100000...1417534483 with 16384 loops 59 microseconds
cpucycles observed persecond 1332739837...1357132232 with 32768 loops 122 microseconds
cpucycles observed persecond 1354483471...1366945834 with 65536 loops 241 microseconds
cpucycles observed persecond 1385684989...1392195330 with 131072 loops 472 microseconds
cpucycles observed persecond 1347223021...1350328528 with 262144 loops 972 microseconds
cpucycles observed persecond 1375460125...1377069853 with 524288 loops 1905 microseconds
cpucycles observed persecond 1376527697...1377335961 with 1048576 loops 3808 microseconds
</code></pre>
<p><code>bblack</code>,
TI Sitara XAM3359AZCZ100:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 arm32-cortex precision 8 scaling 1.000000 only32 1
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 1283 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 1200 scaling 1000.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1000000000
cpucycles implementation arm32-cortex
cpucycles median 1260 +1506+62+31+7+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+13+7+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 622181818...2101888889 with 1024 loops 10 microseconds
cpucycles observed persecond 806133333...1492615385 with 2048 loops 14 microseconds
cpucycles observed persecond 879880000...1232565218 with 4096 loops 24 microseconds
cpucycles observed persecond 939577777...1130581396 with 8192 loops 44 microseconds
cpucycles observed persecond 956954022...1050047059 with 16384 loops 86 microseconds
cpucycles observed persecond 982878542...1020685715 with 32768 loops 246 microseconds
cpucycles observed persecond 988105105...1012217523 with 65536 loops 332 microseconds
cpucycles observed persecond 993752077...1007159723 with 131072 loops 721 microseconds
cpucycles observed persecond 995364296...1004009448 with 262144 loops 1377 microseconds
cpucycles observed persecond 998216306...1001821536 with 524288 loops 2685 microseconds
cpucycles observed persecond 998991848...1000914196 with 1048576 loops 5397 microseconds
</code></pre>
<p><code>hiphop</code>,
Intel Xeon E3-1220 v3:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 40 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 124 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 124 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 160 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 272 scaling 3.100000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 3300 scaling 3100.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3100000000
cpucycles implementation amd64-pmc
cpucycles median 44 +38+23+23+23-4+0-4+0-4+0-4+0+10-4-2+1-4+1-4+1+17+1-4+1-4+1-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4+0-4
cpucycles observed persecond 2066500000...4235000000 with 8192 loops 3 microseconds
cpucycles observed persecond 2760833333...4200250000 with 16384 loops 5 microseconds
cpucycles observed persecond 2743416666...3313100000 with 32768 loops 11 microseconds
cpucycles observed persecond 2986227272...3295000000 with 65536 loops 21 microseconds
cpucycles observed persecond 3052069767...3206073171 with 131072 loops 42 microseconds
cpucycles observed persecond 3050395348...3125523810 with 262144 loops 85 microseconds
cpucycles observed persecond 3085123529...3123059524 with 524288 loops 169 microseconds
cpucycles observed persecond 3084561764...3103434912 with 1048576 loops 339 microseconds
</code></pre>
<p><code>nucnuc</code>,
Intel Pentium N3700:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 26 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 120 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 120 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 427 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 320 scaling 1.600000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 1800 scaling 1600.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 1600000000
cpucycles implementation amd64-pmc
cpucycles median 66 +12+12+14+14-1-1+0-1+0-1+0-1+0+1-1+0-1+0-1+0-2+0-1+0-1+0-1+0-2+0-1+0-1+0-1+0-2+0-1+0-1+1-1+0-2-1-1+0-1+0-1+0-2+0-1+2+0-1+0-1+0+0-1
cpucycles observed persecond 1060500000...2325000000 with 2048 loops 3 microseconds
cpucycles observed persecond 1387166666...2208250000 with 4096 loops 5 microseconds
cpucycles observed persecond 1376083333...1705500000 with 8192 loops 11 microseconds
cpucycles observed persecond 1495727272...1671800000 with 16384 loops 21 microseconds
cpucycles observed persecond 1563428571...1655100000 with 32768 loops 41 microseconds
cpucycles observed persecond 1580807228...1626234568 with 65536 loops 82 microseconds
cpucycles observed persecond 1589539393...1612619632 with 131072 loops 164 microseconds
cpucycles observed persecond 1598841463...1610230062 with 262144 loops 327 microseconds
cpucycles observed persecond 1564336810...1569988042 with 524288 loops 670 microseconds
cpucycles observed persecond 1599759725...1602608098 with 1048576 loops 1310 microseconds
</code></pre>
<p><code>saber214</code>,
AMD FX-8350:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 167 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 168 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 376 scaling 4.013452 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 4213 scaling 4013.452000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 4013452000
cpucycles implementation amd64-tsc
cpucycles median 77 +87-2+21+7+4+1+0+2-2-7-4+0+1+4-2+3+1-2-2+5-6+2+2+2+2+1-1-1+0-4+0-1-1-1-2+3-1-1+2-2+0+0+2+0+0+2-2-2+1-1-2+2-5+2+0+2+0+1+0+3-2-1-1
cpucycles observed persecond 2767500000...5759000000 with 4096 loops 3 microseconds
cpucycles observed persecond 3426000000...4893800000 with 8192 loops 6 microseconds
cpucycles observed persecond 3724076923...4446363637 with 16384 loops 12 microseconds
cpucycles observed persecond 3977833333...4363318182 with 32768 loops 23 microseconds
cpucycles observed persecond 3984854166...4168739131 with 65536 loops 47 microseconds
cpucycles observed persecond 3981709923...4048193799 with 131072 loops 130 microseconds
cpucycles observed persecond 3982716417...4026914573 with 262144 loops 200 microseconds
cpucycles observed persecond 4001637602...4025136987 with 524288 loops 366 microseconds
cpucycles observed persecond 4007411111...4018600248 with 1048576 loops 809 microseconds
</code></pre>
<p><code>gcc14</code>,
Intel Xeon E5-2620 v3,
Debian testing (bookworm),
Linux kernel 6.0.0-6-amd64:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 amd64-pmc precision 41 scaling 1.000000 only32 0
cpucycles tracesetup 1 amd64-tsc precision 148 scaling 1.000000 only32 0
cpucycles tracesetup 2 amd64-tscasm precision 148 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-perfevent precision 159 scaling 1.000000 only32 0
cpucycles tracesetup 4 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 5 default-monotonic precision 289 scaling 3.200000 only32 0
cpucycles tracesetup 6 default-gettimeofday precision 3400 scaling 3200.000000 only32 0
cpucycles tracesetup 7 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3200000000
cpucycles implementation amd64-pmc
cpucycles median 47 +47+28+0+2-5+0+2-5+16+2-5+0+2-5+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0+1-4+0
cpucycles observed persecond 1653800000...2819333334 with 8192 loops 4 microseconds
cpucycles observed persecond 1832111111...2389285715 with 16384 loops 8 microseconds
cpucycles observed persecond 1936058823...2207200000 with 32768 loops 16 microseconds
cpucycles observed persecond 2052843750...2196200000 with 65536 loops 31 microseconds
cpucycles observed persecond 2050750000...2120048388 with 131072 loops 63 microseconds
cpucycles observed persecond 2081896825...2117048388 with 262144 loops 125 microseconds
cpucycles observed persecond 2089478087...2107044177 with 524288 loops 250 microseconds
cpucycles observed persecond 2093343313...2102124249 with 1048576 loops 500 microseconds
</code></pre>
<p><code>gcc23</code>,
Cavium Octeon II V0.1,
Debian 8.11,
Linux kernel 4.1.4:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 mips64-cc precision 24 scaling 1.000000 only32 1
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 46702 scaling 2.399988 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 45799 scaling 2399.987654 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2399987654
cpucycles implementation mips64-cc
cpucycles median 2177 +828+17+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 641900000...1845125000 with 1024 loops 9 microseconds
cpucycles observed persecond 745357142...1352083334 with 2048 loops 13 microseconds
cpucycles observed persecond 809826086...1162333334 with 4096 loops 22 microseconds
cpucycles observed persecond 897717948...1104405406 with 8192 loops 38 microseconds
cpucycles observed persecond 957467532...1059986667 with 16384 loops 76 microseconds
cpucycles observed persecond 973102189...1029777778 with 32768 loops 136 microseconds
cpucycles observed persecond 986518656...1015830828 with 65536 loops 267 microseconds
cpucycles observed persecond 993452830...1008166667 with 131072 loops 529 microseconds
cpucycles observed persecond 996036966...1003403609 with 262144 loops 1054 microseconds
cpucycles observed persecond 984706378...1001682630 with 524288 loops 2131 microseconds
cpucycles observed persecond 992585292...1001178580 with 1048576 loops 4296 microseconds
</code></pre>
<p><code>gcc45</code>,
AMD Athlon II X4 640,
Debian 8.11,
Linux kernel 3.16.0-11-686-pae:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 x86-tsc precision 199 scaling 1.000000 only32 0
cpucycles tracesetup 1 x86-tscasm precision 199 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 170 scaling 1.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 941 scaling 3.000000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 3200 scaling 3000.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3000000000
cpucycles implementation default-perfevent
cpucycles median 72 +12+0+0+0+0+0+0+0+5+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+2+0+0+0+0+0+0+0+1+0+0+0+0+0+0
cpucycles observed persecond 541500000...1812000000 with 1024 loops 3 microseconds
cpucycles observed persecond 712333333...1212250000 with 2048 loops 5 microseconds
cpucycles observed persecond 1193285714...1733600000 with 4096 loops 6 microseconds
cpucycles observed persecond 1689176470...1804562500 with 8192 loops 33 microseconds
cpucycles observed persecond 1713074626...1770600000 with 16384 loops 66 microseconds
cpucycles observed persecond 1765107692...1795140625 with 32768 loops 129 microseconds
cpucycles observed persecond 1785369649...1800603922 with 65536 loops 256 microseconds
cpucycles observed persecond 1781377862...1796288462 with 131072 loops 261 microseconds
cpucycles observed persecond 1772647398...1778247827 with 262144 loops 691 microseconds
cpucycles observed persecond 1789670493...1794149598 with 524288 loops 870 microseconds
cpucycles observed persecond 1860276211...1861561332 with 1048576 loops 3156 microseconds
</code></pre>
<p><code>gcc92</code>,
SiFive Freedom U740,
Ubuntu 22.04,
Linux kernel 5.15.0-1014-generic:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 riscv64-rdcycle precision 8 scaling 1.000000 only32 0
cpucycles tracesetup 1 default-perfevent precision 3024 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 2599 scaling 2.399988 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 2599 scaling 2399.987654 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2399987654
cpucycles implementation riscv64-rdcycle
cpucycles median 8 +33+27+1+1+1+1+0+0+0+22+0+0+0+0+0+0+0+628+0+0+0+7+0+0+0+145+0+0+0+0+0+0+0+22+0+0+0+0+0+0+0+158+0+0+0+0+0+0+0+22+0+0+0+0+0+0+0+22+0+0+0+0+0
cpucycles observed persecond 530250000...1978000000 with 1024 loops 3 microseconds
cpucycles observed persecond 831000000...1915666667 with 2048 loops 4 microseconds
cpucycles observed persecond 1055750000...1689500000 with 4096 loops 7 microseconds
cpucycles observed persecond 1045562500...1305428572 with 8192 loops 15 microseconds
cpucycles observed persecond 1102700000...1236357143 with 16384 loops 29 microseconds
cpucycles observed persecond 1176053571...1247444445 with 32768 loops 55 microseconds
cpucycles observed persecond 1173321428...1209127273 with 65536 loops 111 microseconds
cpucycles observed persecond 1187805429...1205210046 with 131072 loops 220 microseconds
cpucycles observed persecond 1192415909...1201157535 with 262144 loops 439 microseconds
cpucycles observed persecond 1194694760...1199247717 with 524288 loops 877 microseconds
cpucycles observed persecond 1194656004...1197023034 with 1048576 loops 1781 microseconds
</code></pre>
<p><code>gcc103</code>,
Apple M1 (Icestorm-M1 + Firestorm-M1),
Debian unstable (bookworm),
Linux kernel 6.0.0-rc5-asahi-00001-gc62bd3fe430f:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 arm64-pmc precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 1 arm64-vct precision 186 scaling 86.000000 only32 0
cpucycles tracesetup 2 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 4 default-monotonic precision 285 scaling 2.064000 only32 0
cpucycles tracesetup 5 default-gettimeofday precision 2264 scaling 2064.000000 only32 0
cpucycles tracesetup 6 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 2064000000
cpucycles implementation arm64-vct
cpucycles median 0 +0+86+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+86+0+0+0+0+0+0+0+0
cpucycles observed persecond 1784500000...3655000000 with 8192 loops 3 microseconds
cpucycles observed persecond 1773750000...2393666667 with 16384 loops 7 microseconds
cpucycles observed persecond 1897733333...2222769231 with 32768 loops 14 microseconds
cpucycles observed persecond 1951310344...2114962963 with 65536 loops 28 microseconds
cpucycles observed persecond 2024071428...2107000000 with 131072 loops 55 microseconds
cpucycles observed persecond 2041531531...2082935780 with 262144 loops 110 microseconds
cpucycles observed persecond 2051158371...2071461188 with 524288 loops 220 microseconds
cpucycles observed persecond 2058539682...2068309795 with 1048576 loops 440 microseconds
</code></pre>
<p><code>gcc112</code> (<code>gcc2-power8</code>),
IBM POWER8E,
CentOS 7.9 AltArch,
Linux kernel 3.10.0-1127.13.1.el7.ppc64le:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 ppc64-mftb precision 251 scaling 7.207031 only32 0
cpucycles tracesetup 1 default-perfevent precision 295 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 536 scaling 3.690000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 3890 scaling 3690.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3690000000
cpucycles implementation ppc64-mftb
cpucycles median 195 +2969-8+14+0-8+7-8-7+7+6-7-1+0-1+0+7+7-15+7-1-7+6+0+0-8+0+6+0-8+7+0+7-8-8-7-1+7-8+7+0-8+0+14-8-7+6+0-8+7+7-15+0-1+0-1+14+0-15+14+0-1+7+0
cpucycles observed persecond 2603750000...5510000000 with 2048 loops 3 microseconds
cpucycles observed persecond 3430500000...6052250000 with 4096 loops 5 microseconds
cpucycles observed persecond 3411333333...4457500000 with 8192 loops 11 microseconds
cpucycles observed persecond 3548695652...4060333334 with 16384 loops 22 microseconds
cpucycles observed persecond 3624977777...3876534884 with 32768 loops 44 microseconds
cpucycles observed persecond 3621855555...3745363637 with 65536 loops 89 microseconds
cpucycles observed persecond 3660157303...3722227273 with 131072 loops 177 microseconds
cpucycles observed persecond 3680471751...3711622160 with 262144 loops 353 microseconds
cpucycles observed persecond 3685321074...3700886525 with 524288 loops 706 microseconds
cpucycles observed persecond 3687745930...3695537208 with 1048576 loops 1412 microseconds
</code></pre>
<p><code>gcc202</code>,
UltraSparc T5,
Debian unstable (bookworm),
Linux kernel 5.19.0-2-sparc64-smp:</p>
<pre><code>cpucycles version 20230105
cpucycles tracesetup 0 sparc64-rdtick precision 65 scaling 1.000000 only32 0
cpucycles tracesetup 1 default-perfevent precision 386 scaling 1.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 442 scaling 3.599910 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 3799 scaling 3599.910000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 3599910000
cpucycles implementation sparc64-rdtick
cpucycles median 73 +24+0+24+24+24+24+24+24+0+1+24+0+1+24+0+1+24+0+0+1+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+0+1+0+0+0+0+0+0+0+0+0+0+0+0+0
cpucycles observed persecond 2751500000...4258250000 with 4096 loops 5 microseconds
cpucycles observed persecond 3289200000...4206875000 with 8192 loops 9 microseconds
cpucycles observed persecond 3454789473...3900823530 with 16384 loops 18 microseconds
cpucycles observed persecond 3452026315...3659888889 with 32768 loops 37 microseconds
cpucycles observed persecond 3543770270...3650916667 with 65536 loops 73 microseconds
cpucycles observed persecond 3567299319...3620662069 with 131072 loops 146 microseconds
cpucycles observed persecond 3591373287...3618220690 with 262144 loops 291 microseconds
cpucycles observed persecond 3597353344...3610774527 with 524288 loops 582 microseconds
cpucycles observed persecond 3595899403...3603058071 with 1048576 loops 1172 microseconds
</code></pre>
<p>IBM z15:</p>
<pre><code>cpucycles version 20230106
cpucycles tracesetup 0 s390x-stckf precision 250 scaling 1.269531 only32 0
cpucycles tracesetup 1 default-perfevent precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 2 default-mach precision 0 scaling 0.000000 only32 0
cpucycles tracesetup 3 default-monotonic precision 272 scaling 5.200000 only32 0
cpucycles tracesetup 4 default-gettimeofday precision 5400 scaling 5200.000000 only32 0
cpucycles tracesetup 5 default-zero precision 0 scaling 0.000000 only32 0
cpucycles persecond 5200000000
cpucycles implementation s390x-stckf
cpucycles median 48 +87+8+0-2+0+0+38-2+0+1-3+1+28+0+3-3+1+0+28+0-2+3+0-2+36+0+0+0+1+0+28+0-2+0+3-2+35+1+0-2+0+3+28+0-2+0+0-2+3+25+3+0-2+0+1+35+1+0+0-2+0+28+0
cpucycles observed persecond 4948941176...5627733334 with 8192 loops 16 microseconds
cpucycles observed persecond 4104125000...5515666667 with 16384 loops 7 microseconds
cpucycles observed persecond 5047076923...5987818182 with 32768 loops 12 microseconds
cpucycles observed persecond 5044846153...5475708334 with 65536 loops 25 microseconds
cpucycles observed persecond 5141313725...5357428572 with 131072 loops 50 microseconds
cpucycles observed persecond 5150892156...5257250000 with 262144 loops 101 microseconds
cpucycles observed persecond 5183421568...5236549505 with 524288 loops 203 microseconds
cpucycles observed persecond 5190282555...5216582717 with 1048576 loops 406 microseconds
</code></pre><hr><font size=1><b>Version:</b>
This is version 2023.01.06 of the "Counters" web page.
</font>
</div>
</body>
</html>
@@ -1,75 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Download</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt here">Download
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<p>To download and unpack the latest version of libcpucycles:</p>
<pre><code> wget -m https://cpucycles.cr.yp.to/libcpucycles-latest-version.txt
version=$(cat cpucycles.cr.yp.to/libcpucycles-latest-version.txt)
wget -m https://cpucycles.cr.yp.to/libcpucycles-$version.tar.gz
tar -xzf cpucycles.cr.yp.to/libcpucycles-$version.tar.gz
cd libcpucycles-$version
</code></pre>
<p>Then <a href="install.html">install</a>.</p>
<h3>Archives and changelog (reverse chronological)</h3>
<p><a href="libcpucycles-20230115.tar.gz"><code>libcpucycles-20230115.tar.gz</code></a> <a href="libcpucycles-20230115.html">browse</a></p>
<p>Update actual <code>cpucycles_version</code> behavior to match documentation.</p>
<p><a href="libcpucycles-20230110.tar.gz"><code>libcpucycles-20230110.tar.gz</code></a> <a href="libcpucycles-20230110.html">browse</a></p>
<p><code>doc/api.md</code>: Document <code>cpucycles_version()</code>.</p>
<p>Add <code>s390x-stckf</code> counter.</p>
<p><code>cpucycles/default-perfevent.c</code>: Read into <code>int64_t</code> instead of <code>long long</code>.
Add comment explaining issues with <code>PERF_FORMAT_TOTAL_TIME_RUNNING</code>.</p>
<p><code>configure</code>: Improve <code>uname</code> handling.</p>
<p><code>doc/api.md</code>: Update description of default frequency.</p>
<p><a href="libcpucycles-20230105.tar.gz"><code>libcpucycles-20230105.tar.gz</code></a> <a href="libcpucycles-20230105.html">browse</a></p><hr><font size=1><b>Version:</b>
This is version 2023.01.15 of the "Download" web page.
</font>
</div>
</body>
</html>
@@ -1,88 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Intro</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt here">Intro
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<p>libcpucycles is a public-domain microlibrary for counting CPU cycles.
Cycle counts are not as detailed as
<a href="https://gamozolabs.github.io/metrology/2019/08/19/sushi_roll.html">Falk diagrams</a>
but are the most precise timers available to typical software; they are
central tools used in understanding and improving software performance.</p>
<p>The libcpucycles <a href="api.html">API</a> is simple: include <code>&lt;cpucycles.h&gt;</code>, call
<code>cpucycles()</code> to receive a <code>long long</code> whenever desired, and link with
<code>-lcpucycles</code>.</p>
<p><a href="counters.html">Internally</a>, libcpucycles understands machine-level
cycle counters for amd64 (both PMC and TSC), arm32, arm64 (both PMC and
VCT), mips64, ppc32, ppc64, riscv32, riscv64, s390x, sparc64, and x86.
libcpucycles also understands four OS-level mechanisms, which give
varying levels of accuracy: <code>mach_absolute_time</code>, <code>perf_event</code>,
<code>CLOCK_MONOTONIC</code>, and, as a fallback, microsecond-resolution
<code>gettimeofday</code>.</p>
<p>When the program first calls <code>cpucycles()</code>, libcpucycles automatically
benchmarks the available mechanisms and <a href="selection.html">selects</a> the
mechanism that does the best job. Subsequent <code>cpucycles()</code> calls are
thread-safe and very fast. An accompanying <code>cpucycles-info</code> program
prints a summary of cycle-counter accuracy.</p>
<p>For comparison, there is a simple-sounding <code>__rdtsc()</code> API provided by
compilers, but this works only on Intel/AMD CPUs and is generally noisier
than PMC. There is a <code>__builtin_readcyclecounter()</code> that works on more
CPUs, but this works only with <code>clang</code> and has the same noise problems.
Both of these mechanisms put the burden on the caller to figure out what
can be done on other CPUs. Various packages include their own more
portable abstraction layers for counting cycles (see, e.g., FFTW's
<a href="https://github.com/FFTW/fftw3/blob/master/kernel/cycle.h"><code>cycle.h</code></a>,
used to automatically select from among multiple implementations
provided by FFTW), but this creates per-package effort to keep up with
the latest cycle counters. The goal of libcpucycles is to provide
state-of-the-art cycle counting centrally for all packages to use.</p><hr><font size=1><b>Version:</b>
This is version 2023.01.06 of the "Intro" web page.
</font>
</div>
</body>
</html>
@@ -1,101 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Install</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt here">Install
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<p>Prerequisites: <code>python3</code>; <code>gcc</code> and/or <code>clang</code>. Currently tested only
under Linux, but porting to other systems shouldn't be difficult.</p>
<p>For sysadmins, to install in <code>/usr/local/{include,lib,bin}</code>:</p>
<pre><code> ./configure &amp;&amp; make -j8 install
</code></pre>
<p>For developers with an unprivileged account (typically with</p>
<pre><code> export LD_LIBRARY_PATH="$HOME/lib"
 export LIBRARY_PATH="$HOME/lib"
 export CPATH="$HOME/include"
 export PATH="$HOME/bin:$PATH"
</code></pre>
<p>in <code>$HOME/.profile</code>), to install in <code>$HOME/{include,lib,bin}</code>:</p>
<pre><code> ./configure --prefix=$HOME &amp;&amp; make -j8 install
</code></pre>
<p>For distributors creating a package: Run</p>
<pre><code> ./configure --prefix=/usr &amp;&amp; make -j8
</code></pre>
<p>and then follow your usual packaging procedures for the
<code>build/0/package</code> files:</p>
<pre><code> build/0/package/man/man3/cpucycles.3
 build/0/package/include/cpucycles.h
 build/0/package/lib/libcpucycles*
 build/0/package/bin/cpucycles-info
</code></pre>
<p>There are some old systems where libcpucycles requires <code>-lrt</code> for
<code>clock_gettime</code>; currently <code>libcpucycles.so</code> doesn't link to <code>-lrt</code>,
so it's up to the caller to link to <code>-lrt</code>.</p>
<p>More options: You can run</p>
<pre><code> ./configure --host=amd64
</code></pre>
<p>to override <code>./configure</code>'s guess of the architecture that it should
compile for. The architecture controls which cycle counters to try
compiling: e.g., <code>amd64</code> tries compiling <code>cpucycles/amd64*</code> and
<code>cpucycles/default*</code>.</p>
<p>Inside the <code>build</code> directory, <code>0</code> is symlinked to <code>amd64</code> for
<code>--host=amd64</code>. Running <code>make clean</code> removes <code>build/amd64</code>. Re-running
<code>./configure</code> automatically starts with <code>make clean</code>.</p>
<p>A subsequent <code>./configure --host=arm64</code> will create <code>build/arm64</code> and
symlink <code>0 -&gt; arm64</code>, without touching an existing <code>build/amd64</code>.
However, cross-compilers aren't yet selected automatically.</p>
<p>Compilers tried are listed in <code>compilers/default</code>. Each compiler
includes <code>-fPIC</code> to create a shared library, <code>-fvisibility=hidden</code> to
hide non-public symbols in the library, and <code>-fwrapv</code> to switch to a
slightly less dangerous version of C. The first compiler that seems to
work is used to compile everything.</p><hr><font size=1><b>Version:</b>
This is version 2023.01.05 of the "Install" web page.
</font>
</div>
</body>
</html>
@@ -1,122 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Security</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt away"><a href=selection.html>Selection</a>
</div><div class="navt here">Security
</div></div>
<div class=main>
<p>Many security systems have been shown to be breakable by "timing
attacks". These attacks extract secrets by analyzing timings of the
legitimate user's operations on secret data. See the June 2022 survey
page <a href="https://timing.attacks.cr.yp.to">https://timing.attacks.cr.yp.to</a>
for an overview and further references.</p>
<p>Sometimes these attacks are used as motivation to disable the attacker's
access to various timing mechanisms. For example, Firefox rounds its
<code>performance.now</code> timer to 1-millisecond resolution
<a href="https://developer.mozilla.org/en-US/docs/Web/API/Performance/now">"to mitigate potential security threats"</a>.</p>
<p>As another example, reducing <code>/proc/sys/kernel/perf_event_paranoid</code>
under Linux to 2 (from 3 or higher), so that libcpucycles has access to
the best available Intel/AMD cycle counter (RDPMC), also means making
this cycle counter and other performance-monitoring counters available
to any attacker-controlled software running on the computer. Perhaps
this helps timing attacks, not to mention the possibility of opening up
other vulnerabilities via the complicated <code>perf_event</code> interface.</p>
<p>As yet another example, ARM CPUs disable user access to the main CPU
cycle counter by default. Installing a kernel module to enable user
access to the cycle counter could help attacks.</p>
<p>Given the availability of simple mechanisms to disable RDPMC etc., it is
easy to recommend using those mechanisms. To avoid creating unnecessary
tension between those recommendations and the use of libcpucycles,
applications that use libcpucycles should be structured so that
high-resolution timers are used only on controlled development and
benchmarking machines, not on general end-user machines.</p>
<p>This structure might seem incompatible with using cycle counts to
automatically select the best of multiple options, as in FFTW. However,
new infrastructure introduced in <a href="https://lib25519.cr.yp.to">lib25519</a>
automatically selects options on end-user machines based on cycle counts
that were <em>collected on benchmarking machines</em>.</p>
<p>The above text should not be understood as endorsing the idea that
disabling timers is an <em>effective</em> defense against timing attacks.
Certainly disabling high-resolution timers is not sufficient for
security: there are many ways for attackers to amplify timing signals
and to statistically filter out noise from low-resolution timers.
Disabling <em>every</em> standard timing mechanism on the machine does not stop
the attacker from accessing a remote timer or a counter maintained by
the attacker's software. Perhaps disabling timers sometimes makes the
difference between a feasible attack and an infeasible attack, but
evaluating this is extremely difficult.</p>
<p>Meanwhile there is an auditable methodology available to stop timing
attacks: constant-time programming, which systematically cuts off data
flow from secrets to timings.</p>
<p>For example, secrets affect a CPU's power consumption, and Turbo Boost
creates data flow from power consumption to timings, as illustrated by
the <a href="https://www.hertzbleed.com">Hertzbleed attack</a> extracting secret
keys from the SIKE cryptosystem (before SIKE was broken in other ways),
and an <a href="https://arxiv.org/abs/2206.07012">independent attack</a>
extracting secret AES keys. Consequently, the constant-time methodology
does not allow Turbo Boost.</p>
<p>This is why <a href="https://timing.attacks.cr.yp.to">https://timing.attacks.cr.yp.to</a>
recommends turning off Turbo Boost "right now", and explains the
mechanisms available to do this. One non-security reason that it was
already normal (although not universal) for manufacturers to provide
these mechanisms to end users is that Turbo Boost has a reputation for
causing premature hardware failures. Turbo Boost also provides very
little speed benefit for modern multithreaded vectorized applications.</p>
<p>Another reaction to timing attacks is to apply "masking" techniques.
These techniques <em>seem</em> to make it more difficult for attackers to
extract secrets from power consumption and other side channels. However,
as <a href="https://timing.attacks.cr.yp.to">https://timing.attacks.cr.yp.to</a>
explains, it is "practically impossible for an auditor to obtain any
real assurance that these techniques are secure". See the December 2022
paper
<a href="https://eprint.iacr.org/2022/1713">"Breaking a fifth-order masked implementation of CRYSTALS-Kyber by copy-paste"</a>
for a newer example of a security failure in a masked implementation.</p><hr><font size=1><b>Version:</b>
This is version 2023.01.05 of the "Security" web page.
</font>
</div>
</body>
</html>
@@ -1,158 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style type="text/css">
html{overflow-y:scroll}
body{font-family:sans-serif}
p,ul,ol,blockquote,pre{font-size:0.9em;line-height:1.6em}
li p{font-size:1.0em}
blockquote p{font-size:1.0em}
tt{font-size:1.2em}
code{font-size:1.2em}
h1{font-size:1.5em}
h2{font-size:1.3em}
h3{font-size:1.0em}
h1 a{text-decoration:none}
table{border-collapse:collapse}
th,td{border:1px solid black}
table a{text-decoration:none}
table tr{font-size:0.9em;line-height:1.6em}
.links a:hover{text-decoration:underline}
.links a:active{text-decoration:underline}
.links img{width:200px;padding-left:1em}
.links td{border:0px;padding-top:0.5em;padding-bottom:0.5em}
.headline{padding:0;font-weight:bold;font-size:1.5em;vertical-align:top;padding-bottom:0.5em;color:#125d0d}
.navt{display:inline-block;box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;
min-width:14%;margin:0;padding:0;padding-left:0.5em;padding-right:0.5em;vertical-align:center;
font-weight:bold;font-size:1.1em;text-align:center;border:1px solid black}
.here{border-bottom:0px;background-color:#ffffff}
.away{background-color:#125d0d;}
.away a{text-decoration:none;display:block;color:#ffffff}
.away a:hover,.away a:active{text-decoration:underline}
.main{margin:0;padding-top:0em;padding-bottom:1%;clear:both}
</style>
<title>
Selection</title>
</head>
<body>
<div class=headline>
libcpucycles
</div>
<div class=nav>
<div class="navt away"><a href=index.html>Intro</a>
</div><div class="navt away"><a href=download.html>Download</a>
</div><div class="navt away"><a href=install.html>Install</a>
</div><div class="navt away"><a href=api.html>API</a>
</div><div class="navt away"><a href=counters.html>Counters</a>
</div><div class="navt here">Selection
</div><div class="navt away"><a href=security.html>Security</a>
</div></div>
<div class=main>
<p>Here is how libcpucycles decides which cycle counter to use. The
underlying principles are as follows:</p>
<ul>
<li>
<p>Failure is not allowed. Using a low-resolution timer such as
<code>gettimeofday()</code> to estimate cycle counts is not desirable but is better
than providing no information.</p>
</li>
<li>
<p>A counter that does well on some CPUs and OSes can do badly on others.
The counter selection in libcpucycles is based not just on rules set
at compile time but also on measurements of how well the counters
perform when the program first calls <code>cpucycles()</code>.</p>
</li>
<li>
<p>A critical application of cycle counting is collecting cycle counts
for multiple options to see which option is faster. It is the caller's
responsibility to compute medians of cycle counts for many runs of
whatever is being benchmarked: medians filter out occasional
cycle-count jumps caused by migration to another core (if the
benchmark is not pinned to a single core) or interrupts from other OS
activity. libcpucycles does not reject an otherwise attractive counter
merely because of occasional jumps.</p>
</li>
<li>
<p>Cycle-counting overhead is not desirable, but does not directly affect
comparisons of multiple options measured using the same cycle counter,
so it is less important than consistent major errors such as treating
2^32 + x cycles as x cycles. (Performance experts seeing a function
that takes billions of cycles usually focus on smaller subroutines,
but libcpucycles should not break larger measurements.) This is why
libcpucycles does not provide direct access to 32-bit cycle counters:
it provides wrappers that combine the counters with <code>gettimeofday()</code> to
produce 64 bits, even though this incurs some extra overhead.</p>
</li>
<li>
<p>The noise introduced by typical off-core clocks, such as multiplying a
24MHz clock by 86 to estimate cycles on a 2.064GHz CPU core, comes in
small part from low resolution but much more from changes in CPU
frequency: e.g., a 10000-cycle computation might be measured as 20000
cycles when the CPU enters a power-saving mode. When libcpucycles has
access to what is believed to be an on-core cycle counter, it uses
that even when its measurements show some noise. (Choosing an on-core
cycle counter does not magically eliminate the change in the relative
speed of the CPU and DRAM; the usual advice to warm up the CPU and set
constant frequencies if possible still applies.)</p>
</li>
</ul>
<p>When <code>cpucycles()</code> is first called, libcpucycles tries running each
cycle counter that has been compiled into the library. For example, for
64-bit ARM CPUs, libcpucycles will try <code>arm64-pmc</code>, <code>arm64-vct</code>,
<code>default-gettimeofday</code>, <code>default-mach</code>, <code>default-monotonic</code>, and
<code>default-perfevent</code>, minus any of those that failed to compile.</p>
<p>Cycle counters that fail at run time with SIGILL (or SIGFPE or SIGBUS or
SIGSEGV) are eliminated from the list. For example, <code>arm64-pmc</code> will
fail with SIGILL if the kernel does not allow user access to
<code>PMCCNTR_EL0</code>. Beware that libcpucycles does not catch SIGILL after its
initial tests: if the kernel initially allows user access to
<code>PMCCNTR_EL0</code> but later turns it off then <code>arm64-pmc</code> will crash.</p>
<p>Independently of these counters, libcpucycles uses various OS mechanisms
to obtain an <em>estimate</em> of the CPU frequency. This estimate is also
available to the caller as <code>cpucycles_persecond()</code>.</p>
<p>The methods that libcpucycles uses to ask the OS for an estimated CPU
frequency fail on some OS-CPU combinations, in which case libcpucycles
falls back to a <code>cpucyclespersecond</code> environment variable, or, if that
variable does not exist, an estimate of 2399987654 cycles per second.
(This estimate is in a realistic range of CPU speeds, and is close to
multiples of 24MHz, 25MHz, and 19.2MHz, which are common crystal
frequencies.) The sysadmin can create <code>/etc/cpucyclespersecond</code> to
override all of the OS mechanisms.</p>
<p>For counters that do not ask for scaling, the estimated CPU frequency is
shown in <code>cpucycles-info</code> as a double-check on the counter results. For
counters that ask for scaling, libcpucycles uses the estimated CPU
frequency to compute the scaling, so this is not a double-check. If a
counter asks for scaling and the estimated CPU frequency does not seem
close to a multiple of the counter frequency (possibly with a small
power-of-2 denominator) then libcpucycles will throw the counter away,
except in the case of fixed-resolution OS counters such as
<code>gettimeofday</code> and <code>CLOCK_MONOTONIC</code>.</p>
<p>libcpucycles computes a precision estimate for each counter (times any
applicable scaling) as follows. Call the counter 1000 times. Check that
the counter has never decreased, and has increased at least once. (A
counter where the decrease/increase checks fail is retried 10 times, so
10000 calls overall, and removed if it fails all 10 times.) The
precision estimate is then the smallest nonzero difference between
adjacent counter results, plus a penalty explained below.</p>
<p>The penalty is 100 cycles for off-core counters (including RDTSC) and
<code>default-perfevent</code>, and 200 cycles for fixed-resolution OS counters.
For example, an on-core CPU cycle counter will be selected even if it
actually has, e.g., a resolution of 8 cycles and 50 cycles of overhead.</p>
<p>Finally, libcpucycles selects the counter where the precision estimate
is the smallest number of cycles. Note that an inaccurate estimate of
CPU frequency can influence the choice between a scaled counter and an
unscaled counter.</p>
<p>libcpucycles does <em>not</em> carry out its counter selection (typically tens
of milliseconds, sometimes even more) as a static initializer; callers
are presumed to not want to incur the cost of initialization unless and
until they are actually using <code>cpucycles()</code>. A multithreaded caller thus
has to place locks around any possibly-first call to <code>cpucycles()</code>, or
create its own static initializer (an <code>__attribute__((constructor))</code>
function) with an initial <code>cpucycles()</code> call so that all subsequent
<code>cpucycles()</code> calls are thread-safe.</p><hr><font size=1><b>Version:</b>
This is version 2023.01.05 of the "Selection" web page.
</font>
</div>
</body>
</html>
-56
View File
@@ -1,56 +0,0 @@
Prerequisites: `python3`; `gcc` and/or `clang`. Currently tested only
under Linux, but porting to other systems shouldn't be difficult.

For sysadmins, to install in `/usr/local/{include,lib,bin}`:

     ./configure && make -j8 install

For developers with an unprivileged account (typically with

     export LD_LIBRARY_PATH="$HOME/lib"
     export LIBRARY_PATH="$HOME/lib"
     export CPATH="$HOME/include"
     export PATH="$HOME/bin:$PATH"

in `$HOME/.profile`), to install in `$HOME/{include,lib,bin}`:

     ./configure --prefix=$HOME && make -j8 install

For distributors creating a package: Run

     ./configure --prefix=/usr && make -j8

and then follow your usual packaging procedures for the
`build/0/package` files:

     build/0/package/man/man3/cpucycles.3
     build/0/package/include/cpucycles.h
     build/0/package/lib/libcpucycles*
     build/0/package/bin/cpucycles-info

There are some old systems where libcpucycles requires `-lrt` for
`clock_gettime`; currently `libcpucycles.so` doesn't link to `-lrt`,
so it's up to the caller to link to `-lrt`.

More options: You can run

     ./configure --host=amd64

to override `./configure`'s guess of the architecture that it should
compile for. The architecture controls which cycle counters to try
compiling: e.g., `amd64` tries compiling `cpucycles/amd64*` and
`cpucycles/default*`.

Inside the `build` directory, `0` is symlinked to `amd64` for
`--host=amd64`. Running `make clean` removes `build/amd64`. Re-running
`./configure` automatically starts with `make clean`.

A subsequent `./configure --host=arm64` will create `build/arm64` and
symlink `0 -> arm64`, without touching an existing `build/amd64`.
However, cross-compilers aren't yet selected automatically.

Compilers tried are listed in `compilers/default`. Each compiler
includes `-fPIC` to create a shared library, `-fvisibility=hidden` to
hide non-public symbols in the library, and `-fwrapv` to switch to a
slightly less dangerous version of C. The first compiler that seems to
work is used to compile everything.
@@ -1,57 +0,0 @@
.\" Automatically generated by Pandoc 2.9.2.1
.\"
.TH "cpucycles" "3" "" "" ""
.hy
.SS NAME
.PP
cpucycles - count CPU cycles
.SS SYNOPSIS
.IP
.nf
\f[C]
#include <cpucycles.h>
long long count = cpucycles();
long long persecond = cpucycles_persecond();
const char *implementation = cpucycles_implementation();
const char *version = cpucycles_version();
\f[R]
.fi
.PP
Link with \f[C]-lcpucycles\f[R].
Old systems may also need \f[C]-lrt\f[R].
.SS DESCRIPTION
.PP
\f[C]cpucycles()\f[R] returns an estimate for the number of CPU cycles
that have occurred since an unspecified time in the past (perhaps system
boot, perhaps program startup).
.PP
Accessing true cycle counters can be difficult on some CPUs and
operating systems.
\f[C]cpucycles()\f[R] does its best to produce accurate results, but
selects a low-precision counter if the only other option is failure.
.PP
\f[C]cpucycles_persecond()\f[R] returns an estimate for the number of
CPU cycles per second.
This estimate comes from \f[C]/etc/cpucyclespersecond\f[R] if that file
exists, otherwise from various OS mechanisms, otherwise from the
\f[C]cpucyclespersecond\f[R] environment variable if that is set,
otherwise 2399987654.
.PP
\f[C]cpucycles_implementation()\f[R] returns the name of the counter in
use: e.g., \f[C]\[dq]amd64-pmc\[dq]\f[R].
.PP
\f[C]cpucycles_version()\f[R] returns the \f[C]libcpucycles\f[R] version
number as a string: e.g., \f[C]\[dq]20230115\[dq]\f[R].
Results of \f[C]cpucycles_implementation()\f[R] should be interpreted
relative to \f[C]cpucycles_version()\f[R].
.PP
\f[C]cpucycles\f[R] is actually a function pointer.
The first call to \f[C]cpucycles()\f[R] or
\f[C]cpucycles_persecond()\f[R] or \f[C]cpucycles_implementation()\f[R]
selects one of the available counters and updates the
\f[C]cpucycles\f[R] pointer accordingly.
Subsequent calls to \f[C]cpucycles()\f[R] are thread-safe.
.SS SEE ALSO
.PP
\f[B]gettimeofday\f[R](2), \f[B]clock_gettime\f[R](2)
-36
View File
@@ -1,36 +0,0 @@
libcpucycles is a public-domain microlibrary for counting CPU cycles.
Cycle counts are not as detailed as
[Falk diagrams](https://gamozolabs.github.io/metrology/2019/08/19/sushi_roll.html)
but are the most precise timers available to typical software; they are
central tools used in understanding and improving software performance.

The libcpucycles [API](api.html) is simple: include `<cpucycles.h>`, call
`cpucycles()` to receive a `long long` whenever desired, and link with
`-lcpucycles`.

[Internally](counters.html), libcpucycles understands machine-level
cycle counters for amd64 (both PMC and TSC), arm32, arm64 (both PMC and
VCT), mips64, ppc32, ppc64, riscv32, riscv64, s390x, sparc64, and x86.
libcpucycles also understands four OS-level mechanisms, which give
varying levels of accuracy: `mach_absolute_time`, `perf_event`,
`CLOCK_MONOTONIC`, and, as a fallback, microsecond-resolution
`gettimeofday`.

When the program first calls `cpucycles()`, libcpucycles automatically
benchmarks the available mechanisms and [selects](selection.html) the
mechanism that does the best job. Subsequent `cpucycles()` calls are
thread-safe and very fast. An accompanying `cpucycles-info` program
prints a summary of cycle-counter accuracy.

For comparison, there is a simple-sounding `__rdtsc()` API provided by
compilers, but this works only on Intel/AMD CPUs and is generally noisier
than PMC. There is a `__builtin_readcyclecounter()` that works on more
CPUs, but this works only with `clang` and has the same noise problems.
Both of these mechanisms put the burden on the caller to figure out what
can be done on other CPUs. Various packages include their own more
portable abstraction layers for counting cycles (see, e.g., FFTW's
[`cycle.h`](https://github.com/FFTW/fftw3/blob/master/kernel/cycle.h),
used to automatically select from among multiple implementations
provided by FFTW), but this creates per-package effort to keep up with
the latest cycle counters. The goal of libcpucycles is to provide
state-of-the-art cycle counting centrally for all packages to use.
-76
View File
@@ -1,76 +0,0 @@
Many security systems have been shown to be breakable by "timing
attacks". These attacks extract secrets by analyzing timings of the
legitimate user's operations on secret data. See the June 2022 survey
page [https://timing.attacks.cr.yp.to](https://timing.attacks.cr.yp.to)
for an overview and further references.
Sometimes these attacks are used as motivation to disable the attacker's
access to various timing mechanisms. For example, Firefox rounds its
`performance.now` timer to 1-millisecond resolution
["to mitigate potential security threats"](https://developer.mozilla.org/en-US/docs/Web/API/Performance/now).
As another example, reducing `/proc/sys/kernel/perf_event_paranoid`
under Linux to 2 (from 3 or higher), so that libcpucycles has access to
the best available Intel/AMD cycle counter (RDPMC), also means making
this cycle counter and other performance-monitoring counters available
to any attacker-controlled software running on the computer. Perhaps
this helps timing attacks, not to mention the possibility of opening up
other vulnerabilities via the complicated `perf_event` interface.
As yet another example, ARM CPUs disable user access to the main CPU
cycle counter by default. Installing a kernel module to enable user
access to the cycle counter could help attacks.
Given the availability of simple mechanisms to disable RDPMC etc., it is
easy to recommend using those mechanisms. To avoid creating unnecessary
tension between those recommendations and the use of libcpucycles,
applications that use libcpucycles should be structured so that
high-resolution timers are used only on controlled development and
benchmarking machines, not on general end-user machines.
This structure might seem incompatible with using cycle counts to
automatically select the best of multiple options, as in FFTW. However,
new infrastructure introduced in [lib25519](https://lib25519.cr.yp.to)
automatically selects options on end-user machines based on cycle counts
that were _collected on benchmarking machines_.
The above text should not be understood as endorsing the idea that
disabling timers is an _effective_ defense against timing attacks.
Certainly disabling high-resolution timers is not sufficient for
security: there are many ways for attackers to amplify timing signals
and to statistically filter out noise from low-resolution timers.
Disabling _every_ standard timing mechanism on the machine does not stop
the attacker from accessing a remote timer or a counter maintained by
the attacker's software. Perhaps disabling timers sometimes makes the
difference between a feasible attack and an infeasible attack, but
evaluating this is extremely difficult.
Meanwhile there is an auditable methodology available to stop timing
attacks: constant-time programming, which systematically cuts off data
flow from secrets to timings.
For example, secrets affect a CPU's power consumption, and Turbo Boost
creates data flow from power consumption to timings, as illustrated by
the [Hertzbleed attack](https://www.hertzbleed.com) extracting secret
keys from the SIKE cryptosystem (before SIKE was broken in other ways),
and an [independent attack](https://arxiv.org/abs/2206.07012)
extracting secret AES keys. Consequently, the constant-time methodology
does not allow Turbo Boost.
This is why [https://timing.attacks.cr.yp.to](https://timing.attacks.cr.yp.to)
recommends turning off Turbo Boost "right now", and explains the
mechanisms available to do this. One non-security reason that it was
already normal (although not universal) for manufacturers to provide
these mechanisms to end users is that Turbo Boost has a reputation for
causing premature hardware failures. Turbo Boost also provides very
little speed benefit for modern multithreaded vectorized applications.
Another reaction to timing attacks is to apply "masking" techniques.
These techniques _seem_ to make it more difficult for attackers to
extract secrets from power consumption and other side channels. However,
as [https://timing.attacks.cr.yp.to](https://timing.attacks.cr.yp.to)
explains, it is "practically impossible for an auditor to obtain any
real assurance that these techniques are secure". See the December 2022
paper
["Breaking a fifth-order masked implementation of CRYSTALS-Kyber by copy-paste"](https://eprint.iacr.org/2022/1713)
for a newer example of a security failure in a masked implementation.
-104
View File
@@ -1,104 +0,0 @@
Here is how libcpucycles decides which cycle counter to use. The
underlying principles are as follows:
* Failure is not allowed. Using a low-resolution timer such as
`gettimeofday()` to estimate cycle counts is not desirable but is better
than providing no information.
* A counter that does well on some CPUs and OSes can do badly on others.
The counter selection in libcpucycles is based not just on rules set
at compile time but also on measurements of how well the counters
perform when the program first calls `cpucycles()`.
* A critical application of cycle counting is collecting cycle counts
for multiple options to see which option is faster. It is the caller's
responsibility to compute medians of cycle counts for many runs of
whatever is being benchmarked: medians filter out occasional
cycle-count jumps caused by migration to another core (if the
benchmark is not pinned to a single core) or interrupts from other OS
activity. libcpucycles does not reject an otherwise attractive counter
merely because of occasional jumps.
* Cycle-counting overhead is not desirable, but does not directly affect
comparisons of multiple options measured using the same cycle counter,
so it is less important than consistent major errors such as treating
2^32 + x cycles as x cycles. (Performance experts seeing a function
that takes billions of cycles usually focus on smaller subroutines,
but libcpucycles should not break larger measurements.) This is why
libcpucycles does not provide direct access to 32-bit cycle counters:
it provides wrappers that combine the counters with `gettimeofday()` to
produce 64 bits, even though this incurs some extra overhead.
* The noise introduced by typical off-core clocks, such as multiplying a
24MHz clock by 86 to estimate cycles on a 2.064GHz CPU core, comes in
small part from low resolution but much more from changes in CPU
frequency: e.g., a 10000-cycle computation might be measured as 20000
cycles when the CPU enters a power-saving mode. When libcpucycles has
access to what is believed to be an on-core cycle counter, it uses
that even when its measurements show some noise. (Choosing an on-core
cycle counter does not magically eliminate the change in the relative
speed of the CPU and DRAM; the usual advice to warm up the CPU and set
constant frequencies if possible still applies.)
When `cpucycles()` is first called, libcpucycles tries running each
cycle counter that has been compiled into the library. For example, for
64-bit ARM CPUs, libcpucycles will try `arm64-pmc`, `arm64-vct`,
`default-gettimeofday`, `default-mach`, `default-monotonic`, and
`default-perfevent`, minus any of those that failed to compile.
Cycle counters that fail at run time with SIGILL (or SIGFPE or SIGBUS or
SIGSEGV) are eliminated from the list. For example, `arm64-pmc` will
fail with SIGILL if the kernel does not allow user access to
`PMCCNTR_EL0`. Beware that libcpucycles does not catch SIGILL after its
initial tests: if the kernel initially allows user access to
`PMCCNTR_EL0` but later turns it off then `arm64-pmc` will crash.
Independently of these counters, libcpucycles uses various OS mechanisms
to obtain an _estimate_ of the CPU frequency. This estimate is also
available to the caller as `cpucycles_persecond()`.
The methods that libcpucycles uses to ask the OS for an estimated CPU
frequency fail on some OS-CPU combinations, in which case libcpucycles
falls back to a `cpucyclespersecond` environment variable, or, if that
variable does not exist, an estimate of 2399987654 cycles per second.
(This estimate is in a realistic range of CPU speeds, and is close to
multiples of 24MHz, 25MHz, and 19.2MHz, which are common crystal
frequencies.) The sysadmin can create `/etc/cpucyclespersecond` to
override all of the OS mechanisms.
For counters that do not ask for scaling, the estimated CPU frequency is
shown in `cpucycles-info` as a double-check on the counter results. For
counters that ask for scaling, libcpucycles uses the estimated CPU
frequency to compute the scaling, so this is not a double-check. If a
counter asks for scaling and the estimated CPU frequency does not seem
close to a multiple of the counter frequency (possibly with a small
power-of-2 denominator) then libcpucycles will throw the counter away,
except in the case of fixed-resolution OS counters such as
`gettimeofday` and `CLOCK_MONOTONIC`.
libcpucycles computes a precision estimate for each counter (times any
applicable scaling) as follows. Call the counter 1000 times. Check that
the counter has never decreased, and has increased at least once. (A
counter where the decrease/increase checks fail is retried 10 times, so
10000 calls overall, and removed if it fails all 10 times.) The
precision estimate is then the smallest nonzero difference between
adjacent counter results, plus a penalty explained below.
The penalty is 100 cycles for off-core counters (including RDTSC) and
`default-perfevent`, and 200 cycles for fixed-resolution OS counters.
For example, an on-core CPU cycle counter will be selected even if it
actually has, e.g., a resolution of 8 cycles and 50 cycles of overhead.
Finally, libcpucycles selects the counter where the precision estimate
is the smallest number of cycles. Note that an inaccurate estimate of
CPU frequency can influence the choice between a scaled counter and an
unscaled counter.
libcpucycles does _not_ carry out its counter selection (which typically
takes tens of milliseconds, sometimes even more) as a static initializer; callers
are presumed to not want to incur the cost of initialization unless and
until they are actually using `cpucycles()`. A multithreaded caller thus
has to place locks around any possibly-first call to `cpucycles()`, or
create its own static initializer (an `__attribute__((constructor))`
function) with an initial `cpucycles()` call so that all subsequent
`cpucycles()` calls are thread-safe.
@@ -1,27 +0,0 @@
#!/usr/bin/env python3
# Install the staged tree under package/ into the prefix given as the first
# command-line argument.

import os
import sys
import shutil
import tempfile

prefix = sys.argv[1]
dirs = 'man/man3','lib','include','bin'

# Maps each staged subdirectory name to its destination directory.
install = {}

# Destination directories are created while the umask is 022.
os.umask(0o22)
for target in dirs:
    destdir = '%s/%s'%(prefix,target)
    install[target] = destdir
    os.makedirs(destdir,exist_ok=True)

# The umask is tightened to 077 before any file is copied.
os.umask(0o77)
for target in dirs:
    destdir = install[target]
    # Each file is first copied into a temporary directory created inside the
    # destination and then renamed to its final name (presumably so the final
    # step is a same-filesystem rename rather than a partial copy).
    with tempfile.TemporaryDirectory(dir=destdir) as staging:
        for fn in sorted(os.listdir('package/'+target)):
            source = 'package/%s/%s' % (target,fn)
            staged = '%s/%s' % (staging,fn)
            try:
                shutil.copy2(source,staged,follow_symlinks=False)
            except TypeError: # XXX: old python3; should copy symlinks manually
                shutil.copy2(source,staged)
            os.rename(staged,'%s/%s' % (destdir,fn))
@@ -1,6 +0,0 @@
#!/bin/sh
# Rebuild the static archive from the object files passed as arguments.

archive=package/lib/libcpucycles.a

# Start from scratch so members of an earlier archive cannot linger.
rm -f "$archive"
ar cr "$archive" "$@"
# A ranlib failure is deliberately ignored.
ranlib "$archive" || true
chmod 644 "$archive"
-1
View File
@@ -1 +0,0 @@
20230115
-9
View File
@@ -1,9 +0,0 @@
// Raw FFI declarations for the C library `cpucycles`, linked statically.
// Every item here is unsafe to use from Rust; safe wrappers are expected to
// live in the crate that declares this module.
#[link(name = "cpucycles", kind = "static")]
extern "C" {
// Nullable C function pointer that returns a `long long` when called.
// It is a mutable static because the symbol is a variable on the C side, not
// a function. NOTE(review): presumably the library re-points it after
// selecting a counter on the first call - confirm against cpucycles.h.
pub static mut cpucycles:
::std::option::Option<unsafe extern "C" fn() -> ::std::os::raw::c_longlong>;
// Returns a pointer to a C string. NOTE(review): presumably the name of the
// counter implementation in use; ownership/lifetime of the string is not
// visible here - confirm it is static storage.
pub fn cpucycles_implementation() -> *const ::std::os::raw::c_char;
// Returns a pointer to a C string. NOTE(review): presumably the library
// version; same lifetime caveat as `cpucycles_implementation`.
pub fn cpucycles_version() -> *const ::std::os::raw::c_char;
// Returns a `long long`. NOTE(review): presumably the library's estimate of
// the CPU frequency in cycles per second - confirm.
pub fn cpucycles_persecond() -> ::std::os::raw::c_longlong;
// Takes no arguments and returns nothing. NOTE(review): purpose inferred
// from the name only (tracing of counter setup) - confirm.
pub fn cpucycles_tracesetup();
}
-82
View File
@@ -1,82 +0,0 @@
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
mod bindings;
use bindings as c;
use std::fmt;
use std::{
error::Error,
ffi::{CStr, CString, IntoStringError},
};
/// Error reported by the safe wrappers when a libcpucycles call cannot be
/// carried out.
#[derive(Debug)]
pub struct CpuCyclesError {
    /// Human-readable description of what went wrong.
    message: String,
}

impl fmt::Display for CpuCyclesError {
    /// Emits the stored message verbatim; width and alignment flags on the
    /// caller's format spec are not applied.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str(&self.message)
    }
}

impl Error for CpuCyclesError {}
/// Safe wrapper that forwards to the C routine `cpucycles_tracesetup`.
///
/// The call takes no input and yields no value.
pub fn cpucycles_tracesetup() {
    // SAFETY: the foreign function is declared without parameters or return
    // value, so no pointer or lifetime invariants have to be upheld here.
    // NOTE(review): thread-safety of the C routine is not visible - confirm.
    unsafe {
        c::cpucycles_tracesetup();
    }
}
/// Reads the current counter value through the library's `cpucycles`
/// function pointer.
///
/// # Errors
///
/// Returns [`CpuCyclesError`] when the function pointer is null.
pub fn cpucycles() -> Result<i64, CpuCyclesError> {
    // SAFETY: the mutable static is copied by value (a nullable function
    // pointer); no reference to it is created.
    // NOTE(review): assumes no concurrent writer on the C side - confirm.
    let counter = unsafe { c::cpucycles };

    match counter {
        // SAFETY: the pointer is non-null and declared as taking no arguments.
        Some(read_counter) => Ok(unsafe { read_counter() }),
        None => Err(CpuCyclesError {
            message: "Could not execute cpucycles!".to_string(),
        }),
    }
}
/// Returns the value reported by the C routine `cpucycles_persecond`.
///
/// The `Result` wrapper mirrors the other wrappers in this module; this
/// function itself never produces an `Err`.
pub fn cpucycles_persecond() -> Result<i64, CpuCyclesError> {
    // SAFETY: the foreign function takes no arguments and returns a plain
    // integer, so there are no pointer invariants to uphold.
    let per_second = unsafe { c::cpucycles_persecond() };
    Ok(per_second)
}
/// Returns an owned copy of the string produced by the C routine
/// `cpucycles_implementation`.
///
/// # Errors
///
/// Returns [`IntoStringError`] when the C string is not valid UTF-8.
pub fn cpucycles_implementation() -> Result<String, IntoStringError> {
    let raw = unsafe { c::cpucycles_implementation() };
    // SAFETY: relies on the C library handing back a non-null,
    // NUL-terminated string that stays valid while it is copied.
    // NOTE(review): the null case is not checked - confirm it cannot occur.
    let owned: CString = unsafe { CStr::from_ptr(raw) }.to_owned();
    owned.into_string()
}
/// Returns an owned copy of the string produced by the C routine
/// `cpucycles_version`.
///
/// # Errors
///
/// Returns [`IntoStringError`] when the C string is not valid UTF-8.
pub fn cpucycles_version() -> Result<String, IntoStringError> {
    let raw = unsafe { c::cpucycles_version() };
    // SAFETY: relies on the C library handing back a non-null,
    // NUL-terminated string that stays valid while it is copied.
    // NOTE(review): the null case is not checked - confirm it cannot occur.
    let owned: CString = unsafe { CStr::from_ptr(raw) }.to_owned();
    owned.into_string()
}
// Smoke tests: each wrapper must return `Ok` when the library is linked in.
#[cfg(test)]
mod test {
    use crate::*;

    #[test]
    fn cpucycles_test() {
        assert!(cpucycles().is_ok());
    }

    #[test]
    fn cpucycles_persecond_test() {
        assert!(cpucycles_persecond().is_ok());
    }

    #[test]
    fn cpucycles_implementation_test() {
        assert!(cpucycles_implementation().is_ok());
    }

    #[test]
    fn cpucycles_version_test() {
        assert!(cpucycles_version().is_ok());
    }
}
+1 -1
View File
@@ -17,7 +17,7 @@ rust-version = "1.56"
[dependencies]
anyhow = "1.0.53"
async-trait = { workspace = true }
bip39 = "1.0.1"
bip39 = { workspace = true }
bs58 = "0.4.0"
clap = { version = "4.0", features = ["cargo", "derive"] }
colored = "2.0"
+13 -18
View File
@@ -29,37 +29,32 @@ log = { workspace = true }
pretty_env_logger = "0.4.0"
rand = "0.7.3"
rocket = { version = "0.5.0-rc.2", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde = { version="1.0", features = ["derive"] }
serde_json = "1.0"
sysinfo = "0.27.7"
tokio = { version = "1.21.2", features = ["rt-multi-thread", "net", "signal"] }
tokio-util = { version = "0.7.3", features = ["codec"] }
tokio = { version="1.21.2", features = ["rt-multi-thread", "net", "signal"] }
tokio-util = { version="0.7.3", features = ["codec"] }
toml = "0.5.8"
url = { version = "2.2", features = ["serde"] }
atty = "0.2"
## internal
nym-config = { path = "../common/config" }
nym-crypto = { path = "../common/crypto" }
nym-config = { path="../common/config" }
nym-crypto = { path="../common/crypto" }
nym-contracts-common = { path = "../common/cosmwasm-smart-contracts/contracts-common" }
mixnet-client = { path = "../common/client-libs/mixnet-client" }
mixnode-common = { path = "../common/mixnode-common" }
nym-nonexhaustive-delayqueue = { path = "../common/nonexhaustive-delayqueue" }
nym-sphinx = { path = "../common/nymsphinx" }
mixnet-client = { path="../common/client-libs/mixnet-client" }
mixnode-common = { path="../common/mixnode-common" }
nym-nonexhaustive-delayqueue = { path="../common/nonexhaustive-delayqueue" }
nym-sphinx = { path="../common/nymsphinx" }
nym-pemstore = { path = "../common/pemstore", version = "0.2.0" }
nym-task = { path = "../common/task" }
nym-types = { path = "../common/types" }
nym-topology = { path = "../common/topology" }
validator-client = { path = "../common/client-libs/validator-client" }
nym-bin-common = { path = "../common/bin-common" }
nym-topology = { path="../common/topology" }
validator-client = { path="../common/client-libs/validator-client" }
nym-bin-common = { path="../common/bin-common" }
[dev-dependencies]
tokio = { version = "1.21.2", features = [
"rt-multi-thread",
"net",
"signal",
"test-util",
] }
tokio = { version="1.21.2", features = ["rt-multi-thread", "net", "signal", "test-util"] }
nym-sphinx-types = { path = "../common/nymsphinx/types" }
nym-sphinx-params = { path = "../common/nymsphinx/params" }
+1 -1
View File
@@ -17,7 +17,7 @@ rust-version = "1.56"
[dependencies]
async-trait = { workspace = true }
bs58 = {version = "0.4.0" }
bip39 = "1"
bip39 = { workspace = true }
cfg-if = "1.0"
clap = { version = "4.0", features = ["cargo", "derive"] }
console-subscriber = { version = "0.1.1", optional = true } # validator-api needs to be built with RUSTFLAGS="--cfg tokio_unstable"
+68 -49
View File
@@ -84,6 +84,18 @@ impl<R: RngCore + CryptoRng + Clone> DkgController<R> {
})
}
/// Writes a snapshot of the current DKG state to the persistent state file.
///
/// When no coconut keypair is held in memory, the on-disk secret and
/// verification key files are removed first. Failures are not propagated:
/// removal errors are discarded and a failed save is only logged.
async fn dump_persistent_state(&self) {
    let keypair_in_memory = self.state.coconut_keypair_is_some().await;
    if !keypair_in_memory {
        // Delete the files just in case the process is killed before the new keys are generated
        let _ = std::fs::remove_file(&self.secret_key_path);
        let _ = std::fs::remove_file(&self.verification_key_path);
    }

    let snapshot = PersistentState::from(&self.state);
    match snapshot.save_to_file(self.state.persistent_state_path()) {
        Ok(_) => {}
        Err(err) => warn!("Could not backup the state for this iteration: {err}"),
    }
}
pub(crate) async fn handle_epoch_state(&mut self) {
match self.dkg_client.get_current_epoch().await {
Err(err) => warn!("Could not get current epoch state {err}"),
@@ -99,57 +111,64 @@ impl<R: RngCore + CryptoRng + Clone> DkgController<R> {
return;
}
if let Err(err) = self.state.is_consistent(epoch.state).await {
error!("Epoch state is corrupted - {err}, the process should be terminated");
return;
}
let ret = match epoch.state {
EpochState::PublicKeySubmission { resharing } => {
public_key_submission(&self.dkg_client, &mut self.state, resharing).await
}
EpochState::DealingExchange { resharing } => {
dealing_exchange(
&self.dkg_client,
&mut self.state,
self.rng.clone(),
resharing,
)
.await
}
EpochState::VerificationKeySubmission { resharing } => {
let keypair_path = nym_pemstore::KeyPairPath::new(
self.secret_key_path.clone(),
self.verification_key_path.clone(),
);
verification_key_submission(
&self.dkg_client,
&mut self.state,
&keypair_path,
resharing,
)
.await
}
EpochState::VerificationKeyValidation { resharing } => {
verification_key_validation(&self.dkg_client, &mut self.state, resharing)
debug!("Epoch state is corrupted - {err}. Awaiting for a DKG restart.");
} else {
let ret = match epoch.state {
EpochState::PublicKeySubmission { resharing } => {
public_key_submission(&self.dkg_client, &mut self.state, resharing)
.await
}
EpochState::DealingExchange { resharing } => {
dealing_exchange(
&self.dkg_client,
&mut self.state,
self.rng.clone(),
resharing,
)
.await
}
EpochState::VerificationKeyFinalization { resharing } => {
verification_key_finalization(&self.dkg_client, &mut self.state, resharing)
}
EpochState::VerificationKeySubmission { resharing } => {
let keypair_path = nym_pemstore::KeyPairPath::new(
self.secret_key_path.clone(),
self.verification_key_path.clone(),
);
verification_key_submission(
&self.dkg_client,
&mut self.state,
&keypair_path,
resharing,
)
.await
}
// Just wait, in case we need to redo dkg at some point
EpochState::InProgress => {
self.state.set_was_in_progress();
Ok(())
}
};
if let Err(err) = ret {
warn!("Could not handle this iteration for the epoch state: {err}");
} else if epoch.state != EpochState::InProgress {
let persistent_state = PersistentState::from(&self.state);
if let Err(err) =
persistent_state.save_to_file(self.state.persistent_state_path())
{
warn!("Could not backup the state for this iteration: {err}");
}
EpochState::VerificationKeyValidation { resharing } => {
verification_key_validation(
&self.dkg_client,
&mut self.state,
resharing,
)
.await
}
EpochState::VerificationKeyFinalization { resharing } => {
verification_key_finalization(
&self.dkg_client,
&mut self.state,
resharing,
)
.await
}
// Just wait, in case we need to redo dkg at some point
EpochState::InProgress => {
self.state.set_was_in_progress();
// We're dumping state here so that we don't do it uselessly during the
// long InProgress state
self.dump_persistent_state().await;
Ok(())
}
};
if let Err(err) = ret {
warn!("Could not handle this iteration for the epoch state: {err}");
} else if epoch.state != EpochState::InProgress {
self.dump_persistent_state().await;
}
}
if let Ok(current_timestamp) =
+24 -11
View File
@@ -4,6 +4,7 @@
use crate::coconut::dkg::client::DkgClient;
use crate::coconut::dkg::state::{ConsistentState, State};
use crate::coconut::error::CoconutError;
use log::debug;
use nym_coconut_dkg_common::types::TOTAL_DEALINGS;
use nym_contracts_common::dealings::ContractSafeBytes;
use nym_dkg::bte::setup;
@@ -18,6 +19,7 @@ pub(crate) async fn dealing_exchange(
resharing: bool,
) -> Result<(), CoconutError> {
if state.receiver_index().is_some() {
debug!("Receiver index was set previously, nothing to do");
return Ok(());
}
@@ -45,6 +47,7 @@ pub(crate) async fn dealing_exchange(
return Err(CoconutError::CorruptedCoconutKeyPair);
}
// We can now erase the keypair from memory
debug!("Removing coconut keypair from memory");
state.set_coconut_keypair(None).await;
scalars.push(x);
scalars
@@ -59,6 +62,11 @@ pub(crate) async fn dealing_exchange(
if !resharing || initial_dealers.iter().any(|d| *d == own_address) {
let params = setup();
for _ in 0..TOTAL_DEALINGS {
debug!(
"Submitting dealing for indexes {:?} with resharing: {}",
receivers.keys().collect::<Vec<_>>(),
prior_resharing_secrets.front().is_some()
);
let (dealing, _) = Dealing::create(
rng.clone(),
&params,
@@ -71,9 +79,11 @@ pub(crate) async fn dealing_exchange(
.submit_dealing(ContractSafeBytes::from(&dealing), resharing)
.await?;
}
} else {
debug!("Nothing to do, waiting for initial dealers to submit dealings");
}
info!("DKG: Finished submitting dealing");
info!("DKG: Finished dealing exchange");
state.set_receiver_index(receiver_index);
Ok(())
@@ -109,7 +119,7 @@ pub(crate) mod tests {
fn insert_dealers(
params: &Params,
dealer_details_db: &Arc<RwLock<HashMap<String, DealerDetails>>>,
dealer_details_db: &Arc<RwLock<HashMap<String, (DealerDetails, bool)>>>,
) -> Vec<DkgKeyPair> {
let mut keypairs = vec![];
for (idx, addr) in TEST_VALIDATORS_ADDRESS.iter().enumerate() {
@@ -119,12 +129,15 @@ pub(crate) mod tests {
keypairs.push(keypair);
dealer_details_db.write().unwrap().insert(
addr.to_string(),
DealerDetails {
address: Addr::unchecked(*addr),
bte_public_key_with_proof,
announce_address: format!("localhost:80{}", idx),
assigned_index: (idx + 1) as u64,
},
(
DealerDetails {
address: Addr::unchecked(*addr),
bte_public_key_with_proof,
announce_address: format!("localhost:80{}", idx),
assigned_index: (idx + 1) as u64,
},
true,
),
);
}
keypairs
@@ -216,7 +229,7 @@ pub(crate) mod tests {
.unwrap()
.entry(TEST_VALIDATORS_ADDRESS[1].to_string())
.and_modify(|details| {
let mut bytes = bs58::decode(details.bte_public_key_with_proof.clone())
let mut bytes = bs58::decode(details.0.bte_public_key_with_proof.clone())
.into_vec()
.unwrap();
// Find another value for last byte that still deserializes to a public key with proof
@@ -231,7 +244,7 @@ pub(crate) mod tests {
break;
}
}
details.bte_public_key_with_proof = bs58::encode(&bytes).into_string();
details.0.bte_public_key_with_proof = bs58::encode(&bytes).into_string();
});
dealing_exchange(&dkg_client, &mut state, OsRng, false)
@@ -257,7 +270,7 @@ pub(crate) mod tests {
let threshold_db = Arc::new(RwLock::new(Some(3)));
let initial_dealers_db = Arc::new(RwLock::new(Some(InitialReplacementData {
initial_dealers: vec![Addr::unchecked(TEST_VALIDATORS_ADDRESS[0])],
initial_height: Some(100),
initial_height: 100,
})));
let dkg_client = DkgClient::new(
DummyClient::new(

Some files were not shown because too many files have changed in this diff Show More