Skip to content

Commit

Permalink
#Centipede Refactor stats reporting in preparation for upcoming new s…
Browse files Browse the repository at this point in the history
…tats

Also:

- Stop reporting timestamps in stats comparisons between experiments (as well as in non-experiment runs with --v=1 or higher). Timestamps weren't reported historically, and their value is dubious in this context. They were added primarily for .csv file dumping, and are actively used there.
- Slightly tweak the logged stats formatting for better readability. The change is evident in the updated test.

PiperOrigin-RevId: 588972760
  • Loading branch information
ussuri authored and copybara-github committed Dec 8, 2023
1 parent b92caa9 commit 8071e48
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 135 deletions.
1 change: 0 additions & 1 deletion centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ cc_library(
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/time",
"@com_google_absl//absl/types:span",
],
)
Expand Down
5 changes: 2 additions & 3 deletions centipede/centipede.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
Expand Down Expand Up @@ -199,8 +198,8 @@ void Centipede::UpdateAndMaybeLogStats(std::string_view log_type,
size_t min_log_level) {
auto [max_corpus_size, avg_corpus_size] = corpus_.MaxAndAvgSize();

stats_.unix_micros = absl::ToUnixMicros(absl::Now());
stats_.corpus_size = corpus_.NumActive();
stats_.timestamp_unix_micros = absl::ToUnixMicros(absl::Now());
stats_.active_corpus_size = corpus_.NumActive();
stats_.num_covered_pcs = fs_.CountFeatures(feature_domains::kPCs);
stats_.max_corpus_element_size = max_corpus_size;
stats_.avg_corpus_element_size = avg_corpus_size;
Expand Down
116 changes: 58 additions & 58 deletions centipede/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
Expand All @@ -28,6 +27,7 @@
#include <numeric>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand All @@ -36,8 +36,6 @@
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/substitute.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/environment.h"
#include "./centipede/logging.h"
Expand Down Expand Up @@ -84,51 +82,56 @@ void StatsReporter::ReportCurrStats() {
// -----------------------------------------------------------------------------
// StatsLogger

// Returns true if `field` should be included in the logged stats, i.e. if its
// `traits` have the `Stats::TraitBits::kFuzzing` bit set.
bool StatsLogger::ShouldLogThisField(const Stats::FieldInfo &field) {
// Skip timestamps and rusage stats: the former because timestamps are not
// useful in experiment logs (only in CSVs), the latter because rusage is
// (at least currently) per the whole process, not per shard or experiment.
return (field.traits & Stats::TraitBits::kFuzzing) != 0;
}

void StatsLogger::PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) {
// Nothing to do: field names are logged together with every sample values.
// Nothing to do: field names are logged together with every sample's values.
}

void StatsLogger::SetCurrGroup(const Environment &master_env) {
if (!master_env.experiment_name.empty())
os_ << master_env.experiment_name << ": ";
curr_experiment_name_ = master_env.experiment_name;
}

void StatsLogger::SetCurrField(const Stats::FieldInfo &field_info) {
os_ << field_info.description << ":\n";
curr_field_info_ = field_info;
if (!ShouldLogThisField(curr_field_info_)) return;
os_ << curr_field_info_.description << ":\n";
}

namespace {
std::string FormatTimestamp(uint64_t unix_micros) {
return absl::FormatTime("%Y-%m-%d%ET%H:%M:%S",
absl::FromUnixMicros(unix_micros),
absl::LocalTimeZone());
}
} // namespace

void StatsLogger::ReportCurrFieldSample(std::vector<uint64_t> &&values) {
// Print min/max/avg and the full sorted contents of `values`.
std::sort(values.begin(), values.end());
const uint64_t min = values.front();
const uint64_t max = values.back();
const double avg = std::accumulate(values.begin(), values.end(), 0.) /
static_cast<double>(values.size());
if (!ShouldLogThisField(curr_field_info_)) return;

if (!curr_experiment_name_.empty())
os_ << " " << curr_experiment_name_ << ": ";

// Print the requested aggregated values as well as the individual samples.
uint64_t min = std::numeric_limits<uint64_t>::max();
uint64_t max = std::numeric_limits<uint64_t>::min();
long double sum = 0;
for (const auto value : values) {
min = std::min(min, value);
max = std::max(max, value);
sum += value;
}
long double avg = !values.empty() ? (sum / values.size()) : 0;
os_ << std::fixed << std::setprecision(1);
switch (curr_field_info_.aggregation) {
case Stats::Aggregation::kMinMaxAvg: {
os_ << "min:\t" << min << "\t"
<< "max:\t" << max << "\t"
<< "avg:\t" << avg << "\t";
os_ << "--";
for (const auto value : values) {
os_ << "\t" << value;
}
} break;
case Stats::Aggregation::kMinMax: {
os_ << "min:\t" << FormatTimestamp(min) << "\t"
<< "max:\t" << FormatTimestamp(max);
} break;
if (curr_field_info_.traits & Stats::TraitBits::kMin)
os_ << "min:\t" << min << "\t";
if (curr_field_info_.traits & Stats::TraitBits::kMax)
os_ << "max:\t" << max << "\t";
if (curr_field_info_.traits & Stats::TraitBits::kAvg)
os_ << "avg:\t" << avg << "\t";
if (curr_field_info_.traits & Stats::TraitBits::kSum)
os_ << "sum:\t" << sum << "\t";
os_ << "--";
for (const auto value : values) {
os_ << "\t" << value;
}
os_ << "\n";
}
Expand All @@ -137,7 +140,7 @@ void StatsLogger::ReportFlags(const GroupToFlags &group_to_flags) {
std::stringstream fos;
for (const auto &[group_name, group_flags] : group_to_flags) {
if (!group_name.empty() || !group_flags.empty()) {
fos << group_name << ": " << group_flags << "\n";
fos << " " << group_name << ": " << group_flags << "\n";
}
}
if (fos.tellp() != std::streampos{0}) os_ << "Flags:\n" << fos.rdbuf();
Expand All @@ -163,16 +166,14 @@ void StatsCsvFileAppender::PreAnnounceFields(
if (!csv_header_.empty()) return;

for (const auto &field : fields) {
std::string col_names;
switch (field.aggregation) {
case Stats::Aggregation::kMinMax:
col_names = absl::Substitute("$0_Min,$0_Max,", field.name);
break;
case Stats::Aggregation::kMinMaxAvg:
col_names = absl::Substitute("$0_Min,$0_Max,$0_Avg,", field.name);
break;
}
absl::StrAppend(&csv_header_, col_names);
if (field.traits & Stats::TraitBits::kMin)
absl::StrAppend(&csv_header_, field.name, "_Min,");
if (field.traits & Stats::TraitBits::kMax)
absl::StrAppend(&csv_header_, field.name, "_Max,");
if (field.traits & Stats::TraitBits::kAvg)
absl::StrAppend(&csv_header_, field.name, "_Avg,");
if (field.traits & Stats::TraitBits::kSum)
absl::StrAppend(&csv_header_, field.name, "_Sum,");
}
absl::StrAppend(&csv_header_, "\n");
}
Expand Down Expand Up @@ -200,23 +201,22 @@ void StatsCsvFileAppender::ReportCurrFieldSample(
// Print min/max/avg of `values`.
uint64_t min = std::numeric_limits<uint64_t>::max();
uint64_t max = std::numeric_limits<uint64_t>::min();
long double avg = 0;
long double sum = 0;
for (const auto value : values) {
min = std::min(min, value);
max = std::max(max, value);
avg += value;
sum += value;
}
if (!values.empty()) avg /= values.size();
long double avg = !values.empty() ? (sum / values.size()) : 0;
std::string values_str;
switch (curr_field_info_.aggregation) {
case Stats::Aggregation::kMinMax:
values_str = absl::StrFormat("%" PRIu64 ",%" PRIu64 ",", min, max);
break;
case Stats::Aggregation::kMinMaxAvg:
values_str =
absl::StrFormat("%" PRIu64 ",%" PRIu64 ",%.1Lf,", min, max, avg);
break;
}
if (curr_field_info_.traits & Stats::TraitBits::kMin)
absl::StrAppendFormat(&values_str, "%" PRIu64 ",", min);
if (curr_field_info_.traits & Stats::TraitBits::kMax)
absl::StrAppendFormat(&values_str, "%" PRIu64 ",", max);
if (curr_field_info_.traits & Stats::TraitBits::kAvg)
absl::StrAppendFormat(&values_str, "%.1Lf,", avg);
if (curr_field_info_.traits & Stats::TraitBits::kSum)
absl::StrAppendFormat(&values_str, "%.1Lf,", sum);
RemoteFileAppend(curr_file_, values_str);
}

Expand Down
50 changes: 36 additions & 14 deletions centipede/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,59 +39,77 @@ namespace centipede {
// All such objects may be read synchronously by another thread,
// hence the use of atomics.
// These objects may also be accessed after all worker threads have joined.
// TODO(ussuri): Too many atomics now? Consider mutexing instead.
struct Stats {
std::atomic<uint64_t> unix_micros;
std::atomic<uint64_t> timestamp_unix_micros;
std::atomic<uint64_t> fuzz_time_sec;

std::atomic<uint64_t> num_executions;
std::atomic<uint64_t> num_target_crashes;

std::atomic<uint64_t> num_covered_pcs;
std::atomic<uint64_t> corpus_size;
std::atomic<uint64_t> active_corpus_size;
std::atomic<uint64_t> max_corpus_element_size;
std::atomic<uint64_t> avg_corpus_element_size;

enum class Aggregation { kMinMaxAvg, kMinMax };
using Traits = uint32_t;
enum TraitBits : Traits {
// The kind of the stat.
kTimestamp = 1UL << 0,
kFuzzing = 1UL << 1,

// The aggregate value(s) to report for the stat.
kMin = 1UL << 8,
kMax = 1UL << 9,
kAvg = 1UL << 10,
kSum = 1UL << 11,
};

struct FieldInfo {
std::atomic<uint64_t> Stats::*field;
std::string_view name;
std::string_view description;
Aggregation aggregation;
Traits traits;
};

// WARNING!!! Before reordering these or changing the aggregation types,
// do consider backward compatibility with the historical CSVs first.
static constexpr std::initializer_list<FieldInfo> kFieldInfos = {
{
&Stats::num_covered_pcs,
"NumCoveredPcs",
"Coverage",
Aggregation::kMinMaxAvg,
kFuzzing | kMin | kMax | kAvg,
},
{
&Stats::num_executions,
"NumExecs",
"Number of executions",
Aggregation::kMinMaxAvg,
kFuzzing | kMin | kMax | kAvg,
},
{
&Stats::corpus_size,
"CorpusSize",
"Corpus size",
Aggregation::kMinMaxAvg,
&Stats::active_corpus_size,
"ActiveCorpusSize",
"Active corpus size",
kFuzzing | kMin | kMax | kAvg,
},
{
&Stats::max_corpus_element_size,
"MaxEltSize",
"Max element size",
Aggregation::kMinMaxAvg,
kFuzzing | kMin | kMax | kAvg,
},
{
&Stats::avg_corpus_element_size,
"AvgEltSize",
"Avg element size",
Aggregation::kMinMaxAvg,
kFuzzing | kMin | kMax | kAvg,
},
{
&Stats::unix_micros,
&Stats::timestamp_unix_micros,
"UnixMicros",
"Timestamp",
Aggregation::kMinMax,
kTimestamp | kMin | kMax,
},
};
};
Expand Down Expand Up @@ -171,6 +189,9 @@ class StatsLogger : public StatsReporter {
~StatsLogger() override = default;

private:
// Returns true if the field is "interesting" for logging.
static bool ShouldLogThisField(const Stats::FieldInfo &field);

void PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) override;
void SetCurrGroup(const Environment &master_env) override;
Expand All @@ -180,6 +201,7 @@ class StatsLogger : public StatsReporter {
void ReportFlags(const GroupToFlags &group_to_flags) override;

std::stringstream os_;
std::string curr_experiment_name_;
Stats::FieldInfo curr_field_info_;
};

Expand Down
Loading

0 comments on commit 8071e48

Please sign in to comment.