Skip to content

Commit

Permalink
#Centipede Refactor stats reporting in preparation for upcoming new s…
Browse files Browse the repository at this point in the history
…tats

Also:

- Stop reporting timestamps in stats comparisons between experiments (as well as in non-experiment runs with --v=1 or higher). Timestamps weren't reported historically, and their value is dubious in this context. They were added primarily for .csv file dumping, and are actively used there.
- Slightly change the logged stats formatting for better readability. The change is evident in the updated test.

PiperOrigin-RevId: 588972760
  • Loading branch information
ussuri authored and copybara-github committed Dec 12, 2023
1 parent 4923853 commit ade0efa
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 180 deletions.
2 changes: 2 additions & 0 deletions centipede/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1302,12 +1302,14 @@ cc_test(
srcs = ["stats_test.cc"],
deps = [
":environment",
":logging",
":stats",
":test_util",
":util",
"@com_google_absl//absl/log:log_entry",
"@com_google_absl//absl/log:log_sink",
"@com_google_absl//absl/log:log_sink_registry",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
"@com_google_googletest//:gtest_main",
],
Expand Down
96 changes: 50 additions & 46 deletions centipede/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
Expand All @@ -28,6 +27,7 @@
#include <numeric>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

Expand All @@ -36,7 +36,6 @@
#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/substitute.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./centipede/environment.h"
Expand All @@ -46,6 +45,8 @@

namespace centipede {

using TraitBits = Stats::TraitBits;

// -----------------------------------------------------------------------------
// StatsReporter

Expand All @@ -65,6 +66,7 @@ StatsReporter::StatsReporter(const std::vector<Stats> &stats_vec,
void StatsReporter::ReportCurrStats() {
PreAnnounceFields(Stats::kFieldInfos);
for (const Stats::FieldInfo &field_info : Stats::kFieldInfos) {
if (!ShouldReportThisField(field_info)) continue;
SetCurrField(field_info);
for (const auto &[group_name, group_indices] : group_to_indices_) {
SetCurrGroup(env_vec_[group_indices.at(0)]);
Expand All @@ -84,19 +86,22 @@ void StatsReporter::ReportCurrStats() {
// -----------------------------------------------------------------------------
// StatsLogger

// Decides whether `field` is included in the logged stats: a field qualifies
// when its traits carry the fuzz-stat kind bit or the timestamp kind bit.
// NOTE(review): every entry of `Stats::kFieldInfos` carries one of these two
// kind bits, so this filter currently accepts all fields -- confirm that
// admitting `kTimestamp` here is intended.
bool StatsLogger::ShouldReportThisField(const Stats::FieldInfo &field) {
  const auto logged_kinds = TraitBits::kFuzzStat | TraitBits::kTimestamp;
  const bool has_logged_kind = (field.traits & logged_kinds) != 0;
  return has_logged_kind;
}

void StatsLogger::PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) {
// Nothing to do: field names are logged together with every sample values.
// Nothing to do: field names are logged together with every sample's values.
}

void StatsLogger::SetCurrGroup(const Environment &master_env) {
if (!master_env.experiment_name.empty())
os_ << master_env.experiment_name << ": ";
curr_experiment_name_ = master_env.experiment_name;
}

void StatsLogger::SetCurrField(const Stats::FieldInfo &field_info) {
os_ << field_info.description << ":\n";
curr_field_info_ = field_info;
os_ << curr_field_info_.description << ":\n";
}

namespace {
Expand All @@ -108,27 +113,32 @@ std::string FormatTimestamp(uint64_t unix_micros) {
} // namespace

void StatsLogger::ReportCurrFieldSample(std::vector<uint64_t> &&values) {
// Print min/max/avg and the full sorted contents of `values`.
if (!curr_experiment_name_.empty()) os_ << curr_experiment_name_ << ": ";

// Print the requested aggregate stats as well as the full sorted contents of
// `values`.
std::sort(values.begin(), values.end());
const uint64_t min = values.front();
const uint64_t max = values.back();
const double avg = std::accumulate(values.begin(), values.end(), 0.) /
static_cast<double>(values.size());
const uint64_t sum = std::accumulate(values.begin(), values.end(), 0.);
const double avg = !values.empty() ? (1.0 * sum / values.size()) : 0;

os_ << std::fixed << std::setprecision(1);
switch (curr_field_info_.aggregation) {
case Stats::Aggregation::kMinMaxAvg: {
os_ << "min:\t" << min << "\t"
<< "max:\t" << max << "\t"
<< "avg:\t" << avg << "\t";
os_ << "--";
for (const auto value : values) {
os_ << "\t" << value;
}
} break;
case Stats::Aggregation::kMinMax: {
os_ << "min:\t" << FormatTimestamp(min) << "\t"
<< "max:\t" << FormatTimestamp(max);
} break;
if (curr_field_info_.traits & TraitBits::kTimestamp) {
os_ << "min:\t" << FormatTimestamp(min) << "\t"
<< "max:\t" << FormatTimestamp(max);
} else {
if (curr_field_info_.traits & TraitBits::kMin)
os_ << "min:\t" << min << "\t";
if (curr_field_info_.traits & TraitBits::kMax)
os_ << "max:\t" << max << "\t";
if (curr_field_info_.traits & TraitBits::kAvg)
os_ << "avg:\t" << avg << "\t";

os_ << "--";
for (auto value : values) {
os_ << "\t" << value;
}
}
os_ << "\n";
}
Expand Down Expand Up @@ -163,16 +173,12 @@ void StatsCsvFileAppender::PreAnnounceFields(
if (!csv_header_.empty()) return;

for (const auto &field : fields) {
std::string col_names;
switch (field.aggregation) {
case Stats::Aggregation::kMinMax:
col_names = absl::Substitute("$0_Min,$0_Max,", field.name);
break;
case Stats::Aggregation::kMinMaxAvg:
col_names = absl::Substitute("$0_Min,$0_Max,$0_Avg,", field.name);
break;
}
absl::StrAppend(&csv_header_, col_names);
if (field.traits & TraitBits::kMin)
absl::StrAppend(&csv_header_, field.name, "_Min,");
if (field.traits & TraitBits::kMax)
absl::StrAppend(&csv_header_, field.name, "_Max,");
if (field.traits & TraitBits::kAvg)
absl::StrAppend(&csv_header_, field.name, "_Avg,");
}
absl::StrAppend(&csv_header_, "\n");
}
Expand All @@ -197,26 +203,24 @@ void StatsCsvFileAppender::SetCurrField(const Stats::FieldInfo &field_info) {

void StatsCsvFileAppender::ReportCurrFieldSample(
std::vector<uint64_t> &&values) {
// Print min/max/avg of `values`.
uint64_t min = std::numeric_limits<uint64_t>::max();
uint64_t max = std::numeric_limits<uint64_t>::min();
long double avg = 0;
uint64_t sum = 0;
for (const auto value : values) {
min = std::min(min, value);
max = std::max(max, value);
avg += value;
sum += value;
}
if (!values.empty()) avg /= values.size();
double avg = !values.empty() ? (1.0L * sum / values.size()) : 0;

std::string values_str;
switch (curr_field_info_.aggregation) {
case Stats::Aggregation::kMinMax:
values_str = absl::StrFormat("%" PRIu64 ",%" PRIu64 ",", min, max);
break;
case Stats::Aggregation::kMinMaxAvg:
values_str =
absl::StrFormat("%" PRIu64 ",%" PRIu64 ",%.1Lf,", min, max, avg);
break;
}
if (curr_field_info_.traits & TraitBits::kMin)
absl::StrAppendFormat(&values_str, "%" PRIu64 ",", min);
if (curr_field_info_.traits & TraitBits::kMax)
absl::StrAppendFormat(&values_str, "%" PRIu64 ",", max);
if (curr_field_info_.traits & TraitBits::kAvg)
absl::StrAppendFormat(&values_str, "%.1Lf,", avg);

RemoteFileAppend(curr_file_, values_str);
}

Expand Down
52 changes: 37 additions & 15 deletions centipede/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,58 +40,73 @@ namespace centipede {
// hence the use of atomics.
// These objects may also be accessed after all worker threads have joined.
struct Stats {
std::atomic<uint64_t> timestamp_unix_micros;
std::atomic<uint64_t> num_executions;
std::atomic<uint64_t> num_covered_pcs;
std::atomic<uint64_t> active_corpus_size;
std::atomic<uint64_t> max_corpus_element_size;
std::atomic<uint64_t> avg_corpus_element_size;

enum class Aggregation { kMinMaxAvg, kMinMax };
std::atomic<uint64_t> timestamp_unix_micros = 0;
std::atomic<uint64_t> num_executions = 0;
std::atomic<uint64_t> num_covered_pcs = 0;
std::atomic<uint64_t> active_corpus_size = 0;
std::atomic<uint64_t> max_corpus_element_size = 0;
std::atomic<uint64_t> avg_corpus_element_size = 0;

// Some traits of each stat.
// `Traits` is a bitmask: a stat's traits are formed by OR-ing together
// `TraitBits` values (see the entries of `kFieldInfos`), and are queried by
// AND-ing the mask with the bit(s) of interest.
using Traits = uint32_t;
enum TraitBits : Traits {
// The kind of the stat. Occupies the low bits (0 and 1).
kTimestamp = 1UL << 0,
kFuzzStat = 1UL << 1,
// The aggregate value(s) to report for the stat. Occupies bits 8 through 10;
// several of these may be set on the same stat.
kMin = 1UL << 8,
kMax = 1UL << 9,
kAvg = 1UL << 10,
};

// Ascribes some properties to each stat. Used in `StatReporter` & subclasses.
struct FieldInfo {
std::atomic<uint64_t> Stats::*field;
std::string_view name;
std::string_view description;
Aggregation aggregation;
Traits traits;
};

// WARNING!!! Before reordering these or changing the aggregation types,
// consider whether you want to maintain backward compatibility with any
// historically written CSVs: e.g. you might have a CSV post-processing step
// that is sensitive to a particular order or aggregation type of the CSV fields.
static constexpr std::initializer_list<FieldInfo> kFieldInfos = {
{
&Stats::num_covered_pcs,
"NumCoveredPcs",
"Coverage",
Aggregation::kMinMaxAvg,
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::num_executions,
"NumExecs",
"Number of executions",
Aggregation::kMinMaxAvg,
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::active_corpus_size,
"ActiveCorpusSize",
"Active corpus size",
Aggregation::kMinMaxAvg,
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::max_corpus_element_size,
"MaxEltSize",
"Max element size",
Aggregation::kMinMaxAvg,
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::avg_corpus_element_size,
"AvgEltSize",
"Avg element size",
Aggregation::kMinMaxAvg,
kFuzzStat | kMin | kMax | kAvg,
},
{
&Stats::timestamp_unix_micros,
"UnixMicros",
"Timestamp",
Aggregation::kMinMax,
kTimestamp | kMin | kMax,
},
};
};
Expand Down Expand Up @@ -126,6 +141,11 @@ class StatsReporter {
// `ReportCurrStats()`, that subclasses need to override to implement their
// stats reporting.

// Returns true if `field` should be reported by this reporter, or false if it
// should be skipped. `ReportCurrStats()` consults this for each field before
// reporting it. The default implementation reports every field; a subclass
// can override it and inspect `field.traits` to filter the fields for the
// particular type of reporting that it does.
virtual bool ShouldReportThisField(const Stats::FieldInfo &field) {
return true;
}
// Gives a chance to subclasses to learn ahead of time the fields for which
// samples are going to be reported, in this order. Is called once.
virtual void PreAnnounceFields(
Expand Down Expand Up @@ -171,6 +191,7 @@ class StatsLogger : public StatsReporter {
~StatsLogger() override = default;

private:
bool ShouldReportThisField(const Stats::FieldInfo &field) override;
void PreAnnounceFields(
std::initializer_list<Stats::FieldInfo> fields) override;
void SetCurrGroup(const Environment &master_env) override;
Expand All @@ -180,6 +201,7 @@ class StatsLogger : public StatsReporter {
void ReportFlags(const GroupToFlags &group_to_flags) override;

std::stringstream os_;
std::string curr_experiment_name_;
Stats::FieldInfo curr_field_info_;
};

Expand Down
Loading

0 comments on commit ade0efa

Please sign in to comment.