Skip to content

Commit

Permalink
Make memory util more fair between baseline/rackscale benches; prepar…
Browse files Browse the repository at this point in the history
…e to use hugepages for qemu memory (not done yet)
  • Loading branch information
hunhoffe committed Jul 30, 2023
1 parent d6dbc4d commit aa86b14
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 63 deletions.
12 changes: 11 additions & 1 deletion kernel/src/arch/x86_64/rackscale/registration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::memory::backends::AllocatorStatistics;
use crate::memory::mcache::MCache;
use crate::memory::shmem_affinity::mid_to_shmem_affinity;
use crate::memory::{Frame, PAddr, LARGE_PAGE_SIZE};
use crate::transport::shmem::get_affinity_shmem;
use crate::transport::shmem::{get_affinity_shmem, get_affinity_shmem_by_mid};

#[derive(Debug, Default)]
pub(crate) struct ClientRegistrationRequest {
Expand Down Expand Up @@ -117,6 +117,16 @@ pub(crate) fn register_client(hdr: &mut RPCHeader, payload: &mut [u8]) -> Result
}
};

// Make sure the controller and the client are seeing the same shmem addresses.
{
let shmem_region = get_affinity_shmem_by_mid(req.mid);
assert_eq!(
shmem_region.base.as_u64(),
req.shmem_region_base,
"Controller did not assign shmem region the same address as the client"
);
}

// Create shmem memory manager
let frame = Frame::new(
PAddr::from(req.shmem_region_base),
Expand Down
25 changes: 10 additions & 15 deletions kernel/tests/s11_rackscale_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) {
test.controller_match_fn = controller_match_fn;
test.transport = transport;
test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
test.file_name = file_name.clone();
test.arg = Some(config);

Expand Down Expand Up @@ -144,9 +145,7 @@ fn rackscale_fxmark_benchmark(transport: RackscaleTransport) {
cmd_fn,
baseline_timeout_fn: timeout_fn,
rackscale_timeout_fn: timeout_fn,
controller_mem_fn: mem_fn,
client_mem_fn: mem_fn,
baseline_mem_fn: mem_fn,
mem_fn,
};

if cfg!(feature = "baseline") {
Expand Down Expand Up @@ -284,6 +283,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc
test.controller_match_fn = controller_match_fn;
test.transport = transport;
test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
test.file_name = file_name.clone();
test.arg = Some(benchtype);

Expand All @@ -308,9 +308,7 @@ fn rackscale_vmops_benchmark(transport: RackscaleTransport, benchtype: VMOpsBenc
cmd_fn,
baseline_timeout_fn,
rackscale_timeout_fn,
controller_mem_fn: mem_fn,
client_mem_fn: mem_fn,
baseline_mem_fn: mem_fn,
mem_fn,
};

if cfg!(feature = "baseline") {
Expand Down Expand Up @@ -411,6 +409,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() {
test.controller_match_fn = controller_match_fn;
test.transport = RackscaleTransport::Shmem;
test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
test.file_name = file_name.to_string();
test.arg = Some(config);
test.run_dhcpd_for_baseline = true;
Expand Down Expand Up @@ -440,9 +439,7 @@ fn s11_rackscale_shmem_leveldb_benchmark() {
cmd_fn,
baseline_timeout_fn,
rackscale_timeout_fn,
controller_mem_fn: mem_fn,
client_mem_fn: mem_fn,
baseline_mem_fn: mem_fn,
mem_fn,
};

if cfg!(feature = "baseline") {
Expand Down Expand Up @@ -601,6 +598,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) {
test.transport = transport;
test.shmem_size *= 2;
test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
test.file_name = file_name.to_string();
test.arg = Some(config);
test.run_dhcpd_for_baseline = true;
Expand Down Expand Up @@ -630,9 +628,7 @@ fn rackscale_memcached_benchmark(transport: RackscaleTransport) {
cmd_fn,
baseline_timeout_fn,
rackscale_timeout_fn,
controller_mem_fn: mem_fn,
client_mem_fn: mem_fn,
baseline_mem_fn: mem_fn,
mem_fn,
};

if cfg!(feature = "baseline") {
Expand Down Expand Up @@ -682,6 +678,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) {
test.controller_match_fn = controller_match_fn;
test.transport = transport;
test.use_affinity_shmem = cfg!(feature = "affinity-shmem");
test.use_qemu_huge_pages = cfg!(feature = "affinity-shmem");
test.file_name = file_name.to_string();
test.arg = None;
test.run_dhcpd_for_baseline = true;
Expand Down Expand Up @@ -710,9 +707,7 @@ fn rackscale_monetdb_benchmark(transport: RackscaleTransport) {
cmd_fn,
baseline_timeout_fn,
rackscale_timeout_fn,
controller_mem_fn: mem_fn,
client_mem_fn: mem_fn,
baseline_mem_fn: mem_fn,
mem_fn,
};

if cfg!(feature = "baseline") {
Expand Down
54 changes: 32 additions & 22 deletions kernel/testutils/src/rackscale_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,12 @@ where
built: Built<'static>,
/// Timeout for the controller process
pub controller_timeout: u64,
/// Amount of non-shmem QEMU memory given to the controller
pub controller_memory: usize,
/// Function that is called after the controller is spawned to match output of the controller process
pub controller_match_fn: RackscaleMatchFn<T>,
/// Timeout for each client process
pub client_timeout: u64,
/// Amount of non-shmem QEMU memory given to each client
pub client_memory: usize,
/// Amount of non-shmem QEMU memory given to each QEMU instance
pub memory: usize,
/// Function that is called after each client is spawned to match output of the client process
pub client_match_fn: RackscaleMatchFn<T>,
/// Number of client machines to spawn
Expand All @@ -90,6 +88,8 @@ where
pub arg: Option<T>,
/// Run DHCPD in baseline test
pub run_dhcpd_for_baseline: bool,
/// Use huge pages for qemu memory. This requires pre-alloc'ing them on the host before running.
pub use_qemu_huge_pages: bool,
}

impl<T: Clone + Send + 'static> RackscaleRun<T> {
Expand All @@ -109,11 +109,10 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {

RackscaleRun {
controller_timeout: 60_000,
controller_memory: 1024,
controller_match_fn: blank_match_fn,
client_timeout: 60_000,
client_memory: 1024,
client_match_fn: blank_match_fn,
memory: 1024,
kernel_test,
built,
num_clients: 1,
Expand All @@ -127,6 +126,7 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
cmd: "".to_string(),
arg: None,
run_dhcpd_for_baseline: false,
use_qemu_huge_pages: false,
}
}

Expand Down Expand Up @@ -186,10 +186,11 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
let controller_placement_cores = placement_cores.clone();
let state = self.clone();
let controller_tx_build_timer = tx_build_timer_mut.clone();
let use_large_pages = self.use_qemu_huge_pages;
let controller = std::thread::Builder::new()
.name("Controller".to_string())
.spawn(move || {
let cmdline_controller =
let mut cmdline_controller =
RunnerArgs::new_with_build(&controller_kernel_test, &state.built)
.timeout(state.controller_timeout)
.transport(state.transport)
Expand All @@ -200,12 +201,16 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
.no_network_setup()
.workers(state.num_clients + 1)
.use_vmxnet3()
.memory(state.controller_memory)
.memory(state.memory)
.nodes(1)
.cores(controller_cores)
.node_offset(controller_placement_cores[0].0)
.setaffinity(controller_placement_cores[0].1.clone());

if use_large_pages {
cmdline_controller = cmdline_controller.large_pages().prealloc();
}

let mut output = String::new();
let qemu_run = || -> Result<WaitStatus> {
let mut p = spawn_nrk(&cmdline_controller)?;
Expand Down Expand Up @@ -287,10 +292,11 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
let client_placement_cores = placement_cores.clone();
let state = self.clone();
let client_tx_build_timer = tx_build_timer_mut.clone();
let use_large_pages = self.use_qemu_huge_pages;
let client = std::thread::Builder::new()
.name(format!("Client{}", i + 1))
.spawn(move || {
let cmdline_client =
let mut cmdline_client =
RunnerArgs::new_with_build(&client_kernel_test, &state.built)
.timeout(state.client_timeout)
.transport(state.transport)
Expand All @@ -301,14 +307,18 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
.no_network_setup()
.workers(state.num_clients + 1)
.cores(state.cores_per_client)
.memory(state.client_memory)
.memory(state.memory)
.nobuild() // Use single build for all for consistency
.use_vmxnet3()
.cmd(&client_cmd)
.nodes(1)
.node_offset(client_placement_cores[i + 1].0)
.setaffinity(client_placement_cores[i + 1].1.clone());

if use_large_pages {
cmdline_client = cmdline_client.large_pages().prealloc();
}

let mut output = String::new();
let qemu_run = || -> Result<WaitStatus> {
let mut p = spawn_nrk(&cmdline_client)?;
Expand Down Expand Up @@ -425,16 +435,20 @@ impl<T: Clone + Send + 'static> RackscaleRun<T> {
setup_network(self.num_clients + 1);
}

let cmdline_baseline = RunnerArgs::new_with_build(&self.kernel_test, &self.built)
let mut cmdline_baseline = RunnerArgs::new_with_build(&self.kernel_test, &self.built)
.timeout(self.controller_timeout)
.memory(self.controller_memory)
.memory(self.memory)
.workers(1)
.cores(self.cores_per_client * self.num_clients)
.cmd(&self.cmd)
.no_network_setup()
.nodes(self.num_clients)
.setaffinity(all_placement_cores);

if self.use_qemu_huge_pages {
cmdline_baseline = cmdline_baseline.large_pages().prealloc();
}

let mut output = String::new();
let mut qemu_run = || -> Result<WaitStatus> {
let dhcpd_server = if self.run_dhcpd_for_baseline {
Expand Down Expand Up @@ -472,12 +486,8 @@ pub struct RackscaleBench<T: Clone + Send + 'static> {
pub rackscale_timeout_fn: fn(usize) -> u64,
// Function to calculate the timeout. Takes as argument number of application cores
pub baseline_timeout_fn: fn(usize) -> u64,
// Function to calculate controller (and baseline) memory. Takes as argument number of application cores and is_smoke
pub controller_mem_fn: fn(usize, bool) -> usize,
// Function to calculate client memory. Takes as argument number of application cores and is_smoke
pub client_mem_fn: fn(usize, bool) -> usize,
// Function to calculate baseline nros memory. Takes as argument number of application cores and is_smoke
pub baseline_mem_fn: fn(usize, bool) -> usize,
// Function to calculate memory (excepting controller memory). Takes as argument number of application cores and is_smoke
pub mem_fn: fn(usize, bool) -> usize,
}

impl<T: Clone + Send + 'static> RackscaleBench<T> {
Expand Down Expand Up @@ -565,11 +575,11 @@ impl<T: Clone + Send + 'static> RackscaleBench<T> {

// Calculate memory for each component
if !is_baseline {
test_run.controller_memory = (self.controller_mem_fn)(total_cores, is_smoke);
test_run.client_memory = (self.client_mem_fn)(total_cores, is_smoke);
test_run.memory = ((self.mem_fn)(total_cores, is_smoke) / test_run.num_clients)
- test_run.shmem_size;
assert!(test_run.memory > 0);
} else {
test_run.controller_memory = (self.baseline_mem_fn)(total_cores, is_smoke);
test_run.client_memory = test_run.controller_memory;
test_run.memory = (self.mem_fn)(total_cores, is_smoke);
}

if is_baseline {
Expand Down
Loading

0 comments on commit aa86b14

Please sign in to comment.