diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py index ac89b82f66..c1a1b8688b 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload.py @@ -6,24 +6,55 @@ import json -def validateDlioWorkload(workload, name): +def validateDlioWorkload(workload: dict, name: str): """Validates the given json workload object.""" - if ( - 'dlioWorkload' not in workload - or 'fioWorkload' in workload - or 'bucket' not in workload - ): - print( - f"{name} does not have 'dlioWorkload' or 'bucket' key in it, or" - " has 'fioWorkload' key in it" - ) + if 'dlioWorkload' not in workload: + print(f"{name} does not have 'dlioWorkload' key in it.") + return False + + if 'bucket' not in workload: + print(f"{name} does not have 'bucket' key in it.") + return False + + if 'fioWorkload' in workload: + print(f"{name} has 'fioWorkload' key in it, which is unexpected.") return False dlioWorkload = workload['dlioWorkload'] - for requiredAttribute in ['numFilesTrain', 'recordLength', 'batchSizes']: + for requiredAttribute, _type in { + 'numFilesTrain': int, + 'recordLength': int, + 'batchSizes': list, + }.items(): if requiredAttribute not in dlioWorkload: - print(f'dlioWorkload for {name} does not have {requiredAttribute} in it') + print( + f'In {name}, dlioWorkload does not have' + f' {requiredAttribute} in it' + ) return False + if not type(dlioWorkload[requiredAttribute]) is _type: + print( + f'In {name}, dlioWorkload[{requiredAttribute}] is of type' + f' {type(dlioWorkload[requiredAttribute])}, not of type {_type} ' + ) + return False + + for batchSize in dlioWorkload['batchSizes']: + if not type(batchSize) is int: + print( + f'In {name}, one of the batch-size values in' + f" dlioWorkload['batchSizes'] is '{batchSize}', which is of type" + f' {type(batchSize)}, not int' + ) + return False + if 
batchSize < 1: + print( + f'In {name}, one of the batch-size values in' + f" dlioWorkload['batchSizes'] is '{batchSize}' < 1, which is not" + ' supported.' + ) + return False + return True @@ -52,7 +83,7 @@ def __init__(self, scenario, numFilesTrain, recordLength, bucket, batchSizes): self.batchSizes = batchSizes -def ParseTestConfigForDlioWorkloads(testConfigFileName): +def ParseTestConfigForDlioWorkloads(testConfigFileName: str): """Parses the given workload test configuration file for DLIO workloads.""" print(f'Parsing {testConfigFileName} for DLIO workloads ...') with open(testConfigFileName) as f: @@ -80,15 +111,7 @@ def ParseTestConfigForDlioWorkloads(testConfigFileName): dlioWorkload['numFilesTrain'], dlioWorkload['recordLength'], workload['bucket'], - ( - dlioWorkload['batchSizes'].split(',') - if ( - 'batchSizes' in dlioWorkload - and dlioWorkload['batchSizes'] - and not str.isspace(dlioWorkload['batchSizes']) - ) - else [] - ), + dlioWorkload['batchSizes'], ) ) return dlioWorkloads diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py new file mode 100644 index 0000000000..ba85a7b3bc --- /dev/null +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/dlio_workload_test.py @@ -0,0 +1,177 @@ +"""This file defines unit tests for functionalities in dlio_workload.py""" + +import unittest +from dlio_workload import DlioWorkload, validateDlioWorkload + + +class DlioWorkloadTest(unittest.TestCase): + + def test_validate_dlio_workload_empty(self): + self.assertFalse(validateDlioWorkload(({}), "empty-dlio-workload")) + + def test_validate_dlio_workload_invalid_no_bucket(self): + self.assertFalse( + validateDlioWorkload(({"dlioWorkload": {}}), "invalid-dlio-workload-1") + ) + + def test_validate_dlio_workload_invalid_no_dlioWorkloadSpecified(self): + self.assertFalse( + validateDlioWorkload(({"bucket": {}}), "invalid-dlio-workload-2") + ) + + def 
test_validate_dlio_workload_invalid_commented_out_dlioWorkload(self): + self.assertFalse( + validateDlioWorkload( + ({"_dlioWorkload": {}, "bucket": "dummy-bucket"}), + "commented-out-dlio-workload", + ) + ) + + def test_validate_dlio_workload_invalid_mixed_dlioWorkload_fioWorkload(self): + self.assertFalse( + validateDlioWorkload( + ({ + "dlioWorkload": {}, + "fioWorkload": {}, + "bucket": "dummy-bucket", + }), + "mixed-dlio/fio-workload", + ) + ) + + def test_validate_dlio_workload_invalid_missing_numFilesTrain(self): + workload = dict({ + "dlioWorkload": { + "recordLength": 10000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-missing-numFilesTrain" + ) + ) + pass + + def test_validate_dlio_workload_invalid_unsupported_numFilesTrain(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": "1000", + "recordLength": 10000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-unsupported-numFilesTrain" + ) + ) + pass + + def test_validate_dlio_workload_invalid_missing_recordLength(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-missing-recordLength" + ) + ) + pass + + def test_validate_dlio_workload_invalid_unsupported_recordLength(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": "10000", + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-unsupported-recordLength" + ) + ) + pass + + def test_validate_dlio_workload_invalid_missing_batchSizes(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + }, + "bucket": "dummy-bucket", + 
}) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-missing-batchSizes" + ) + ) + pass + + def test_validate_dlio_workload_invalid_unsupported_batchSizes1(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": ["100"], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-unsupported-batchSizes1" + ) + ) + pass + + def test_validate_dlio_workload_invalid_unsupported_batchSizes2(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": [0, -1], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateDlioWorkload( + workload, "invalid-dlio-workload-unsupported-batchSizes2" + ) + ) + pass + + def test_validate_dlio_workload_valid_single_batchSize(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": [100], + }, + "bucket": "dummy-bucket", + }) + self.assertTrue(validateDlioWorkload(workload, "valid-dlio-workload-2")) + pass + + def test_validate_dlio_workload_valid_multiple_batchSizes(self): + workload = dict({ + "dlioWorkload": { + "numFilesTrain": 1000, + "recordLength": 10000, + "batchSizes": [100, 200], + }, + "bucket": "dummy-bucket", + }) + self.assertTrue(validateDlioWorkload(workload, "valid-dlio-workload-2")) + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py index 1afe99487d..d329f69103 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload.py @@ -6,29 +6,61 @@ import json -def validateFioWorkload(workload, name): +def validateFioWorkload(workload: dict, name: str): """Validates the given json workload object.""" - if ( - 'fioWorkload' not in workload - 
or 'dlioWorkload' in workload - or 'bucket' not in workload - ): - print( - f"{name} does not have 'fioWorkload' or 'bucket' key in it, or" - " has 'dlioWorkload' key in it." - ) + if 'fioWorkload' not in workload: + print(f"{name} does not have 'fioWorkload' key in it.") + return False + + if 'bucket' not in workload: + print(f"{name} does not have 'bucket' key in it.") + return False + + if 'dlioWorkload' in workload: + print(f"{name} has 'dlioWorkload' key in it, which is unexpected.") return False fioWorkload = workload['fioWorkload'] - for requiredAttribute in [ - 'fileSize', - 'blockSize', - 'filesPerThread', - 'numThreads', - ]: + for requiredAttribute, _type in { + 'fileSize': str, + 'blockSize': str, + 'filesPerThread': int, + 'numThreads': int, + }.items(): if requiredAttribute not in fioWorkload: - print(f'fioWorkload for {name} does not have {requiredAttribute} in it.') + print(f'In {name}, fioWorkload does not have {requiredAttribute} in it') + return False + if not type(fioWorkload[requiredAttribute]) is _type: + print( + f'In {name}, fioWorkload[{requiredAttribute}] is of type' + f' {type(fioWorkload[requiredAttribute])}, not of type {_type} ' + ) return False + + if 'readTypes' in fioWorkload: + readTypes = fioWorkload['readTypes'] + if not type(readTypes) is list: + print( + f"In {name}, fioWorkload['readTypes'] is of type {type(readTypes)}," + " not 'list'." + ) + return False + for readType in readTypes: + if not type(readType) is str: + print( + f'In {name}, one of the values in' + f" fioWorkload['readTypes'] is '{readType}', which is of type" + f' {type(readType)}, not str' + ) + return False + if not readType == 'read' and not readType == 'randread': + print( + f"In {name}, one of the values in fioWorkload['readTypes'] is" + f" '{readType}' which is not a supported value. 
Supported values" + ' are read, randread' + ) + return False + return True @@ -55,13 +87,13 @@ class FioWorkload: def __init__( self, - scenario, - fileSize, - blockSize, - filesPerThread, - numThreads, - bucket, - readTypes, + scenario: str, + fileSize: str, + blockSize: str, + filesPerThread: int, + numThreads: int, + bucket: str, + readTypes: list, ): self.scenario = scenario self.fileSize = fileSize @@ -80,7 +112,7 @@ def PPrint(self): ) -def ParseTestConfigForFioWorkloads(fioTestConfigFile): +def ParseTestConfigForFioWorkloads(fioTestConfigFile: str): """Parses the given workload test configuration file for FIO workloads.""" print(f'Parsing {fioTestConfigFile} for FIO workloads ...') with open(fioTestConfigFile) as f: @@ -111,11 +143,7 @@ def ParseTestConfigForFioWorkloads(fioTestConfigFile): fioWorkload['numThreads'], workload['bucket'], ( - ( - [] - if str.isspace(fioWorkload['readTypes']) - else fioWorkload['readTypes'].split(',') - ) + fioWorkload['readTypes'] if 'readTypes' in fioWorkload else ['read', 'randread'] ), diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py new file mode 100644 index 0000000000..c6b44ddcca --- /dev/null +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/fio_workload_test.py @@ -0,0 +1,261 @@ +"""This file defines unit tests for functionalities in fio_workload.py""" + +import unittest +from fio_workload import FioWorkload, validateFioWorkload + + +class FioWorkloadTest(unittest.TestCase): + + def test_validate_fio_workload_empty(self): + self.assertFalse(validateFioWorkload(({}), "empty-fio-workload")) + + def test_validate_fio_workload_invalid_no_bucket(self): + self.assertFalse( + validateFioWorkload(({"fioWorkload": {}}), "invalid-fio-workload-1") + ) + + def test_validate_fio_workload_invalid_no_fioWorkloadSpecified(self): + self.assertFalse( + validateFioWorkload(({"bucket": {}}), "invalid-fio-workload-2") + ) + + def 
test_validate_fio_workload_invalid_commented_out_fioWorkload(self): + self.assertFalse( + validateFioWorkload( + ({"_fioWorkload": {}, "bucket": "dummy-bucket"}), + "commented-out-fio-workload", + ) + ) + + def test_validate_fio_workload_invalid_mixed_fioWorkload_dlioWorkload(self): + self.assertFalse( + validateFioWorkload( + ({"fioWorkload": {}, "dlioWorkload": {}, "bucket": "dummy-bucket"}), + "mixed-fio/dlio-workload", + ) + ) + + def test_validate_fio_workload_invalid_missing_fileSize(self): + workload = dict({ + "fioWorkload": { + "filesPerThread": 2, + "numThreads": 100, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload(workload, "invalid-fio-workload-missing-fileSize") + ) + pass + + def test_validate_fio_workload_invalid_unsupported_fileSize(self): + workload = dict({ + "fioWorkload": { + "fileSize": 1000, + "filesPerThread": 2, + "numThreads": 100, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-fileSize" + ) + ) + pass + + def test_validate_fio_workload_invalid_missing_blockSize(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "numThreads": 100, + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload(workload, "invalid-fio-workload-missing-blockSize") + ) + pass + + def test_validate_fio_workload_invalid_unsupported_blockSize(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "blockSize": 1000, + "filesPerThread": 2, + "numThreads": 100, + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-blockSize" + ) + ) + pass + + def test_validate_fio_workload_invalid_missing_filesPerThread(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "numThreads": 100, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + 
validateFioWorkload( + workload, "invalid-fio-workload-missing-filesPerThread" + ) + ) + pass + + def test_validate_fio_workload_invalid_unsupported_filesPerThread(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": "1k", + "numThreads": 100, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-filesPerThread" + ) + ) + pass + + def test_validate_fio_workload_invalid_missing_numThreads(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload(workload, "invalid-fio-workload-missing-numThreads") + ) + pass + + def test_validate_fio_workload_invalid_unsupported_numThreads(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": "1k", + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-numThreads" + ) + ) + pass + + def test_validate_fio_workload_invalid_unsupported_readTypes_1(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": 10, + "readTypes": True, + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-readTypes-1" + ) + ) + pass + + def test_validate_fio_workload_invalid_unsupported_readTypes_2(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": 10, + "readTypes": ["read", 1], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-readTypes-2" + ) + ) + pass + + def test_validate_fio_workload_invalid_unsupported_readTypes_3(self): + workload = dict({ + "fioWorkload": { 
+ "fileSize": "1kb", + "filesPerThread": 2, + "blockSize": "1kb", + "numThreads": 10, + "readTypes": ["read", "write"], + }, + "bucket": "dummy-bucket", + }) + self.assertFalse( + validateFioWorkload( + workload, "invalid-fio-workload-unsupported-readTypes-3" + ) + ) + pass + + def test_validate_fio_workload_valid_without_readTypes(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "numThreads": 100, + "blockSize": "1kb", + }, + "bucket": "dummy-bucket", + }) + self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-1")) + pass + + def test_validate_fio_workload_valid_with_readTypes(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "numThreads": 100, + "blockSize": "1kb", + "readTypes": ["read", "randread"], + }, + "bucket": "dummy-bucket", + }) + self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-2")) + pass + + def test_validate_fio_workload_valid_with_single_readType(self): + workload = dict({ + "fioWorkload": { + "fileSize": "1kb", + "filesPerThread": 2, + "numThreads": 100, + "blockSize": "1kb", + "readTypes": ["randread"], + }, + "bucket": "dummy-bucket", + }) + self.assertTrue(validateFioWorkload(workload, "valid-fio-workload-2")) + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/perfmetrics/scripts/testing_on_gke/examples/workloads.json b/perfmetrics/scripts/testing_on_gke/examples/workloads.json index d3be21dd11..3b8d5ee0b4 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/workloads.json +++ b/perfmetrics/scripts/testing_on_gke/examples/workloads.json @@ -8,12 +8,12 @@ { "_description": "This is a dummy fio workload (missing the 'fioWorkload' field), purely standing as a header and does not execute any workload. 
For it to execute a fio workload, it must have a valid 'fioWorkload' object and a valid 'bucket' attribute.", "_fioWorkload": { - "_description": "Every fioWorkload must have fileSize, filesPerThread, numThreads, and blockSize fields. readTypes is an optional comma-separated field with supported values 'read' and 'randread', which if missed equals 'read,randread'.", + "_description": "Every fioWorkload must have fileSize, filesPerThread, numThreads, and blockSize fields. readTypes is an array of string values 'read' and 'randread'. If readTypes is missing, then it defaults to [\"read\",\"randread\"].", "fileSize": "64k", "filesPerThread": 20000, "numThreads": 50, "blockSize": "64K", - "readTypes": "read or randread or read,randread" + "readTypes": ["read","randread"] }, "bucket":"The bucket must have objects with name Workload.{i}/{j} for every i,j where i:0-{numThreads}-1, j:0-{filesPerThread}-1, and each of these objects must be of size {fileSize}. The buckets gke-* are all in us-central1, are owned by GKE team and are in their GCP project(s)." 
}, @@ -23,7 +23,7 @@ "filesPerThread": 20000, "numThreads": 50, "blockSize": "64K", - "readTypes": "read" + "readTypes": ["read"] }, "bucket":"fio-64k-1m-us-west1", "_bucket_alt2":"fio-64k-1m-us-central1", @@ -35,7 +35,7 @@ "filesPerThread": 20000, "numThreads": 50, "blockSize": "128K", - "readTypes": "read" + "readTypes": ["read"] }, "bucket":"fio-128k-1m-us-west1", "_bucket_alt2":"fio-128k-1m-us-central1", @@ -47,7 +47,7 @@ "filesPerThread": 20000, "numThreads": 50, "blockSize": "256K", - "readTypes": "read,randread" + "readTypes": ["read","randread"] }, "bucket":"fio-1mb-1m-us-west1", "_bucket_alt2":"fio-1mb-1m-us-central1", @@ -71,7 +71,7 @@ "filesPerThread": 1, "numThreads": 100, "blockSize": "1M", - "readTypes": "read" + "readTypes": ["read"] }, "bucket":"fio-200gb-1-us-west1", "_bucket_alt2":"fio-200gb-1-us-central1", @@ -80,18 +80,18 @@ { "_description": "This is a dummy dlio workload (missing the 'dlioWorkload' field), purely standing as a header and does not execute any workload. For it to execute a dlio workload, it must have a valid 'dlioWorkload' object and a valid 'bucket' attribute.", "_dlioWorkload": { - "_description": "Every dlioWorkload must have numFilesTrain, recordLength, and batchSizes fields. batchSizes is a comma-separated field with integer values", - "numFilesTrain": "500000", - "recordLength": "102400", - "batchSizes":"800,128" + "_description": "Every dlioWorkload must have numFilesTrain, recordLength, and batchSizes fields. batchSizes is an array of integer values", + "numFilesTrain": 500000, + "recordLength": 102400, + "batchSizes": [800,128] }, "bucket":"The bucket must have objects with name 'train/', 'valid/', and train/img_{i}_of_{numFilesTrain}.npz for every i where i:0-{numFilesTrain}-1 and each train/img_{i}_of_{numFilesTrain}.npz must be of size {recordLength} bytes. The buckets gke-* are all in us-central1, are owned by GKE team and are in their GCP project(s)." 
}, { "_dlioWorkload": { - "numFilesTrain": "500000", - "recordLength": "102400", - "batchSizes":"800,128" + "numFilesTrain": 500000, + "recordLength": 102400, + "batchSizes": [800,128] }, "bucket":"dlio-unet3d-100kb-500k-us-west1", "_bucket_alt2":"dlio-unet3d-100kb-500k-us-central1", @@ -99,9 +99,9 @@ }, { "_dlioWorkload": { - "numFilesTrain": "1000000", - "recordLength": "512000", - "batchSizes":"800,128" + "numFilesTrain": 1000000, + "recordLength": 512000, + "batchSizes": [800,128] }, "bucket":"dlio-unet3d-500kb-1m-us-west1", "_bucket_alt2":"dlio-unet3d-500kb-1m-us-central1", @@ -109,9 +109,9 @@ }, { "dlioWorkload": { - "numFilesTrain": "100000", - "recordLength": "3145728", - "batchSizes":"200" + "numFilesTrain": 100000, + "recordLength": 3145728, + "batchSizes": [200] }, "bucket":"dlio-unet3d-3mb-100k-us-west1", "_bucket_alt2":"dlio-unet3d-3mb-100k-us-central1", @@ -119,9 +119,9 @@ }, { "_dlioWorkload": { - "numFilesTrain": "5000", - "recordLength": "157286400", - "batchSizes":"4" + "numFilesTrain": 5000, + "recordLength": 157286400, + "batchSizes": [4] }, "bucket":"dlio-unet3d-150mb-5k-us-west1", "_bucket_alt2":"dlio-unet3d-150mb-5k-us-central1",