diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 445ce97f3..2acfdb838 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -98,16 +98,16 @@
             "dlrm-v2-99.9": ("AUC", 80.31 * 0.999),
             "3d-unet-99": ("DICE", 0.86170 * 0.99),
             "3d-unet-99.9": ("DICE", 0.86170 * 0.999),
-            "gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9),
-            "gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9),
-            "llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9),
-            "llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9),
+            "gptj-99": ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878 * 0.9),
+            "gptj-99.9": ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878 * 0.9),
+            "llama2-70b-99": ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
+            "llama2-70b-99.9": ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
             "stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758)
         },
         "accuracy-upper-limit": {
             "stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626),
-            "llama2-70b-99" : ("TOKENS_PER_SAMPLE", 294.45*1.1),
-            "llama2-70b-99.9" : ("TOKENS_PER_SAMPLE", 294.45*1.1)
+            "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1),
+            "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1)
         },
         "accuracy-delta-perc": {
             "stable-diffusion-xl": {
@@ -163,7 +163,7 @@
             "gptj-99.9": {"Server": 20000000000},
             "llama2-70b-99": {"Server": 20000000000},
             "llama2-70b-99.9": {"Server": 20000000000},
-            "stable-diffusion-xl" : {"Server": 20000000000}
+            "stable-diffusion-xl": {"Server": 20000000000}
         },
         "min-queries": {
             "resnet": {
@@ -263,18 +263,18 @@
             "dlrm-v2-99.9": ("AUC", 80.31 * 0.999),
             "3d-unet-99": ("DICE", 0.86170 * 0.99),
             "3d-unet-99.9": ("DICE", 0.86170 * 0.999),
-            "gptj-99" : ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878*0.9),
-            "gptj-99.9" : ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878*0.9),
-            "llama2-70b-99" : ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45*0.9),
-            "llama2-70b-99.9" : ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45*0.9),
+            "gptj-99": ("ROUGE1", 42.9865 * 0.99, "ROUGE2", 20.1235 * 0.99, "ROUGEL", 29.9881 * 0.99, "GEN_LEN", 4016878 * 0.9),
+            "gptj-99.9": ("ROUGE1", 42.9865 * 0.999, "ROUGE2", 20.1235 * 0.999, "ROUGEL", 29.9881 * 0.999, "GEN_LEN", 4016878 * 0.9),
+            "llama2-70b-99": ("ROUGE1", 44.4312 * 0.99, "ROUGE2", 22.0352 * 0.99, "ROUGEL", 28.6162 * 0.99, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
+            "llama2-70b-99.9": ("ROUGE1", 44.4312 * 0.999, "ROUGE2", 22.0352 * 0.999, "ROUGEL", 28.6162 * 0.999, "TOKENS_PER_SAMPLE", 294.45 * 0.9),
             "stable-diffusion-xl": ("CLIP_SCORE", 31.68631873, "FID_SCORE", 23.01085758),
-            "mixtral-8x7b" : ("ROUGE1", 45.4911 * 0.99, "ROUGE2", 23.2829 * 0.99, "ROUGEL", 30.3615 * 0.99, "TOKENS_PER_SAMPLE", 145.9 * 0.9, "gsm8k_accuracy", 73.78*0.99, "mbxp_accuracy", 60.12 * 0.99),
+            "mixtral-8x7b": ("ROUGE1", 45.4911 * 0.99, "ROUGE2", 23.2829 * 0.99, "ROUGEL", 30.3615 * 0.99, "TOKENS_PER_SAMPLE", 145.9 * 0.9, "gsm8k_accuracy", 73.78 * 0.99, "mbxp_accuracy", 60.12 * 0.99),
         },
         "accuracy-upper-limit": {
             "stable-diffusion-xl": ("CLIP_SCORE", 31.81331801, "FID_SCORE", 23.95007626),
-            "llama2-70b-99" : ("TOKENS_PER_SAMPLE", 294.45*1.1),
-            "llama2-70b-99.9" : ("TOKENS_PER_SAMPLE", 294.45*1.1),
-            "mixtral-8x7b" : ("TOKENS_PER_SAMPLE", 145.9 * 1.1)
+            "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1),
+            "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1),
+            "mixtral-8x7b": ("TOKENS_PER_SAMPLE", 145.9 * 1.1)
         },
         "accuracy-delta-perc": {
             "stable-diffusion-xl": {
@@ -329,7 +329,7 @@
             "gptj-99.9": {"Server": 20000000000},
             "llama2-70b-99": {"Server": 20000000000},
             "llama2-70b-99.9": {"Server": 20000000000},
-            "stable-diffusion-xl" : {"Server": 20000000000}
+            "stable-diffusion-xl": {"Server": 20000000000}
             # TODO: Mixtral metrics
             # "mixtral-8x7b" : {"Server": 20000000000}
         },
@@ -514,19 +514,19 @@
 }

 LLM_LATENCY_LIMITS = {
-    "llama2-70b-99":{
+    "llama2-70b-99": {
         "conversational": {
             "ttft": 2000 * 1000000,
             "tpot": 200 * 1000000
         }
     },
-    "llama2-70b-99.9":{
+    "llama2-70b-99.9": {
         "conversational": {
             "ttft": 2000 * 1000000,
             "tpot": 200 * 1000000
         }
     },
-    "mixtral-8x7b":{
+    "mixtral-8x7b": {
         "conversational": {
             "ttft": 2000 * 1000000,
             "tpot": 200 * 1000000
@@ -755,7 +755,7 @@ def get_accuracy_target(self, model):
         if model not in self.accuracy_target:
             raise ValueError("model not known: " + model)
         return self.accuracy_target[model]
-
+
     def get_accuracy_upper_limit(self, model):
         return self.accuracy_upper_limit.get(model, None)

@@ -781,12 +781,12 @@ def get_min_query_count(self, model, scenario):
         if model not in self.min_queries:
             raise ValueError("model not known: " + model)
         return self.min_queries[model].get(scenario)
-
+
     def get_delta_perc(self, model, metric):
         if model in self.accuracy_delta_perc:
             if metric in self.accuracy_delta_perc[model]:
                 return self.accuracy_delta_perc[model][metric]
-
+
         more_accurate = model.find("99.9")
         if more_accurate == -1:
             required_delta_perc = 1
@@ -797,12 +797,11 @@ def get_delta_perc(self, model, metric):
         return required_delta_perc
     def has_new_logging_format(self):
         return True
-
     def uses_early_stopping(self, scenario):
         return (
             scenario in ["Server", "SingleStream", "MultiStream"]
         )
-
+
     def requires_equal_issue(self, model, division):
         return (
             division in ["closed", "network"] and
@@ -812,7 +811,7 @@ def requires_equal_issue(self, model, division):
                 "gptj-99",
                 "gptj-99.9",
                 "llama2-70b-99",
-                "llama2-70b-99.9",
+                "llama2-70b-99.9",
                 "mixtral-8x7b"
             ]
             and self.version in ["v4.1"]
@@ -830,7 +829,10 @@ def get_args():
         help="mlperf version",
     )
     parser.add_argument("--submitter", help="filter to submitter")
-    parser.add_argument("--csv", default="summary.csv", help="csv file with results")
+    parser.add_argument(
+        "--csv",
+        default="summary.csv",
+        help="csv file with results")
     parser.add_argument(
         "--skip_compliance",
         action="store_true",
@@ -841,7 +843,10 @@ def get_args():
         help="File containing extra custom model mapping. It is assumed to be inside the folder open/",
         default="model_mapping.json",
     )
-    parser.add_argument("--debug", action="store_true", help="extra debug output")
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="extra debug output")
     parser.add_argument(
         "--submission-exceptions",
         action="store_true",
@@ -883,17 +888,20 @@ def get_args():

 def list_dir(*path):
     path = os.path.join(*path)
-    return [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]
+    return [f for f in os.listdir(
+        path) if os.path.isdir(os.path.join(path, f))]


 def list_files(*path):
     path = os.path.join(*path)
-    return [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+    return [f for f in os.listdir(
+        path) if os.path.isfile(os.path.join(path, f))]


 def list_empty_dirs_recursively(*path):
     path = os.path.join(*path)
-    return [dirpath for dirpath, dirs, files in os.walk(path) if not dirs and not files]
+    return [dirpath for dirpath, dirs, files in os.walk(
+        path) if not dirs and not files]


 def list_dirs_recursively(*path):
@@ -919,19 +927,23 @@ def check_extra_files(path, target_files):
             check_pass = False
             missing_files.append(os.path.join(path, dir))
         else:
-            files = [f.split(".")[0] for f in list_files(os.path.join(path, dir))]
+            files = [f.split(".")[0]
+                     for f in list_files(os.path.join(path, dir))]
             for target_file in target_files[dir]:
                 if target_file not in files:
                     check_pass = False
-                    missing_files.append(f"{os.path.join(path, dir, target_file)}.png")
+                    missing_files.append(
+                        f"{os.path.join(path, dir, target_file)}.png")
             if "captions" not in files:
-                missing_files.append(f"{os.path.join(path, dir, 'captions.txt')}")
+                missing_files.append(
+                    f"{os.path.join(path, dir, 'captions.txt')}")
     return check_pass, missing_files


 def split_path(m):
     return m.replace("\\", "/").split("/")

+
 def get_boolean(s):
     if s is None:
         return False
@@ -942,7 +954,8 @@ def get_boolean(s):
     elif isinstance(s, int):
         return bool(s)
     else:
-        raise TypeError(f"Variable should be bool, string or int, got {type(s)} instead")
+        raise TypeError(
+            f"Variable should be bool, string or int, got {type(s)} instead")


 def find_error_in_detail_log(config, fname):
@@ -987,19 +1000,20 @@ def check_accuracy_dir(config, model, path, verbose):
         up_patterns = []
         acc_limit_check = True
         for i in range(0, len(acc_upper_limit), 2):
-            acc_type, acc_target = acc_upper_limit[i:i+2]
+            acc_type, acc_target = acc_upper_limit[i:i + 2]
             acc_limits.append(acc_target)
             up_patterns.append(ACC_PATTERN[acc_type])

     for i in range(0, len(target), 2):
-        acc_type, acc_target = target[i:i+2]
+        acc_type, acc_target = target[i:i + 2]
         patterns.append(ACC_PATTERN[acc_type])
         acc_targets.append(acc_target)
         acc_types.append(acc_type)
     acc_seen = [False for _ in acc_targets]
     with open(os.path.join(path, "accuracy.txt"), "r", encoding="utf-8") as f:
         for line in f:
-            for i, (pattern, acc_target, acc_type) in enumerate(zip(patterns, acc_targets, acc_types)):
+            for i, (pattern, acc_target, acc_type) in enumerate(
+                    zip(patterns, acc_targets, acc_types)):
                 m = re.match(pattern, line)
                 if m:
                     acc = m.group(1)
@@ -1011,24 +1025,34 @@ def check_accuracy_dir(config, model, path, verbose):
                         acc_seen[i] = True
                     elif acc is not None:
                         all_accuracy_valid = False
-                        log.warning("%s accuracy not met: expected=%f, found=%s", path, acc_target, acc)
+                        log.warning(
+                            "%s accuracy not met: expected=%f, found=%s",
+                            path,
+                            acc_target,
+                            acc)
                     if acc:
                         result_acc[acc_type] = acc
                     acc = None
             if acc_upper_limit is not None:
-                for i, (pattern, acc_limit) in enumerate(zip(up_patterns, acc_limits)):
+                for i, (pattern, acc_limit) in enumerate(
+                        zip(up_patterns, acc_limits)):
                     m = re.match(pattern, line)
                     if m:
                         acc = m.group(1)
            m = re.match(r"^hash=([\w\d]+)$", line)
            if m:
                hash_val = m.group(1)
-            if acc is not None and acc_upper_limit is not None and float(acc) > acc_limit:
+            if acc is not None and acc_upper_limit is not None and float(
+                    acc) > acc_limit:
                acc_limit_check = False
-                log.warning("%s accuracy not met: upper limit=%f, found=%s", path, acc_limit, acc)
+                log.warning(
+                    "%s accuracy not met: upper limit=%f, found=%s",
+                    path,
+                    acc_limit,
+                    acc)
            acc = None
            if all(acc_seen) and hash_val:
-                break;
+                break
     is_valid = all_accuracy_valid & all(acc_seen)
     if acc_upper_limit is not None:
         is_valid &= acc_limit_check
@@ -1062,20 +1086,23 @@
             return None, True
         else:
             for constraint, limits in LLM_LATENCY_LIMITS[model].items():
-                if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits["ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]:
+                if mlperf_log["result_first_token_99.00_percentile_latency_ns"] < limits[
+                        "ttft"] and mlperf_log["result_time_per_output_token_99.00_percentile_ns"] < limits["tpot"]:
                     return constraint, True
     else:
-        log.error(f'use_token_latencies flag needs to be enabled for Llama2 benchmark')
+        log.error(
+            f'use_token_latencies flag needs to be enabled for Llama2 benchmark')
        return None, False
-    log.error(f'Failed Llama2 extra check for TTFT and TPOT. TTFT 99-tile: {mlperf_log["result_first_token_99.00_percentile_latency_ns"]}, TPOT 99-tile: {mlperf_log["result_time_per_output_token_99.00_percentile_ns"]}')
+    log.error(
+        f'Failed Llama2 extra check for TTFT and TPOT. TTFT 99-tile: {mlperf_log["result_first_token_99.00_percentile_latency_ns"]}, TPOT 99-tile: {mlperf_log["result_time_per_output_token_99.00_percentile_ns"]}')
     return None, False
-
+

 def get_performance_metric(
     config, model, path, scenario_fixed, division, system_json, has_power=False
 ):
-    #Assumes new logging format
+    # Assumes new logging format
     version = config.version

     fname = os.path.join(path, "mlperf_log_detail.txt")
@@ -1088,15 +1115,19 @@
     scenario = mlperf_log["effective_scenario"]

     res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
-    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
-        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])
+    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[
+            version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
+        res = float(
+            mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])

     inferred = False
     if scenario_fixed != scenario:
-        inferred, res = get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, False)
+        inferred, res = get_inferred_result(
+            scenario_fixed, scenario, res, mlperf_log, config, False)

     return res

+
 def check_performance_dir(
     config, model, path, scenario_fixed, division, system_json, has_power=False
 ):
@@ -1119,12 +1150,14 @@
     scenario = mlperf_log["effective_scenario"]

     res = float(mlperf_log[RESULT_FIELD_NEW[version][scenario]])
-    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
-        res = float(mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])
+    if version in RESULT_FIELD_BENCHMARK_OVERWRITE and model in RESULT_FIELD_BENCHMARK_OVERWRITE[
+            version] and scenario in RESULT_FIELD_BENCHMARK_OVERWRITE[version][model]:
+        res = float(
+            mlperf_log[RESULT_FIELD_BENCHMARK_OVERWRITE[version][model][scenario]])
-
     if model in ["llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b"]:
-        llama_constraint, is_valid = extra_check_llm(mlperf_log, scenario_fixed, model)
+        llama_constraint, is_valid = extra_check_llm(
+            mlperf_log, scenario_fixed, model)

     latency_99_percentile = mlperf_log["result_99.00_percentile_latency_ns"]
     latency_mean = mlperf_log["result_mean_latency_ns"]
@@ -1136,7 +1169,8 @@
     min_query_count = mlperf_log["effective_min_query_count"]
     samples_per_query = mlperf_log["effective_samples_per_query"]
     min_duration = mlperf_log["effective_min_duration_ms"]
-    equal_issue_used_check = (mlperf_log["effective_sample_concatenate_permutation"] == "true")
+    equal_issue_used_check = (
+        mlperf_log["effective_sample_concatenate_permutation"] == "true")
     if not config.requires_equal_issue(model, division):
         equal_issue_used_check = True
     sut_name = mlperf_log["sut_name"]
@@ -1146,7 +1180,8 @@
     if not find_error_in_detail_log(config, fname):
         is_valid = False

-    required_performance_sample_count = config.get_performance_sample_count(model)
+    required_performance_sample_count = config.get_performance_sample_count(
+        model)
     if performance_sample_count < required_performance_sample_count:
         log.error(
             "%s performance_sample_count, found %d, needs to be >= %d",
@@ -1184,7 +1219,6 @@
     if scenario == "SingleStream" or scenario == "MultiStream":
         res /= MS_TO_NS

-
     # Check if the current scenario uses early stopping
     uses_early_stopping = config.uses_early_stopping(scenario)

@@ -1201,7 +1235,8 @@
         # If the scenario has a target latency (Server scenario), check
         # that the target latency that was passed to the early stopping
         # is less than the target latency.
-        target_latency = config.latency_constraint.get(model, dict()).get(scenario)
+        target_latency = config.latency_constraint.get(
+            model, dict()).get(scenario)
         if target_latency:
             early_stopping_latency_ns = mlperf_log["effective_target_latency_ns"]
             log.info(
@@ -1221,7 +1256,8 @@
     else:

         # check if the benchmark meets latency constraint
-        target_latency = config.latency_constraint.get(model, dict()).get(scenario)
+        target_latency = config.latency_constraint.get(
+            model, dict()).get(scenario)
         log.info(
             "Target latency: %s, Latency: %s, Scenario: %s",
             target_latency,
@@ -1252,7 +1288,8 @@
             )
             is_valid = False

-    if scenario == "Offline" and (samples_per_query < OFFLINE_MIN_SPQ_SINCE_V4[model]):
+    if scenario == "Offline" and (
+            samples_per_query < OFFLINE_MIN_SPQ_SINCE_V4[model]):
         log.error(
             "%s Required minimum samples per query not met by user config, Expected=%s, Found=%s",
             fname,
@@ -1275,14 +1312,16 @@

     inferred = False
     if scenario_fixed != scenario:
-        inferred, res = get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, True)
+        inferred, res = get_inferred_result(
+            scenario_fixed, scenario, res, mlperf_log, config, True)

     is_network_system, is_network_mode_valid = is_system_over_network(
         division, system_json, path
     )
     is_valid &= is_network_mode_valid
     if is_network_system:
-        # for network mode verify the SUT name is valid, according to the rules (must include "Network SUT" in name)
+        # for network mode verify the SUT name is valid, according to the rules
+        # (must include "Network SUT" in name)
         if NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME not in sut_name:
             log.error(
                 f"{fname} invalid sut name for network mode. expecting the substring '{NETWORK_MODE_REQUIRED_SUBSTRING_IN_SUT_NAME}' got '{sut_name}'"
@@ -1291,7 +1330,9 @@
     return is_valid, res, inferred, equal_issue_used_check


-def get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, log_error=False):
+
+def get_inferred_result(scenario_fixed, scenario, res,
+                        mlperf_log, config, log_error=False):
     inferred = False

     # Check if current scenario (and version) uses early stopping
@@ -1305,7 +1346,8 @@ def get_inferred_result(scenario_fixed, scenario, res, mlperf_log, config, log_e
         latency_mean = mlperf_log["result_mean_query_latency_ns"]
         samples_per_query = mlperf_log["effective_samples_per_query"]
     if scenario == "SingleStream":
-        # qps_wo_loadgen_overhead is only used for inferring Offline from SingleStream; only for old submissions
+        # qps_wo_loadgen_overhead is only used for inferring Offline from
+        # SingleStream; only for old submissions
         qps_wo_loadgen_overhead = mlperf_log["result_qps_without_loadgen_overhead"]

     # special case for results inferred from different scenario
@@ -1338,11 +1380,12 @@
             res = (latency_99_percentile * samples_per_query) / MS_TO_NS
     return inferred, res

+
 def get_power_metric(config, scenario_fixed, log_path, is_valid, res):
     # parse the power logs
     server_timezone = datetime.timedelta(0)
     client_timezone = datetime.timedelta(0)
-
+
     detail_log_fname = os.path.join(log_path, "mlperf_log_detail.txt")
     mlperf_log = MLPerfLog(detail_log_fname)
     datetime_format = "%m-%d-%Y %H:%M:%S.%f"
@@ -1356,7 +1399,7 @@
     )
     # Obtain the scenario also from logs to check if power is inferred
     scenario = mlperf_log["effective_scenario"]
-
+
     spl_fname = os.path.join(log_path, "spl.txt")
     power_list = []
     with open(spl_fname) as f:
@@ -1389,7 +1432,8 @@
         avg_power_efficiency = res / avg_power

     else:
-        # In SingleStream and MultiStream scenarios, the power metric is in mJ/query.
+        # In SingleStream and MultiStream scenarios, the power metric is in
+        # mJ/query.
         assert scenario_fixed in [
             "MultiStream",
             "SingleStream",
@@ -1432,11 +1476,17 @@

     # check if all the required files are present
     required_files = REQUIRED_PERF_FILES + REQUIRED_PERF_POWER_FILES
-    diff = files_diff(list_files(testing_path), required_files, OPTIONAL_PERF_FILES)
+    diff = files_diff(
+        list_files(testing_path),
+        required_files,
+        OPTIONAL_PERF_FILES)
     if diff:
         log.error("%s has file list mismatch (%s)", testing_path, diff)
         is_valid = False
-    diff = files_diff(list_files(ranging_path), required_files, OPTIONAL_PERF_FILES)
+    diff = files_diff(
+        list_files(ranging_path),
+        required_files,
+        OPTIONAL_PERF_FILES)
     if diff:
         log.error("%s has file list mismatch (%s)", ranging_path, diff)
         is_valid = False
@@ -1474,7 +1524,9 @@
         sys.stdout.flush()
         sys.stderr.flush()
         if check_power_result != 0:
-            log.error("Power WG power_checker.py did not pass for: %s", perf_path)
+            log.error(
+                "Power WG power_checker.py did not pass for: %s",
+                perf_path)
             is_valid = False

     return is_valid, power_metric, power_efficiency_testing
@@ -1620,19 +1672,19 @@ def log_result(
             "Offline": "Tokens/s",
             "Server": "Tokens/s",
         },
-        "llama2-70b-99" : {
+        "llama2-70b-99": {
             "SingleStream": "Latency (ms)",
             "MultiStream": "Latency (ms)",
             "Offline": "Tokens/s",
             "Server": "Tokens/s",
         },
-        "llama2-70b-99.9" : {
+        "llama2-70b-99.9": {
             "SingleStream": "Latency (ms)",
             "MultiStream": "Latency (ms)",
             "Offline": "Tokens/s",
             "Server": "Tokens/s",
         },
-        "mixtral-8x7b" : {
+        "mixtral-8x7b": {
             "SingleStream": "Latency (ms)",
             "MultiStream": "Latency (ms)",
             "Offline": "Tokens/s",
@@ -1657,8 +1709,8 @@
     unit = special_unit_dict.get(model_name, unit_dict)[scenario_fixed]
     power_unit = power_unit_dict[scenario_fixed]

-
-    if (power_metric <= 0) or (not get_boolean(system_json.get("system_power_only"))):
+    if (power_metric <= 0) or (not get_boolean(
+            system_json.get("system_power_only"))):
         csv.write(
             fmt.format(
                 submitter,
@@ -1690,7 +1742,7 @@
                 unit,
                 '"' + weight_data_types + '"',
             )
-            )
+        )

     if power_metric > 0:
         csv.write(
@@ -1770,7 +1822,7 @@
             if not os.path.exists(results_path):
                 continue

-            ## Apply folder checks
+            # Apply folder checks
             dirs = list_dirs_recursively(division, submitter)
             files = list_files_recursively(division, submitter)

@@ -1871,7 +1923,9 @@
                     extra_model_mapping = json.load(fp)

            for system_desc in list_dir(results_path):
-                # we are looking at ./$division/$submitter/results/$system_desc, ie ./closed/mlperf_org/results/t4-ort
+                # we are looking at
+                # ./$division/$submitter/results/$system_desc, ie
+                # ./closed/mlperf_org/results/t4-ort
                #
                # check if system_id is good.

@@ -1900,7 +1954,8 @@
                if config.version not in ["v0.5"]:
                    valid_system_types = ["datacenter", "edge"]
                    if config.version not in ["v0.7"]:
-                        valid_system_types += ["datacenter,edge", "edge,datacenter"]
+                        valid_system_types += ["datacenter,edge",
+                                               "edge,datacenter"]
                    if system_type not in valid_system_types:
                        log.error(
                            "%s has invalid system type (%s)",
@@ -1934,7 +1989,8 @@

                if is_closed_or_network and mlperf_model not in config.models:
                    # for closed/network divisions we want the model name to match.
-                    # for open division the model_name might be different than the task
+                    # for open division the model_name might be different
+                    # than the task
                    log.error(
                        "%s has an invalid model %s for closed/network division",
                        name,
@@ -1962,9 +2018,12 @@
                        list(required_scenarios) + list(config.get_optional(mlperf_model))
                    )

-                    for scenario in list_dir(results_path, system_desc, model_name):
-                        # some submissions in v0.5 use lower case scenarios - map them for now
-                        scenario_fixed = SCENARIO_MAPPING.get(scenario, scenario)
+                    for scenario in list_dir(
+                            results_path, system_desc, model_name):
+                        # some submissions in v0.5 use lower case scenarios -
+                        # map them for now
+                        scenario_fixed = SCENARIO_MAPPING.get(
+                            scenario, scenario)

                        # Skip scenario for debug purposes
                        if scenario in scenarios_to_skip:
@@ -2016,7 +2075,8 @@
                            scenario,
                        )
                        if not os.path.exists(measurement_dir):
-                            log.error("no measurement_dir for %s", measurement_dir)
+                            log.error(
+                                "no measurement_dir for %s", measurement_dir)
                            results[measurement_dir] = None
                            errors += 1
                            continue
@@ -2046,7 +2106,8 @@
                        # check accuracy
                        accuracy_is_valid = False
                        acc_path = os.path.join(name, "accuracy")
-                        if not os.path.exists(os.path.join(acc_path, "accuracy.txt")):
+                        if not os.path.exists(
+                                os.path.join(acc_path, "accuracy.txt")):
                            log.error(
                                "%s has no accuracy.txt. Generate it with accuracy-imagenet.py or accuracy-coco.py or "
                                "process_accuracy.py",
@@ -2055,7 +2116,8 @@
                            errors += 1
                            continue
                        elif scenario not in scenarios_to_skip:
-                            diff = files_diff(list_files(acc_path), REQUIRED_ACC_FILES)
+                            diff = files_diff(
+                                list_files(acc_path), REQUIRED_ACC_FILES)
                            if diff:
                                log.error(
                                    "%s has file list mismatch (%s)", acc_path, diff
@@ -2068,10 +2130,19 @@
                                acc_path,
                                debug or is_closed_or_network,
                            )
-                            acc = json.dumps(acc).replace(",", " ").replace('"', "").replace("{", "").replace("}", "")
+                            acc = json.dumps(acc).replace(
+                                ",",
+                                " ").replace(
+                                '"',
+                                "").replace(
+                                "{",
+                                "").replace(
+                                "}",
+                                "")
                            if mlperf_model in REQUIRED_ACC_BENCHMARK:
                                if config.version in REQUIRED_ACC_BENCHMARK[mlperf_model]:
-                                    extra_files_pass, missing_files = check_extra_files(acc_path, REQUIRED_ACC_BENCHMARK[mlperf_model][config.version])
+                                    extra_files_pass, missing_files = check_extra_files(
+                                        acc_path, REQUIRED_ACC_BENCHMARK[mlperf_model][config.version])
                                    if not extra_files_pass:
                                        log.error(
                                            "%s expected to have the following extra files (%s)", acc_path, missing_files
@@ -2085,7 +2156,8 @@
                            )
                            accuracy_is_valid = True
                        if not accuracy_is_valid:
-                            # a little below we'll not copy this into the results csv
+                            # a little below we'll not copy this into the
+                            # results csv
                            errors += 1
                            log.error("%s, accuracy not valid", acc_path)

@@ -2278,14 +2350,16 @@
                                )
                            else:
                                results[name] = None
-                                log.error("%s is OK but accuracy has issues", name)
+                                log.error(
+                                    "%s is OK but accuracy has issues", name)

                # Discard scenarios that we want to skip
                for scenario in scenarios_to_skip:
                    required_scenarios.discard(scenario)

                if required_scenarios:
-                    name = os.path.join(results_path, system_desc, model_name)
+                    name = os.path.join(
+                        results_path, system_desc, model_name)
                    if is_closed_or_network:
                        results[name] = None
                        log.error(
@@ -2313,9 +2387,8 @@ def check_system_desc_id(
 ):
     is_valid = True
     # check all required fields
-
-    required_fields = SYSTEM_DESC_REQUIRED_FIELDS.copy()
+    required_fields = SYSTEM_DESC_REQUIRED_FIELDS.copy()

     is_network_system, is_network_mode_valid = is_system_over_network(
         division, systems_json, fname
     )
@@ -2340,7 +2413,6 @@
                 "%s, field %s requires a meaningful response but is empty", fname, k
             )

-
     # SYSTEM_DESC_REQUIRED_FIELDS_POWER should be mandatory when a submission has power logs, but since we
     # check power submission in check_results_dir, the information is not available yet at this stage and we do
     # this check later
@@ -2428,21 +2500,45 @@ def check_measurement_dir(
     if has_power and not skip_check_power_measure_files:
         path = measurement_dir
-        all_files_1 = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+        all_files_1 = [
+            os.path.join(
+                path,
+                f) for f in os.listdir(path) if os.path.isfile(
+                os.path.join(
+                    path,
+                    f))]
         path = os.path.join(path, "..")
-        all_files_2 = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+        all_files_2 = [
+            os.path.join(
+                path,
+                f) for f in os.listdir(path) if os.path.isfile(
+                os.path.join(
+                    path,
+                    f))]
         path = os.path.join(path, "..")
-        all_files_3 = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+        all_files_3 = [
+            os.path.join(
+                path,
+                f) for f in os.listdir(path) if os.path.isfile(
+                os.path.join(
+                    path,
+                    f))]
         path = os.path.join(path, "..")
-        all_files_4 = [os.path.join(path, f) for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+        all_files_4 = [
+            os.path.join(
+                path,
+                f) for f in os.listdir(path) if os.path.isfile(
+                os.path.join(
+                    path,
+                    f))]
         all_files = all_files_1 + all_files_2 + all_files_3 + all_files_4

         for i in REQUIRED_POWER_MEASURE_FILES:
             found = False
             for file in all_files:
                 if re.match(i, os.path.basename(file)):
-                        found = True
-                        file_path = file
+                    found = True
+                    file_path = file
             if not found:
                 log.error("%s is missing %s", measurement_dir, i)
                 is_valid = False
@@ -2482,9 +2578,10 @@
                     log.error("%s, field %s is missing", fname, k)
                 elif check_empty_fields and not j[k]:
                     is_valid = False
-                    log.error("%s, field %s is missing meaningful value", fname, k)
+                    log.error(
+                        "%s, field %s is missing meaningful value", fname, k)

-            impl = system_file[len(system_desc) + 1 : -end]
+            impl = system_file[len(system_desc) + 1: -end]
             code_dir = os.path.join(root, "code", model)
             if os.path.isfile(code_dir):
                 with open(code_dir, "r") as f:
@@ -2498,7 +2595,7 @@
             if not os.path.exists(os.path.dirname(code_dir)):
                 log.error("%s is missing code_dir %s", fname, code_dir)
                 is_valid = False
-
+
     else:
         log.error("%s is missing %s*.json", fname, system_desc)
         is_valid = False
@@ -2521,7 +2618,9 @@
            is_valid = True
            break
    if is_valid == False:
-        log.error("Compliance test performance check in %s failed", test_dir)
+        log.error(
+            "Compliance test performance check in %s failed",
+            test_dir)

    # Check performance dir
    test_perf_path = os.path.join(test_dir, "performance", "run_1")
@@ -2535,7 +2634,10 @@
            ["mlperf_log_accuracy.json"],
        )
        if diff:
-            log.error("%s has file list mismatch (%s)", test_perf_path, diff)
+            log.error(
+                "%s has file list mismatch (%s)",
+                test_perf_path,
+                diff)
            is_valid = False

    return is_valid
@@ -2577,14 +2679,17 @@
            else REQUIRED_TEST01_ACC_FILES,
        )
        if diff:
-            log.error("%s has file list mismatch (%s)", test_acc_path, diff)
+            log.error(
+                "%s has file list mismatch (%s)",
+                test_acc_path,
+                diff)
            is_valid = False
        elif not acc_passed:
            target = config.get_accuracy_target(model)
            patterns = []
            acc_types = []
            for i in range(0, len(target), 2):
-                acc_type = target[i:i+2]
+                acc_type = target[i:i + 2]
                acc_types.append(acc_type)
                patterns.append(ACC_PATTERN[acc_type[0]])
            acc_seen = [False for _ in acc_type]
@@ -2613,14 +2718,17 @@
                    for acc_type, pattern in zip(acc_types, patterns):
                        m = re.match(pattern, line)
                        if m:
-                            acc_compliance[acc_type] = float(m.group(1))
+                            acc_compliance[acc_type] = float(
+                                m.group(1))
            for acc_type in acc_types:
                if acc_baseline[acc_type] == 0 or acc_compliance[acc_type] == 0:
                    is_valid = False
                    break
                else:
-                    required_delta_perc = config.get_delta_perc(model, acc_type[0])
-                    delta_perc = abs(1 - acc_baseline[acc_type] / acc_compliance[acc_type]) * 100
+                    required_delta_perc = config.get_delta_perc(
+                        model, acc_type[0])
+                    delta_perc = abs(
+                        1 - acc_baseline[acc_type] / acc_compliance[acc_type]) * 100
                    if delta_perc <= required_delta_perc:
                        is_valid = True
                    else:
@@ -2641,9 +2749,11 @@
            length_check_pass = "Sample length check pass: True" in lines
            is_valid = first_token_pass and eos_pass and length_check_pass
            if not is_valid:
-                log.error(f"TEST06 accuracy check failed. first_token_check: {first_token_pass} eos_check: {eos_pass} length_check: {length_check_pass}.")
+                log.error(
+                    f"TEST06 accuracy check failed. first_token_check: {first_token_pass} eos_check: {eos_pass} length_check: {length_check_pass}.")
    else:
-        raise NotImplemented(f"{test_dir} is neither TEST01 and TEST06, which doesn't require accuracy check")
+        raise NotImplemented(
+            f"{test_dir} is neither TEST01 and TEST06, which doesn't require accuracy check")

    return is_valid

@@ -2694,17 +2804,16 @@

    if model in [
        "stable-diffusion-xl"
-    ] and config.version in [ "v4.0" ]:
+    ] and config.version in ["v4.0"]:
        test_list.remove("TEST01")
        test_list.remove("TEST04")

-
    if model in [
        "llama2-70b-99",
        "llama2-70b-99.9",
        "mixtral-8x7b"
    ]:
-            test_list.append("TEST06")
+        test_list.append("TEST06")

    if test_list and not os.path.exists(compliance_dir):
        log.error("no compliance dir for %s: %s", name, compliance_dir)
@@ -2728,7 +2837,10 @@
                config, model, compliance_perf_dir, scenario, division, system_json
            )
            if is_inferred:
-                log.info("%s has inferred results, qps=%s", compliance_perf_dir, r)
+                log.info(
+                    "%s has inferred results, qps=%s",
+                    compliance_perf_dir,
+                    r)
        except Exception as e:
            log.error(
                "%s caused exception in check_performance_dir: %s",
@@ -2742,7 +2854,7 @@
            and compliance_perf_valid
        )

-    compliance_acc_pass= True
+    compliance_acc_pass = True
    for test in ["TEST01", "TEST06"]:
        if test in test_list:
            # Check accuracy for TEST01
@@ -2750,7 +2862,6 @@
                os.path.join(compliance_dir, test), model, config
            )

-
    return compliance_perf_pass and compliance_acc_pass and compliance_perf_dir_pass

@@ -2836,7 +2947,8 @@ def merge_two_dict(x, y):
    unique_closed_systems = merge_two_dict(
        closed_power_systems, closed_non_power_systems
    )
-    unique_open_systems = merge_two_dict(open_power_systems, open_non_power_systems)
+    unique_open_systems = merge_two_dict(
+        open_power_systems, open_non_power_systems)
    unique_network_systems = merge_two_dict(
        network_power_systems, network_non_power_systems
    )
@@ -2845,8 +2957,10 @@
    unique_systems = merge_two_dict(unique_systems, unique_network_systems)

    # power systems can be repeating in open, closed and network
-    unique_power_systems = merge_two_dict(closed_power_systems, open_power_systems)
-    unique_power_systems = merge_two_dict(unique_power_systems, network_power_systems)
+    unique_power_systems = merge_two_dict(
+        closed_power_systems, open_power_systems)
+    unique_power_systems = merge_two_dict(
+        unique_power_systems, network_power_systems)

    number_systems = len(unique_systems)
    number_power_systems = len(unique_power_systems)
@@ -2867,7 +2981,8 @@ def sum_dict_values(x):
    count_open_results = count_open_power_results + count_open_non_power_results

    count_network_power_results = sum_dict_values(network_power_systems)
-    count_network_non_power_results = sum_dict_values(network_non_power_systems)
+    count_network_non_power_results = sum_dict_values(
+        network_non_power_systems)
    count_network_results = (
        count_network_power_results + count_network_non_power_results
    )
@@ -2905,7 +3020,10 @@
    )

    log.info("---")
-    log.info("Systems=%d, Power Systems=%d", number_systems, number_power_systems)
+    log.info(
+        "Systems=%d, Power Systems=%d",
+        number_systems,
+        number_power_systems)
    log.info(
        "Closed Systems=%d, Closed Power Systems=%d",
        number_closed_systems,