From 9d857baf82ce163801459fa7cec71c1e38016b02 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Fri, 25 Oct 2024 17:11:01 -0400 Subject: [PATCH 001/125] PEFT Inference (#11030) * adapter inference first commit Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * Fix yaml serialization Signed-off-by: Hemil Desai * add copyright header Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * revert accidental commit Signed-off-by: Chen Cui --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Signed-off-by: Hemil Desai Co-authored-by: cuichenx Co-authored-by: Hemil Desai Co-authored-by: Pablo Garay --- nemo/collections/llm/inference/base.py | 49 ++++++++++++++++++++---- nemo/lightning/ckpt_utils.py | 15 ++++++++ nemo/lightning/io/mixin.py | 33 +++++++++++++++- nemo/lightning/pytorch/callbacks/peft.py | 11 +++--- nemo/lightning/resume.py | 5 +-- 5 files changed, 95 insertions(+), 18 deletions(-) diff --git a/nemo/collections/llm/inference/base.py b/nemo/collections/llm/inference/base.py index 0171f1c2dd5c2..9c4da7940b704 100644 --- a/nemo/collections/llm/inference/base.py +++ b/nemo/collections/llm/inference/base.py @@ -1,5 +1,20 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json from pathlib import Path -from typing import Optional +from typing import Optional, Union import pytorch_lightning as pl import torch @@ -15,8 +30,9 @@ from pytorch_lightning.trainer.states import TrainerFn import nemo.lightning as nl +from nemo.collections.llm.peft import LoRA from nemo.lightning import io -from nemo.lightning.ckpt_utils import ckpt_to_context_subdir +from nemo.lightning.ckpt_utils import ADAPTER_META_FILENAME, ckpt_to_context_subdir, ckpt_to_weights_subdir from nemo.lightning.pytorch.strategies.megatron_strategy import MegatronStrategy from nemo.lightning.pytorch.strategies.utils import RestoreConfig @@ -39,11 +55,21 @@ def tokenize(self, prompt): def _setup_trainer_and_restore_model(path: Path, trainer: nl.Trainer, model: pl.LightningModule): assert isinstance(trainer.strategy, MegatronStrategy), "Only MegatronStrategy is supported for trainer.strategy." assert trainer.strategy.context_parallel_size <= 1, "Context parallelism is not supported for inference." - restore_config = RestoreConfig( - path=path, - load_model_state=True, - load_optim_state=False, - ) + if (adapter_meta_path := ckpt_to_weights_subdir(path) / ADAPTER_META_FILENAME).exists(): + with open(adapter_meta_path, "r") as f: + metadata = json.load(f) + restore_config = RestoreConfig( + path=metadata['model_ckpt_path'], + load_model_state=True, + load_optim_state=False, + ) + else: + restore_config = RestoreConfig( + path=path, + load_model_state=True, + load_optim_state=False, + ) + trainer.strategy.restore_config = restore_config trainer.strategy._setup_optimizers = False trainer.ckpt_path = None @@ -60,6 +86,15 @@ def _setup_trainer_and_restore_model(path: Path, trainer: nl.Trainer, model: pl. 
trainer.strategy.trainer = trainer trainer.strategy.selective_restore() + lora: Union[io.TrainerContext, LoRA] = io.load_context(ckpt_to_context_subdir(path), "model.model_transform") + if isinstance(lora, LoRA): + model = lora(model) + adapter_sharded_state_dict = {k: v for k, v in model.sharded_state_dict().items() if ".adapter." in k} + adapter_state = trainer.strategy.checkpoint_io.load_checkpoint( + ckpt_to_weights_subdir(path), sharded_state_dict=adapter_sharded_state_dict + ) + trainer.strategy.load_model_state_dict(adapter_state, strict=False) + def setup_model_and_tokenizer( path: Path, diff --git a/nemo/lightning/ckpt_utils.py b/nemo/lightning/ckpt_utils.py index a532d1335bae5..ae1fe520a1195 100644 --- a/nemo/lightning/ckpt_utils.py +++ b/nemo/lightning/ckpt_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from pathlib import Path from typing import Union @@ -5,6 +19,7 @@ # WEIGHTS_PATH stores the weights while CONTEXT_PATH stores the hyper-parameters. WEIGHTS_PATH: str = "weights" CONTEXT_PATH: str = "context" +ADAPTER_META_FILENAME = "adapter_metadata.json" def idempotent_path_append(base_dir: Union[str, Path], suffix) -> Path: diff --git a/nemo/lightning/io/mixin.py b/nemo/lightning/io/mixin.py index 5d1738e348b1a..e2b9d7f409aef 100644 --- a/nemo/lightning/io/mixin.py +++ b/nemo/lightning/io/mixin.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import functools import inspect import json @@ -66,6 +80,21 @@ def _partial_representer_with_defaults(dumper, data): return _config_representer_with_defaults(dumper, data, type_name="Partial") +def _safe_object_representer(dumper, data): + if not inspect.isclass(data): + cls = data.__class__ + call = True + else: + cls = data + call = False + + value = { + "_target_": f"{inspect.getmodule(cls).__name__}.{cls.__qualname__}", # type: ignore + "_call_": call, + } + return dumper.represent_data(value) + + class IOMixin: """ A mixin class designed to capture the arguments passed to the `__init__` method, @@ -208,14 +237,14 @@ def _io_dump_yaml(self, io: config_lib.Config, attrs: list[str]): original_representers = yaml.SafeDumper.yaml_representers.copy() from nemo_run.config import Config, Partial - from nemo_run.core.serialization.yaml import YamlSerializer, _function_representer + from nemo_run.core.serialization.yaml import YamlSerializer yaml.SafeDumper.add_representer(config_lib.Config, _config_representer_with_defaults) yaml.SafeDumper.add_representer(partial.Partial, _partial_representer_with_defaults) yaml.SafeDumper.add_representer(Config, _config_representer_with_defaults) yaml.SafeDumper.add_representer(Partial, _partial_representer_with_defaults) - yaml.SafeDumper.add_multi_representer(object, _function_representer) + yaml.SafeDumper.add_multi_representer(object, _safe_object_representer) serializer = YamlSerializer() result = {} diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index 725c28f341a59..28f16882305c9 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -25,6 +25,8 @@ from pytorch_lightning.trainer.states import TrainerFn from typing_extensions import override +from nemo.lightning.ckpt_utils import ADAPTER_META_FILENAME +from nemo.lightning.io.mixin import IOMixin from nemo.lightning.io.pl import ckpt_to_dir from nemo.lightning.pytorch.callbacks.model_transform import ModelTransform from nemo.utils import logging @@ -34,10 +36,7 @@ from megatron.core.dist_checkpointing.mapping import ShardedStateDict -_ADAPTER_META_FILENAME = "adapter_metadata.json" - - -class PEFT(ABC, ModelTransform): +class PEFT(IOMixin, ABC, ModelTransform): """Abstract base class for Parameter-Efficient Fine-Tuning (PEFT) methods. 
This class defines the interface for PEFT methods, which are used to fine-tune @@ -312,7 +311,7 @@ def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_optio if is_global_rank_zero(): metadata = {"model_ckpt_path": str(self.model_ckpt_path)} - adapter_meta_path = ckpt_to_dir(path) / _ADAPTER_META_FILENAME + adapter_meta_path = ckpt_to_dir(path) / ADAPTER_META_FILENAME with open(adapter_meta_path, "w") as f: json.dump(metadata, f) return request @@ -346,7 +345,7 @@ def load_checkpoint( assert self.checkpoint_io is not None - adapter_meta_path = ckpt_to_dir(path) / _ADAPTER_META_FILENAME + adapter_meta_path = ckpt_to_dir(path) / ADAPTER_META_FILENAME adapter_ckpt = None if getattr(path, "base_model_path", None): ## PEFT Resume, FIRST TIME diff --git a/nemo/lightning/resume.py b/nemo/lightning/resume.py index 40b4aa704575b..412ca8665b848 100644 --- a/nemo/lightning/resume.py +++ b/nemo/lightning/resume.py @@ -23,6 +23,7 @@ from nemo.lightning import io from nemo.lightning.base import NEMO_MODELS_CACHE +from nemo.lightning.ckpt_utils import ADAPTER_META_FILENAME from nemo.lightning.pytorch.strategies.utils import RestoreConfig from nemo.utils import logging from nemo.utils.app_state import AppState @@ -279,9 +280,7 @@ def get_trainer_ckpt_path(self, model: Optional[io.ConnectorMixin] = None) -> Op if self.adapter_path: return AdapterPath(Path(self.adapter_path), base_model_path=checkpoint) else: - from nemo.lightning.pytorch.callbacks.peft import _ADAPTER_META_FILENAME - - adapter_meta_path = checkpoint / _ADAPTER_META_FILENAME + adapter_meta_path = checkpoint / ADAPTER_META_FILENAME if adapter_meta_path.exists(): base_model_path = self._resume_peft(adapter_meta_path, model) return AdapterPath(checkpoint, base_model_path=base_model_path) From 90d82dce00b521ef2749e8ee770f15796bc79a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sat, 26 Oct 2024 00:22:09 +0200 Subject: [PATCH 002/125] ci: Switch to CPU only runner (#11035) * ci: Switch to CPU only runner Signed-off-by: Oliver Koenig * runner Signed-off-by: Oliver Koenig * Raise EnvironmentError when CUDA is unavailable to prevent cuda-python failure Signed-off-by: Vladimir Bataev * Fix error message Signed-off-by: Vladimir Bataev * update Signed-off-by: Oliver Koenig * fix Signed-off-by: Oliver Koenig * finalize Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig Signed-off-by: Vladimir Bataev Co-authored-by: Vladimir Bataev --- .github/workflows/_test_template.yml | 7 ++++++- .github/workflows/cicd-main.yml | 2 +- .../asr/parts/submodules/rnnt_greedy_decoding.py | 4 ++-- .../asr/parts/submodules/rnnt_loop_labels_computer.py | 4 ++-- .../asr/parts/submodules/tdt_loop_labels_computer.py | 4 ++-- nemo/core/utils/cuda_python_utils.py | 9 +++++++-- 6 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/_test_template.yml b/.github/workflows/_test_template.yml index c0aedbc1524ef..17cceb665747e 100644 --- a/.github/workflows/_test_template.yml +++ b/.github/workflows/_test_template.yml @@ -51,7 +51,12 @@ jobs: - name: Start container run: | - docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))" + ARG=("") + if [[ "${{ inputs.RUNNER }}" != *cpu* ]]; then + ARG=("--runtime=nvidia --gpus all") + fi + + 
docker run --rm -d --name nemo_container_${{ github.run_id }} ${ARG[@]} --shm-size=64g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))" - id: main name: Run main script diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index d5b4d2d8081e6..098b9d635cb31 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -300,7 +300,7 @@ jobs: uses: ./.github/workflows/_test_template.yml if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true' with: - RUNNER: self-hosted-azure-cpu + RUNNER: self-hosted-azure TIMEOUT: 20 SCRIPT: | CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 996ac8977f35f..f9cf368fe405c 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -646,9 +646,9 @@ def __init__( ) self._greedy_decode = RNNTGreedyDecodeCudaGraph(max_symbols_per_step, self) - except (ImportError, ModuleNotFoundError, ValueError) as e: + except (ImportError, ModuleNotFoundError, ValueError, EnvironmentError) as e: self.use_cuda_graph_decoder = False - logging.warning(f"Cannot use decoder with CUDA graphs, reason: {e.msg}") + logging.warning(f"Cannot use decoder with CUDA graphs, reason: {e}") self._greedy_decode = self._greedy_decode_blank_as_pad_loop_frames else: self._greedy_decode = self._greedy_decode_blank_as_pad_loop_frames diff --git a/nemo/collections/asr/parts/submodules/rnnt_loop_labels_computer.py b/nemo/collections/asr/parts/submodules/rnnt_loop_labels_computer.py index c0783c301c440..13bb0b471ed2e 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_loop_labels_computer.py +++ b/nemo/collections/asr/parts/submodules/rnnt_loop_labels_computer.py @@ -266,11 +266,11 @@ def maybe_enable_cuda_graphs(self): try: check_cuda_python_cuda_graphs_conditional_nodes_supported() self.cuda_graphs_mode = self.CudaGraphsMode.FULL_GRAPH - except (ImportError, ModuleNotFoundError) as e: + except (ImportError, ModuleNotFoundError, EnvironmentError) as e: logging.warning( "No conditional node support for Cuda.\n" "Cuda graphs with while loops are disabled, decoding speed will be slower\n" - f"Reason: {e.msg}" + f"Reason: {e}" ) self.cuda_graphs_mode = self.CudaGraphsMode.NO_WHILE_LOOPS self.reset_cuda_graphs_state() diff --git a/nemo/collections/asr/parts/submodules/tdt_loop_labels_computer.py b/nemo/collections/asr/parts/submodules/tdt_loop_labels_computer.py index 61bd71f09037f..c0fbe53617614 100644 --- a/nemo/collections/asr/parts/submodules/tdt_loop_labels_computer.py +++ b/nemo/collections/asr/parts/submodules/tdt_loop_labels_computer.py @@ -277,11 +277,11 @@ def maybe_enable_cuda_graphs(self): try: check_cuda_python_cuda_graphs_conditional_nodes_supported() self.cuda_graphs_mode = self.CudaGraphsMode.FULL_GRAPH - except (ImportError, ModuleNotFoundError) as e: + except (ImportError, ModuleNotFoundError, EnvironmentError) as e: logging.warning( "No conditional node support for Cuda.\n" "Cuda graphs with while loops are disabled, decoding speed will be slower\n" - f"Reason: 
{e.msg}" + f"Reason: {e}" ) self.cuda_graphs_mode = self.CudaGraphsMode.NO_WHILE_LOOPS self.reset_cuda_graphs_state() diff --git a/nemo/core/utils/cuda_python_utils.py b/nemo/core/utils/cuda_python_utils.py index eb8897df07979..8bd25333488ff 100644 --- a/nemo/core/utils/cuda_python_utils.py +++ b/nemo/core/utils/cuda_python_utils.py @@ -22,6 +22,10 @@ def check_cuda_python_cuda_graphs_conditional_nodes_supported(): + # for CPU-only environment we need to raise an exception, otherwise cuda-python library will fail + if not torch.cuda.is_available(): + raise EnvironmentError("CUDA is not available") + try: from cuda import cuda except ImportError: @@ -55,11 +59,12 @@ def skip_cuda_python_test_if_cuda_graphs_conditional_nodes_not_supported(): """ try: check_cuda_python_cuda_graphs_conditional_nodes_supported() - except (ImportError, ModuleNotFoundError) as e: + except (ImportError, ModuleNotFoundError, EnvironmentError) as e: import pytest pytest.skip( - f"Test using cuda graphs with conditional nodes is being skipped because cuda graphs with conditional nodes aren't supported. Error message: {e}" + "Test using cuda graphs with conditional nodes is being skipped because " + f"cuda graphs with conditional nodes aren't supported. Error message: {e}" ) From 60b4ac7fd8ecd5339e52c0e10223254f6f18af15 Mon Sep 17 00:00:00 2001 From: nasretdinovr Date: Sat, 26 Oct 2024 11:40:40 +0400 Subject: [PATCH 003/125] added Lhotse online augmentation tutorial for SE (#10944) Signed-off-by: Rauf --- .../masking_with_online_augmentation.yaml | 119 +++ .../audio/data/audio_to_audio_lhotse.py | 7 + ...Enhancement_with_Online_Augmentation.ipynb | 984 ++++++++++++++++++ 3 files changed, 1110 insertions(+) create mode 100644 examples/audio/conf/masking_with_online_augmentation.yaml create mode 100644 tutorials/audio/speech_enhancement/Speech_Enhancement_with_Online_Augmentation.ipynb diff --git a/examples/audio/conf/masking_with_online_augmentation.yaml b/examples/audio/conf/masking_with_online_augmentation.yaml new file mode 100644 index 0000000000000..0f9252b1f290f --- /dev/null +++ b/examples/audio/conf/masking_with_online_augmentation.yaml @@ -0,0 +1,119 @@ +name: "masking_with_online_augmenatation" + +model: + sample_rate: 16000 + skip_nan_grad: false + num_outputs: 1 + + train_ds: + use_lhotse: true # enable Lhotse data loader + cuts_path: ??? # path to Lhotse cuts manifest with speech signals for augmentation (including custom "target_recording" field with the same signals) + truncate_duration: 4.0 # Number of STFT time frames = 1 + truncate_duration // encoder.hop_length = 256 + truncate_offset_type: random # if the file is longer than truncate_duration, use random offset to select a subsegment + batch_size: 64 # batch size may be increased based on the available memory + shuffle: true + num_workers: 8 + pin_memory: true + rir_enabled: true # enable room impulse response augmentation + rir_path: ??? # path to Lhotse recordings manifest with room impulse response signals + noise_path: ??? # path to Lhotse cuts manifest with noise signals + + validation_ds: + use_lhotse: true # enable Lhotse data loader + cuts_path: ??? # path to Lhotse cuts manifest with noisy speech signals (including custom "target_recording" field with the clean signals) + batch_size: 64 # batch size may be increased based on the available memory + shuffle: false + num_workers: 4 + pin_memory: true + + test_ds: + use_lhotse: true # enable Lhotse data loader + cuts_path: ??? 
# path to Lhotse cuts manifest with noisy speech signals (including custom "target_recording" field with the clean signals) + batch_size: 1 # batch size may be increased based on the available memory + shuffle: false + num_workers: 4 + pin_memory: true + + encoder: + _target_: nemo.collections.audio.modules.transforms.AudioToSpectrogram + fft_length: 512 # Length of the window and FFT for calculating spectrogram + hop_length: 256 # Hop length for calculating spectrogram + + decoder: + _target_: nemo.collections.audio.modules.transforms.SpectrogramToAudio + fft_length: 512 # Length of the window and FFT for calculating spectrogram + hop_length: 256 # Hop length for calculating spectrogram + + mask_estimator: + _target_: nemo.collections.audio.modules.masking.MaskEstimatorRNN + num_outputs: ${model.num_outputs} + num_subbands: 257 # Number of subbands of the input spectrogram + num_features: 256 # Number of features at RNN input + num_layers: 5 # Number of RNN layers + bidirectional: true # Use bi-directional RNN + + mask_processor: + _target_: nemo.collections.audio.modules.masking.MaskReferenceChannel # Apply mask on the reference channel + ref_channel: 0 # Reference channel for the output + + loss: + _target_: nemo.collections.audio.losses.SDRLoss + scale_invariant: true # Use scale-invariant SDR + + metrics: + val: + sdr: # output SDR + _target_: torchmetrics.audio.SignalDistortionRatio + test: + sdr_ch0: # SDR on output channel 0 + _target_: torchmetrics.audio.SignalDistortionRatio + channel: 0 + + optim: + name: adamw + lr: 1e-4 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + +trainer: + devices: -1 # number of GPUs, -1 would use all available GPUs + num_nodes: 1 + max_epochs: -1 + max_steps: -1 # computed at runtime if not set + val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations + accelerator: auto + strategy: ddp + accumulate_grad_batches: 1 + gradient_clip_val: null + precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP. + log_every_n_steps: 25 # Interval of logging. + enable_progress_bar: true + num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it + check_val_every_n_epoch: 1 # number of evaluations on validation every n epochs + sync_batchnorm: true + enable_checkpointing: False # Provided by exp_manager + logger: false # Provided by exp_manager + +exp_manager: + exp_dir: null + name: ${name} + create_tensorboard_logger: true + create_checkpoint_callback: true + checkpoint_callback_params: + # in case of multiple validation sets, first one is used + monitor: "val_loss" + mode: "min" + save_top_k: 5 + always_save_nemo: true # saves the checkpoints as nemo files instead of PTL checkpoints + + resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. 
+ # you need to set these two to true to continue the training + resume_if_exists: false + resume_ignore_no_checkpoint: false + + # You may use this section to create a W&B logger + create_wandb_logger: false + wandb_logger_kwargs: + name: null + project: null diff --git a/nemo/collections/audio/data/audio_to_audio_lhotse.py b/nemo/collections/audio/data/audio_to_audio_lhotse.py index d8978c19d6922..4cf243b68ed30 100644 --- a/nemo/collections/audio/data/audio_to_audio_lhotse.py +++ b/nemo/collections/audio/data/audio_to_audio_lhotse.py @@ -55,6 +55,13 @@ def __getitem__(self, cuts: CutSet) -> dict[str, torch.Tensor]: retained_cuts = [ cut._first_non_padding_cut if isinstance(cut, MixedCut) else cut for cut in retained_padded_cuts ] + + # if online augmentation is applied, some retained cuts still may be MixedCuts (including the original speech, noise, and augmentation) + # get the first non-padding cut from there, which is supposed to be the clean speech signal + for n, cut in enumerate(retained_cuts): + if isinstance(cut, MixedCut): + retained_cuts[n] = cut._first_non_padding_cut + # create cutset retained_cuts = CutSet.from_cuts(retained_cuts) if _key_available(retained_cuts, self.TARGET_KEY): diff --git a/tutorials/audio/speech_enhancement/Speech_Enhancement_with_Online_Augmentation.ipynb b/tutorials/audio/speech_enhancement/Speech_Enhancement_with_Online_Augmentation.ipynb new file mode 100644 index 0000000000000..ff6970d985226 --- /dev/null +++ b/tutorials/audio/speech_enhancement/Speech_Enhancement_with_Online_Augmentation.ipynb @@ -0,0 +1,984 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3b53ae0a-4cb5-44fd-bcde-5ce7abce79e9", + "metadata": { + "id": "3b53ae0a-4cb5-44fd-bcde-5ce7abce79e9" + }, + "source": [ + "# Introduction\n", + "\n", + "The goal of this tutorial is to demonstrate the basic steps required to setup and train a simple single-channel speech enhancement model in NeMo using online augmentation with noise and room impulse responce (RIR). Online augmentation is performed using a dataloader based on Lhotse speech data processing toolkit [1].\n", + "\n", + "\n", + "This notebook covers the following steps:\n", + "\n", + "* Download speech, noise and RIR data\n", + "* Prepare Lhotse manifests for speech, noise and RIR data\n", + "* Prepare fixed validation set by mixing speech, noise and RIR data\n", + "* Configure and train a simple single-output model\n", + "\n", + "Note that this tutorial is only for demonstration purposes.\n", + "To achieve best performance for a particular use case, carefully prepared data and more advanced models should be used.\n", + "\n", + "*Disclaimer:*\n", + "User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d603b38-b63c-42a6-92b3-575925149bde", + "metadata": { + "id": "3d603b38-b63c-42a6-92b3-575925149bde" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "5. 
Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect\n", + "\"\"\"\n", + "\n", + "GIT_USER = 'NVIDIA'\n", + "BRANCH = 'main'\n", + "\n", + "if 'google.colab' in str(get_ipython()):\n", + "\n", + " # Install dependencies\n", + " !pip install wget\n", + " !apt-get install sox libsndfile1 ffmpeg\n", + " !pip install text-unidecode\n", + " !pip install matplotlib>=3.3.2\n", + "\n", + " ## Install NeMo\n", + " !python -m pip install git+https://github.com/{GIT_USER}/NeMo.git@{BRANCH}#egg=nemo_toolkit[all]\n", + "\n", + " ## Install TorchAudio\n", + " !pip install torchaudio>=0.13.0 -f https://download.pytorch.org/whl/torch_stable.html" + ] + }, + { + "cell_type": "markdown", + "id": "bdac6ac8-a21f-4ea8-8484-41a6d187a2fe", + "metadata": { + "id": "bdac6ac8-a21f-4ea8-8484-41a6d187a2fe" + }, + "source": [ + "The following cell will take care of the necessary imports and prepare utility functions used throughout the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baca13c6-b0ed-429b-93b6-77249fdf4710", + "metadata": { + "id": "baca13c6-b0ed-429b-93b6-77249fdf4710" + }, + "outputs": [], + "source": [ + "import glob\n", + "import librosa\n", + "import os\n", + "import torch\n", + "import tqdm\n", + "from itertools import islice\n", + "\n", + "import IPython.display as ipd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pytorch_lightning as pl\n", + "import soundfile as sf\n", + "from pathlib import Path\n", + "from omegaconf import OmegaConf, open_dict\n", + "from sklearn.model_selection import train_test_split\n", + "from torchmetrics.functional.audio import signal_distortion_ratio, scale_invariant_signal_distortion_ratio\n", + "from lhotse import CutSet, RecordingSet, Recording, MonoCut\n", + "from lhotse.recipes import (\n", + " download_rir_noise,\n", + " prepare_rir_noise,\n", + " download_librispeech,\n", + " prepare_librispeech\n", + ")\n", + "\n", + "from nemo.collections.common.data.lhotse import get_lhotse_dataloader_from_config\n", + "from nemo.collections.audio.data.audio_to_audio_lhotse import LhotseAudioToTargetDataset" + ] + }, + { + "cell_type": "markdown", + "id": "fc0fafa4-bc65-4066-8e75-d740e2d15259", + "metadata": { + "id": "fc0fafa4-bc65-4066-8e75-d740e2d15259" + }, + "source": [ + "Utility functions for displaying signals and metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49720c06-3b4d-45b6-a054-73800c3bddea", + "metadata": { + "id": "49720c06-3b4d-45b6-a054-73800c3bddea" + }, + "outputs": [], + "source": [ + "def show_signal(signal: np.ndarray, sample_rate: int = 16000, tag: str = 'Signal'):\n", + " \"\"\"Show the time-domain signal and its spectrogram.\n", + " \"\"\"\n", + " fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 2.5))\n", + "\n", + " # show waveform\n", + " t = np.arange(0, len(signal)) / sample_rate\n", + "\n", + " ax[0].plot(t, signal)\n", + " ax[0].set_xlim(0, t.max())\n", + " ax[0].grid()\n", + " ax[0].set_xlabel('time / s')\n", + " ax[0].set_ylabel('amplitude')\n", + " ax[0].set_title(tag)\n", + "\n", + " n_fft = 1024\n", + " hop_length = 256\n", + "\n", + " D = librosa.amplitude_to_db(np.abs(librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)), ref=np.max)\n", + " img = librosa.display.specshow(D, y_axis='linear', x_axis='time', sr=sample_rate, n_fft=n_fft, hop_length=hop_length, ax=ax[1])\n", + " ax[1].set_title(tag)\n", + "\n", + " plt.tight_layout()\n", + " plt.colorbar(img, format=\"%+2.f dB\", ax=ax)\n", + "\n", 
+ "def show_metrics(signal: np.ndarray, reference: np.ndarray, sample_rate: int = 16000, tag: str = 'Signal'):\n", + " \"\"\"Show metrics for the time-domain signal and the reference signal.\n", + " \"\"\"\n", + " sdr = signal_distortion_ratio(preds=torch.tensor(signal), target=torch.tensor(reference))\n", + " sisdr = scale_invariant_signal_distortion_ratio(preds=torch.tensor(signal), target=torch.tensor(reference))\n", + " print(tag)\n", + " print('\\tsdr: ', sdr.item())\n", + " print('\\tsisdr:', sisdr.item())" + ] + }, + { + "cell_type": "markdown", + "id": "653eb22f-b09e-4421-8028-05d123fc47a5", + "metadata": { + "id": "653eb22f-b09e-4421-8028-05d123fc47a5" + }, + "source": [ + "### Data preparation" + ] + }, + { + "cell_type": "markdown", + "id": "42deb721-f734-43ea-bfef-931733d6379b", + "metadata": { + "id": "42deb721-f734-43ea-bfef-931733d6379b" + }, + "source": [ + "In this notebook, it is assumed that all audio will be resampled to 16kHz and the data and configuration will be stored under `root_dir` as defined below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "577d4be2-10f8-4050-8f40-b2566ef9dfef", + "metadata": { + "id": "577d4be2-10f8-4050-8f40-b2566ef9dfef" + }, + "outputs": [], + "source": [ + "# sample rate used throughout the notebook\n", + "sample_rate = 16000\n", + "\n", + "# root directory for data preparation, configurations, etc\n", + "root_dir = Path('./')\n", + "\n", + "# data directory\n", + "data_dir = root_dir / 'data'\n", + "data_dir.mkdir(exist_ok=True)\n", + "\n", + "# scripts directory\n", + "scripts_dir = root_dir / 'scripts'\n", + "scripts_dir.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "9f6dc925-04b1-4118-8092-c3107a734f4f", + "metadata": { + "id": "9f6dc925-04b1-4118-8092-c3107a734f4f" + }, + "source": [ + "Create dictionary with paths for all of the manifests files which will be stored under `data_dir`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da232926-a529-4409-b983-8a921c578c91", + "metadata": { + "id": "da232926-a529-4409-b983-8a921c578c91" + }, + "outputs": [], + "source": [ + "dataset_manifest = {\n", + " 'speech_train': data_dir / 'libri_cuts_train.jsonl.gz',\n", + " 'speech_val': data_dir / 'libri_cuts_val.jsonl.gz',\n", + " 'noise_train': data_dir / 'demand_cuts_train.jsonl.gz',\n", + " 'noise_val': data_dir / 'demand_cuts_val.jsonl.gz',\n", + " 'rir_train': data_dir / 'rir_recordings_train.jsonl.gz',\n", + " 'rir_val': data_dir / 'rir_recordings_val.jsonl.gz',\n", + " 'noisy_val': data_dir / 'noisy_cuts_val.jsonl.gz'\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "d0e19997-b151-47f5-b19b-97d8c7e65e2e", + "metadata": { + "id": "d0e19997-b151-47f5-b19b-97d8c7e65e2e" + }, + "source": [ + "In this tutorial, a subset of LibriSpeech dataset [2] will be downloaded and used as the speech material.\n", + "\n", + "To use a dataset with the Lhotse dataloader, we need to create manifest files from Lhotse cuts (refer to [3] for the details). In this cell, we first download and prepare the LibriSpeech dataset in a Lhotse format and then save it as manifest files for training and validation sets. Note that the target recording in the speech enhancement task is the original (unchanged) clean speech signal, which is defined under the custom field \"target_recording\" in the cuts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4156a68-1906-4a4c-9ecf-cab5d79eb13b", + "metadata": { + "id": "d4156a68-1906-4a4c-9ecf-cab5d79eb13b" + }, + "outputs": [], + "source": [ + "libri_variant = 'mini_librispeech'\n", + "speech_dir = data_dir / 'speech'\n", + "\n", + "libri_root = download_librispeech(speech_dir, dataset_parts=libri_variant)\n", + "\n", + "# Use script from Lhotse to prepate Librispeech dataset to Lhotse format\n", + "libri = prepare_librispeech(\n", + " libri_root, dataset_parts=libri_variant,\n", + ")\n", + "cuts_train = CutSet.from_manifests(**libri[\"train-clean-5\"]).trim_to_supervisions()\n", + "cuts_val = CutSet.from_manifests(**libri[\"dev-clean-2\"]).trim_to_supervisions()\n", + "\n", + "# Save the manifest with a custom \"target_recording\"\n", + "with CutSet.open_writer(dataset_manifest['speech_train']) as writer:\n", + " for cut in cuts_train:\n", + " cut.target_recording = cut.recording\n", + " writer.write(cut)\n", + "\n", + "with CutSet.open_writer(dataset_manifest['speech_val']) as writer:\n", + " for cut in cuts_val:\n", + " cut.target_recording = cut.recording\n", + " writer.write(cut)" + ] + }, + { + "cell_type": "markdown", + "id": "6f5a0eb7-b913-4323-a8ce-1da77521730b", + "metadata": { + "id": "6f5a0eb7-b913-4323-a8ce-1da77521730b" + }, + "source": [ + "During the training phase, noise data will be used for online augmentation by mixing it with the downloaded speech on-the-fly. During the validation and test phases, the noise will be used to create fixed sets.\n", + "\n", + "The following cell will download and prepare the noise data using a subset of the DEMAND dataset [4]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba4dcfe4-5614-438d-9c2b-73c0fbf3bc97", + "metadata": { + "id": "ba4dcfe4-5614-438d-9c2b-73c0fbf3bc97" + }, + "outputs": [], + "source": [ + "noise_dir = data_dir / 'noise'\n", + "noise_data_set = 'STRAFFIC,PSTATION'\n", + "\n", + "# Copy script\n", + "get_demand_script = os.path.join(scripts_dir, 'get_demand_data.py')\n", + "if not os.path.exists(get_demand_script):\n", + " !wget -P $scripts_dir https://raw.githubusercontent.com/{GIT_USER}/NeMo/{BRANCH}/scripts/dataset_processing/get_demand_data.py\n", + "\n", + "if not noise_dir.is_dir():\n", + " noise_dir.mkdir(exist_ok=True)\n", + " !python {get_demand_script} --data_root={noise_dir} --data_sets={noise_data_set}\n", + "else:\n", + " print('Noise directory already exists in:', noise_dir)\n", + "\n", + "noise_dir = data_dir / 'noise'\n", + "demand_recordings = RecordingSet.from_dir(noise_dir, pattern='*.wav')\n", + "\n", + "demand_cuts = CutSet.from_manifests(recordings=demand_recordings)\n", + "shuffled_demand_cuts = demand_cuts.shuffle()\n", + "\n", + "demand_cuts_train = shuffled_demand_cuts.subset(last=len(shuffled_demand_cuts)-3)\n", + "demand_cuts_val = shuffled_demand_cuts.subset(first=3)\n", + "\n", + "demand_cuts_train.to_file(dataset_manifest['noise_train'])\n", + "demand_cuts_val.to_file(dataset_manifest['noise_val'])" + ] + }, + { + "cell_type": "markdown", + "id": "f9429dd5-fedc-486d-92f1-5a0255d600e6", + "metadata": { + "id": "f9429dd5-fedc-486d-92f1-5a0255d600e6" + }, + "source": [ + "The following cell will download and prepare a simulated subset from room impulse responses dataset, described in the following paper [5]." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e83f5d2-26cd-44cb-9efe-4eafcd709bfe", + "metadata": { + "id": "3e83f5d2-26cd-44cb-9efe-4eafcd709bfe" + }, + "outputs": [], + "source": [ + "rir_recordings = RecordingSet()\n", + "rir_raw_dir = download_rir_noise(data_dir)\n", + "rirs = prepare_rir_noise(rir_raw_dir, parts=[\"sim_rir\"])\n", + "rir_recordings = rirs[\"sim_rir\"][\"recordings\"]\n", + "shuffled_rir_recordings = rir_recordings.shuffle()\n", + "\n", + "rir_val_part = int(len(rir_recordings) * 0.1)\n", + "rir_train_part = len(rir_recordings) - rir_val_part\n", + "\n", + "rir_recordings_train = shuffled_rir_recordings.subset(last=rir_train_part)\n", + "rir_recordings_val = shuffled_rir_recordings.subset(first=rir_val_part)\n", + "\n", + "rir_recordings_train.to_file(dataset_manifest['rir_train'])\n", + "rir_recordings_val.to_file(dataset_manifest['rir_val'])" + ] + }, + { + "cell_type": "markdown", + "id": "b568a03e-57a1-412f-a3a2-85af3a1e3a20", + "metadata": { + "id": "b568a03e-57a1-412f-a3a2-85af3a1e3a20" + }, + "source": [ + "For this tutorial, a single-channel noisy validation set is constructed by adding speech and noise.\n", + "\n", + "The following block will use based on Lhotse data loader from NeMo to create fixed noisy validation set and save it do `data/val` folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62dc5755-ccc5-4eb8-80d4-d59b418406a3", + "metadata": { + "id": "62dc5755-ccc5-4eb8-80d4-d59b418406a3" + }, + "outputs": [], + "source": [ + "# Create the cofing for the Lhotse data loader\n", + "val_noise_config = {\n", + "'cuts_path': dataset_manifest['speech_val'].as_posix(), # path to Lhotse cuts manifest with speech signals for augmentation\n", + "'sample_rate': 16000,\n", + "'batch_size': 1,\n", + "'rir_enabled': True, # enable room impulse response augmentation\n", + "'rir_path': dataset_manifest['rir_val'].as_posix(), # path to Lhotse recordings manifest with room impulse response signals\n", + "'rir_prob': 1.0, # probability of applying RIR augmentation\n", + "'noise_path': dataset_manifest['noise_val'].as_posix(), # path to Lhotse cuts manifest with noise signals\n", + "'noise_mix_prob': 1.0, # probability of applying noise augmentation\n", + "'noise_snr': (0, 20), # range of speech-to-noise ratio for the noise augmentation\n", + "'shuffle': False\n", + "}\n", + "\n", + "# Instantiate the data loader\n", + "dl = get_lhotse_dataloader_from_config(\n", + "OmegaConf.create(val_noise_config), global_rank=0, world_size=1, dataset=LhotseAudioToTargetDataset()\n", + ")\n", + "\n", + "# Define number of samples for the validation set\n", + "num_examples = 100\n", + "print(f'Get {num_examples} samples for the validation set')\n", + "samples = [sample for sample in islice(dl, num_examples)]\n", + "\n", + "\n", + "# Create folders for saving noisy (input) and clean (target) samples\n", + "val_dir = data_dir / 'val'\n", + "val_noisy_dir = val_dir / 'noisy'\n", + "val_clean_dir = val_dir / 'clean'\n", + "\n", + "val_dir.mkdir(exist_ok=True)\n", + "val_noisy_dir.mkdir(exist_ok =True)\n", + "val_clean_dir.mkdir(exist_ok=True)\n", + "\n", + "val_noisy_basename = 'val_noisy_fileid'\n", + "val_clean_basename = 'val_clean_fileid'\n", + "\n", + "with CutSet.open_writer(dataset_manifest['noisy_val']) as writer:\n", + " for n, sample in enumerate(samples):\n", + " noisy, clean = sample['input_signal'].numpy()[0], sample['target_signal'].numpy()[0]\n", + " #Save\n", + " sf.write(val_noisy_dir / 
f'{val_noisy_basename}_{str(n)}.wav', noisy, samplerate=val_noise_config['sample_rate'])\n", + " sf.write(val_clean_dir / f'{val_clean_basename}_{str(n)}.wav', clean, samplerate=val_noise_config['sample_rate'])\n", + " noisy_rec = Recording.from_file(val_noisy_dir / f'{val_noisy_basename}_{str(n)}.wav')\n", + " clean_rec = Recording.from_file(val_clean_dir / f'{val_clean_basename}_{str(n)}.wav')\n", + "\n", + " val_cut = MonoCut(id=noisy_rec.id,\n", + " start=0,\n", + " duration=noisy_rec.duration,\n", + " channel=0,\n", + " recording=noisy_rec)\n", + " val_cut.target_recording = clean_rec\n", + " writer.write(val_cut)" + ] + }, + { + "cell_type": "markdown", + "id": "f108c5d2-c87b-47a7-8c2d-d363a9234abb", + "metadata": { + "id": "f108c5d2-c87b-47a7-8c2d-d363a9234abb" + }, + "source": [ + "### Model configuration\n", + "\n", + "Here, a simple encoder-mask-decoder model will be used to process the noisy input signal and produce an enhanced output signal.\n", + "\n", + "In general, an encoder-mask-decoder model can be configured using `EncMaskDecAudioToAudioModel` class, which is depicted in the following block diagram." + ] + }, + { + "cell_type": "markdown", + "id": "c0ff2bff-1637-4a17-85b2-c92307a4d8d7", + "metadata": { + "id": "c0ff2bff-1637-4a17-85b2-c92307a4d8d7" + }, + "source": [ + "\"encmaskdecoder_model\"" + ] + }, + { + "cell_type": "markdown", + "id": "470b0a50-fe6c-4ded-9d27-64db20f83cad", + "metadata": { + "id": "470b0a50-fe6c-4ded-9d27-64db20f83cad" + }, + "source": [ + "The model structure can briefly be described as follows:\n", + "* Input to the model is a time-domain signal.\n", + "* Encoder transforms the input signal to the analysis domain.\n", + "* Mask estimator estimates a mask used to generate the output signal.\n", + "* Mask processor combines the estimated mask and the encoded input to produce the encoded output.\n", + "* Decoder transforms the encoded output into a time-domain signal.\n", + "* Output is a time-domain signal." + ] + }, + { + "cell_type": "markdown", + "id": "4cf1cfde-913f-42da-a9e0-fdb5ae8e50c5", + "metadata": { + "id": "4cf1cfde-913f-42da-a9e0-fdb5ae8e50c5" + }, + "source": [ + "For this example, the model will be configured to use a fixed short-time Fourier transform-based encoder and decoder, and the mask will be estimated using a recurrent neural network. The model used here is depicted in the following block diagram." + ] + }, + { + "cell_type": "markdown", + "id": "8ed37321-2e8b-4d8e-90bf-efc0897517e5", + "metadata": { + "id": "8ed37321-2e8b-4d8e-90bf-efc0897517e5" + }, + "source": [ + "\"single_output_example_model\"" + ] + }, + { + "cell_type": "markdown", + "id": "36596c01-db7d-402f-a5df-0fb9b4641b08", + "metadata": { + "id": "36596c01-db7d-402f-a5df-0fb9b4641b08" + }, + "source": [ + "In this particular configuration, the model structure can be described as follows:\n", + "* `AudioToSpectrogram` implements the analysis STFT transform.\n", + "* `MaskEstimatorRNN` is a mask estimator using RNNs.\n", + "* `MaskReferenceChannel` is a simple processor which applies the estimated mask on the reference channel. In this tutorial, the input signal has only a single channel, so the reference channel will be set to `0`.\n", + "* `SpectrogramToAudio` implements the synthesis STFT transform." 
+ ] + }, + { + "cell_type": "markdown", + "id": "03546307-1c4e-4775-a754-276c9a69be5c", + "metadata": { + "id": "03546307-1c4e-4775-a754-276c9a69be5c" + }, + "source": [ + "The following cell will load and show the default configuration for the model depicted above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03952acf-1792-4470-9120-dddf20a9f646", + "metadata": { + "scrolled": true, + "id": "03952acf-1792-4470-9120-dddf20a9f646" + }, + "outputs": [], + "source": [ + "config_dir = root_dir / 'conf'\n", + "config_dir.mkdir(exist_ok=True)\n", + "\n", + "config_path = config_dir / 'masking_online_aug.yaml'\n", + "\n", + "if not config_path.is_file():\n", + " !wget https://raw.githubusercontent.com/{GIT_USER}/NeMo/{BRANCH}/examples/audio/conf/masking_online_aug.yaml -P {config_dir.as_posix()}\n", + "\n", + "config = OmegaConf.load(config_path)\n", + "config = OmegaConf.to_container(config, resolve=True)\n", + "config = OmegaConf.create(config)\n", + "\n", + "print('Loaded config')\n", + "print(OmegaConf.to_yaml(config))" + ] + }, + { + "cell_type": "markdown", + "id": "1c872345-3bfd-4933-8c51-dbb62fce790a", + "metadata": { + "id": "1c872345-3bfd-4933-8c51-dbb62fce790a" + }, + "source": [ + "Training dataset is configured with the following parameters\n", + "* `cuts_path` points to a Lhotse manifest file, containing speech samples\n", + "* `noise_path` poins to a Lhotse manifest file, containing noise samples\n", + "* `noise_mix_prob` defines the probabilty with which noise will be added during training\n", + "* `noise_snr` defines an SNR range for mixing noise samples\n", + "* `rir_enabled` enables room impulse response agmentation\n", + "* `rir_path` points to a Lhotse manifest file, containing RIR samples\n", + "* `rir_prob` defines the probabilty with which RIR will be added during training\n", + " \n", + "For the validation and test sets only `cuts_path` parameter is used since the `val` manifest already contains noisy and clean samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e19c4209-b10b-4924-a3de-e792de57c313", + "metadata": { + "id": "e19c4209-b10b-4924-a3de-e792de57c313" + }, + "outputs": [], + "source": [ + "# Setup training dataset\n", + "config.model.train_ds.cuts_path = dataset_manifest['speech_train'].as_posix() # path to Lhotse cuts manifest with speech signals for augmentation\n", + "config.model.train_ds.noise_path = dataset_manifest['noise_train'].as_posix() # path to Lhotse cuts manifest with noise signals\n", + "config.model.train_ds.noise_mix_prob = 1.0 # probability of applying noise augmentation\n", + "config.model.train_ds.noise_snr = (0, 20) # range of speech-to-noise ratio for the noise augmentation\n", + "config.model.train_ds.rir_enabled = True # enable room impulse response augmentation\n", + "config.model.train_ds.rir_path = dataset_manifest['rir_val'].as_posix() # path to Lhotse recordings manifest with room impulse response signals\n", + "config.model.train_ds.rir_prob = 1.0 # probability of applying RIR augmentation\n", + "\n", + "config.model.validation_ds.cuts_path = dataset_manifest['noisy_val'].as_posix() # fixed noisy validation set\n", + "\n", + "config.model.test_ds.cuts_path = dataset_manifest['noisy_val'].as_posix() # fixed noisy test set\n", + "\n", + "\n", + "print(\"Train dataset config:\")\n", + "print(OmegaConf.to_yaml(config.model.train_ds))" + ] + }, + { + "cell_type": "markdown", + "id": "534aef42-d3a7-4477-8f89-8fe780f2b5f7", + "metadata": { + "id": 
"534aef42-d3a7-4477-8f89-8fe780f2b5f7" + }, + "source": [ + "Metrics for validation and test set are configured in the following cell.\n", + "\n", + "In this tutorial, signal-to-distortion ratio (SDR) and scale-invariant SDR from torch metrics are used [5]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "522f1065-fe23-4101-b6a9-0706cc8db389", + "metadata": { + "id": "522f1065-fe23-4101-b6a9-0706cc8db389" + }, + "outputs": [], + "source": [ + "# Setup metrics to compute on validation and test sets\n", + "metrics = OmegaConf.create({\n", + " 'sisdr': {\n", + " '_target_': 'torchmetrics.audio.ScaleInvariantSignalDistortionRatio',\n", + " },\n", + " 'sdr': {\n", + " '_target_': 'torchmetrics.audio.SignalDistortionRatio',\n", + " }\n", + "})\n", + "config.model.metrics.val = metrics\n", + "config.model.metrics.test = metrics\n", + "\n", + "print(\"Metrics config:\")\n", + "print(OmegaConf.to_yaml(config.model.metrics))" + ] + }, + { + "cell_type": "markdown", + "id": "4771d02f-3b01-481c-8894-bb43644a941a", + "metadata": { + "id": "4771d02f-3b01-481c-8894-bb43644a941a" + }, + "source": [ + "### Trainer configuration\n", + "NeMo models are primarily PyTorch Lightning modules and therefore are entirely compatible with the PyTorch Lightning ecosystem." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2208b407-07e2-4bb0-bfad-2ef488acf217", + "metadata": { + "id": "2208b407-07e2-4bb0-bfad-2ef488acf217" + }, + "outputs": [], + "source": [ + "print(\"Trainer config:\")\n", + "print(OmegaConf.to_yaml(config.trainer))" + ] + }, + { + "cell_type": "markdown", + "id": "f2dac35a-b487-4037-907d-326f321564ca", + "metadata": { + "id": "f2dac35a-b487-4037-907d-326f321564ca" + }, + "source": [ + "We can modify some trainer configs for this tutorial.\n", + "Most importantly, the number of epochs is set to a small value, to limit the runtime for the purpose of this example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c18dbd2-ad22-4eae-912a-56d13aa74a6b", + "metadata": { + "id": "2c18dbd2-ad22-4eae-912a-56d13aa74a6b" + }, + "outputs": [], + "source": [ + "# Checks if we have GPU available and uses it\n", + "accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'\n", + "config.trainer.devices = 1\n", + "config.trainer.accelerator = accelerator\n", + "\n", + "# Reduces maximum number of epochs for quick demonstration\n", + "config.trainer.max_epochs = 30\n", + "\n", + "# Remove distributed training flags\n", + "config.trainer.strategy = 'auto'\n", + "\n", + "# Instantiate the trainer\n", + "trainer = pl.Trainer(**config.trainer)" + ] + }, + { + "cell_type": "markdown", + "id": "c1198508-7fb5-4ba7-a4e5-ad15153037a1", + "metadata": { + "id": "c1198508-7fb5-4ba7-a4e5-ad15153037a1" + }, + "source": [ + "### Experiment manager\n", + "\n", + "NeMo has an experiment manager that handles logging and checkpointing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72642c8c-97f5-4b6c-9e63-030c7a07810c", + "metadata": { + "scrolled": true, + "id": "72642c8c-97f5-4b6c-9e63-030c7a07810c" + }, + "outputs": [], + "source": [ + "from nemo.utils.exp_manager import exp_manager\n", + "\n", + "exp_dir = exp_manager(trainer, config.get(\"exp_manager\", None))\n", + "# The exp_dir provides a path to the current experiment for easy access\n", + "\n", + "print(\"Experiment directory:\")\n", + "print(exp_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "00562a21-0c0f-4c53-b89e-344431e75a42", + "metadata": { + "id": "00562a21-0c0f-4c53-b89e-344431e75a42" + }, + "source": [ + "### Model instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b73c3de-6ded-491d-8026-8a093f7ffc16", + "metadata": { + "scrolled": true, + "id": "0b73c3de-6ded-491d-8026-8a093f7ffc16" + }, + "outputs": [], + "source": [ + "from nemo.collections import audio as nemo_audio\n", + "\n", + "enhancement_model = nemo_audio.models.EncMaskDecAudioToAudioModel(cfg=config.model, trainer=trainer)" + ] + }, + { + "cell_type": "markdown", + "id": "a8f4373e-6aca-41d3-a69d-5bef53a88f20", + "metadata": { + "id": "a8f4373e-6aca-41d3-a69d-5bef53a88f20" + }, + "source": [ + "### Training\n", + "Create a Tensorboard visualization to monitor progress" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a253ce7c-6cca-441f-818e-1c8a2f00a9f5", + "metadata": { + "id": "a253ce7c-6cca-441f-818e-1c8a2f00a9f5" + }, + "outputs": [], + "source": [ + "try:\n", + " from google import colab\n", + " COLAB_ENV = True\n", + "except (ImportError, ModuleNotFoundError):\n", + " COLAB_ENV = False\n", + "\n", + "# Load the TensorBoard notebook extension\n", + "if COLAB_ENV:\n", + " %load_ext tensorboard\n", + " %tensorboard --logdir {exp_dir}\n", + "else:\n", + " print(\"To use tensorboard, please use this notebook in a Google Colab environment.\")" + ] + }, + { + "cell_type": "markdown", + "id": "9183a35d-8332-49f6-96b3-3d14dd70ff4e", + "metadata": { + "id": "9183a35d-8332-49f6-96b3-3d14dd70ff4e" + }, + "source": [ + "Training can be started using `trainer.fit`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0eab4e7e-3004-49fa-a09b-10dfd12ba294", + "metadata": { + "scrolled": true, + "id": "0eab4e7e-3004-49fa-a09b-10dfd12ba294" + }, + "outputs": [], + "source": [ + "trainer.fit(enhancement_model)" + ] + }, + { + "cell_type": "markdown", + "id": "af906f16-626c-4ca4-a465-fb0cca8d514a", + "metadata": { + "id": "af906f16-626c-4ca4-a465-fb0cca8d514a" + }, + "source": [ + "After the training is completed, the configured metrics can be easily computed on the test set as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "589af425-18b2-4445-8c88-d3161e9928c5", + "metadata": { + "id": "589af425-18b2-4445-8c88-d3161e9928c5" + }, + "outputs": [], + "source": [ + "trainer.test(enhancement_model, ckpt_path=None)" + ] + }, + { + "cell_type": "markdown", + "id": "cce96446-a969-4c6f-a865-968ee61119a8", + "metadata": { + "id": "cce96446-a969-4c6f-a865-968ee61119a8" + }, + "source": [ + "### Inference\n", + "\n", + "The following cell provides an example of inference on an single audio file.\n", + "For simplicity, the audio file information is taken from the test dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f29ade5-424e-4a7a-81b7-dc3c5cc90bf7", + "metadata": { + "id": "9f29ade5-424e-4a7a-81b7-dc3c5cc90bf7" + }, + "outputs": [], + "source": [ + "# Load 10 samples from test_dataloader\n", + "samples = [sample for sample in islice(enhancement_model.test_dataloader(), 10)]\n", + "\n", + "# Different sample can be used via list index\n", + "sample = samples[0]\n", + "\n", + "noisy_tensor = sample['input_signal']\n", + "speech_tensor = sample['target_signal']\n", + "\n", + "# Get the one-dimentional numpy signals for the plotting audio files and metrics calculation\n", + "noisy_signal = noisy_tensor.squeeze(0).numpy()\n", + "speech_signal = speech_tensor.squeeze(0).numpy()\n", + "\n", + "\n", + "# Move to device\n", + "device = 'cuda' if accelerator == 'gpu' else 'cpu'\n", + "enhancement_model = enhancement_model.to(device)\n", + "\n", + "# Process using the model\n", + "with torch.no_grad():\n", + " output_tensor, _ = enhancement_model(input_signal=noisy_tensor.unsqueeze(1).cuda())\n", + "output_signal = output_tensor[0][0].detach().cpu().numpy()" + ] + }, + { + "cell_type": "markdown", + "id": "5110ee6c-3939-4d70-a4d6-6422de31da51", + "metadata": { + "id": "5110ee6c-3939-4d70-a4d6-6422de31da51" + }, + "source": [ + "Signals can be easily plotted and signal metrics can be calculated for the given example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0275a032-969c-4e31-b8b5-8a61cb1f82ee", + "metadata": { + "id": "0275a032-969c-4e31-b8b5-8a61cb1f82ee" + }, + "outputs": [], + "source": [ + "# Show noisy and clean signals\n", + "show_metrics(signal=noisy_signal, reference=speech_signal, tag='Noisy signal', sample_rate=sample_rate)\n", + "show_metrics(signal=output_signal, reference=speech_signal, tag='Output signal', sample_rate=sample_rate)\n", + "\n", + "# Show signals\n", + "show_signal(speech_signal, tag='Speech signal')\n", + "show_signal(noisy_signal, tag='Noisy signal')\n", + "show_signal(output_signal, tag='Output signal')\n", + "\n", + "# Play audio\n", + "print('Speech signal')\n", + "ipd.display(ipd.Audio(speech_signal, rate=sample_rate))\n", + "\n", + "print('Noisy signal')\n", + "ipd.display(ipd.Audio(noisy_signal, rate=sample_rate))\n", + "\n", + "print('Output signal')\n", + "ipd.display(ipd.Audio(output_signal, rate=sample_rate))" + ] + }, + { + "cell_type": "markdown", + "id": "89eb3478-bf82-4fbd-aa7d-0a55edf57f3a", + "metadata": { + "id": "89eb3478-bf82-4fbd-aa7d-0a55edf57f3a" + }, + "source": [ + "## Next steps\n", + "This is a simple tutorial which can serve as a starting point for prototyping and experimentation with audio-to-audio models.\n", + "A processed audio output can be used, for example, for ASR or TTS.\n", + "\n", + "For more details about NeMo models and applications in in ASR and TTS, we recommend you checkout other tutorials next:\n", + "\n", + "* [NeMo fundamentals](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb)\n", + "* [NeMo models](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/01_NeMo_Models.ipynb)\n", + "* [Speech Recognition](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/asr/ASR_with_NeMo.ipynb)\n", + "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "31a162ae-6b9e-40e4-a468-fbbad3360b11", + "metadata": { + "id": 
"31a162ae-6b9e-40e4-a468-fbbad3360b11" + }, + "source": [ + "## References\n", + "\n", + "[1] Żelasko, P., Povey, D., Trmal, J., & Khudanpur, S. (2021). Lhotse: a speech data representation library for the modern deep learning ecosystem. https://arxiv.org/abs/2110.12561\n", + "\n", + "[2] V. Panayotov, G. Chen, D. Povery, S. Khudanpur, \"LibriSpeech: An ASR corpus based on public domain audio books,\" ICASSP 2015\n", + "\n", + "[3] Lhotse documentation, https://lhotse.readthedocs.io/\n", + "\n", + "[4] J. Thieman, N. Ito, V. Emmanuel, \"DEMAND: collection of multi-channel recordings of acoustic noise in diverse environments,\" ICA 2013\n", + "\n", + "[5] T. Ko, V. Peddinti, D. Povey, M. L. Seltzer and S. Khudanpur, \"A study on data augmentation of reverberant speech for robust speech recognition,\" 2017 ICASSP\n", + "\n", + "[6] https://github.com/Lightning-AI/torchmetrics" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From 8fcd5cbae4afb3b7a804ea78c55a10a8cb690709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 27 Oct 2024 08:02:41 +0100 Subject: [PATCH 004/125] =?UTF-8?q?[=F0=9F=A4=A0]:=20Howdy=20folks,=20let'?= =?UTF-8?q?s=20bump=20`Dockerfile.ci`=20to=20397e9da=20!=20(#11051)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: pablo-garay <7166088+pablo-garay@users.noreply.github.com> --- Dockerfile.ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 6ef99a35ae82c..74cfd75eb042a 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea ARG MODELOPT_VERSION=0.17.0 -ARG MCORE_TAG=425cdd48d5ef5d360d8033288ff7cb0d378f535f +ARG MCORE_TAG=397e9da9511a09ae8badba30129c7e4934b06118 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c RUN \ From 0c5f5fb130969ea4eda21721fd01d47616a4cfda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Sun, 27 Oct 2024 13:15:38 +0100 Subject: [PATCH 005/125] ci: Send team alerts on specific keywords (#10986) * ci: Send team alerts on specific keywords Signed-off-by: Oliver Koenig * f Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig --- .github/workflows/_test_template.yml | 7 +++++++ .github/workflows/cicd-main.yml | 14 +++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/workflows/_test_template.yml b/.github/workflows/_test_template.yml index 17cceb665747e..54579ab2d850a 100644 --- a/.github/workflows/_test_template.yml +++ b/.github/workflows/_test_template.yml @@ -33,6 +33,9 @@ on: log: description: Last 2000 characters of the test step's log value: ${{ jobs.main.outputs.log }} + potential_infra_failure: + description: Boolean flag when infra-related keyword spotted in logs. 
+ value: ${{ jobs.main.outputs.potential_infra_failure }} jobs: main: @@ -40,6 +43,7 @@ jobs: outputs: conclusion: ${{ steps.main.conclusion }} log: ${{ steps.main.outputs.log }} + potential_infra_failure: ${{ steps.main.outputs.potential_infra_failure }} steps: - name: Docker system cleanup run: | @@ -75,6 +79,9 @@ jobs: echo "log=$(tail -c 2000 err.log | base64 -w 0)" >> "$GITHUB_OUTPUT" + potential_infra_failure=$(cat err.log | grep -Eqi "gpu|cuda|device" && echo true || echo false) + echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT" + exit $EXIT_CODE - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 098b9d635cb31..6b39d2a9082ee 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -4515,7 +4515,10 @@ jobs: if: ${{ always() && steps.pipeline-conclusion.outputs.FAILED == 'true' && env.SLACK_WEBHOOK != '' }} env: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + SLACK_WEBHOOK_ADMIN: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_ACTOR: ${{ github.actor }} + BRANCH: ${{ github.head_ref || github.ref_name }} REPOSITORY: ${{ github.repository }} RUN_ID: ${{ github.run_id }} PR_NUMBER: ${{ github.event.number }} @@ -4571,13 +4574,15 @@ jobs: echo "* [$JOB_NAME]($JOB_URL)" | tee -a $GITHUB_STEP_SUMMARY LOGS=$(echo $JOB | yq '(.value.outputs.log | @base64d)' | tr -d '"') + LOGS=$([[ $(echo $LOGS | wc -c) -gt 0 ]] && echo -E "\`\`\`\n$LOGS\n\`\`\`" || echo "") + LOGS=$([[ $(echo $JOB | yq '.value.outputs.potential_infra_failure') == "true" ]] && echo -E "$LOGS\n\ncc: $SLACK_WEBHOOK_ADMIN" || echo -E "$LOGS") SUMMARY=$(echo "$SUMMARY" | jq \ --arg pr "<$PR_URL|$PR_TITLE>" \ --arg job "<$JOB_URL|$JOB_NAME>" \ - --arg logs "$LOGS" \ - --arg author "" \ - --arg branch ""\ + --arg logs "$(echo -e "$LOGS")" \ + --arg author "" \ + --arg branch ""\ '. 
+= [ { "type": "section", @@ -4588,8 +4593,7 @@ jobs: + "\nJob: " + $job + "\nAuthor: " + $author + "\nBranch: " + $branch - + "\nLogs:" - + "```\n" + $logs + "\n```" + + "\nLogs:" + $logs ) } } From e13466fc1a2dbe3f061f6c910a084b7cff9a32f8 Mon Sep 17 00:00:00 2001 From: monica-sekoyan <166123533+monica-sekoyan@users.noreply.github.com> Date: Sun, 27 Oct 2024 21:51:32 +0400 Subject: [PATCH 006/125] Fix timestamps tests (#11053) * change timestamps tests Signed-off-by: Monica Sekoyan * Apply isort and black reformatting Signed-off-by: monica-sekoyan --------- Signed-off-by: Monica Sekoyan Signed-off-by: monica-sekoyan Co-authored-by: monica-sekoyan --- .../asr/decoding/test_ctc_decoding.py | 20 +++++++++++++------ .../asr/decoding/test_rnnt_decoding.py | 20 +++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/tests/collections/asr/decoding/test_ctc_decoding.py b/tests/collections/asr/decoding/test_ctc_decoding.py index fd16aca67713d..7a16db4324bcb 100644 --- a/tests/collections/asr/decoding/test_ctc_decoding.py +++ b/tests/collections/asr/decoding/test_ctc_decoding.py @@ -31,7 +31,7 @@ def char_vocabulary(): - return [' ', 'a', 'b', 'c', 'd', 'e', 'f'] + return [' ', 'a', 'b', 'c', 'd', 'e', 'f', '.'] @pytest.fixture() @@ -60,11 +60,19 @@ def check_char_timestamps(hyp: Hypothesis, decoding: CTCDecoding): words = list(filter(lambda x: x != '', words)) assert len(hyp.timestep['word']) == len(words) - segments_count = sum([hyp.text.count(seperator) for seperator in decoding.segment_seperators]) - if hyp.text[-1] not in decoding.segment_seperators: - segments_count += 1 + segments = [] + segment = [] - assert len(hyp.timestep['segment']) == segments_count + for word in words: + segment.append(word) + if word[-1] in decoding.segment_seperators: + segments.append(' '.join(segment)) + segment = [] + + if segment: + segments.append(' '.join(segment)) + + assert len(hyp.timestep['segment']) == len(segments) def check_subword_timestamps(hyp: Hypothesis, decoding: CTCBPEDecoding): @@ -83,7 +91,7 @@ def check_subword_timestamps(hyp: Hypothesis, decoding: CTCBPEDecoding): assert len(chars) == len(all_chars) segments_count = sum([hyp.text.count(seperator) for seperator in decoding.segment_seperators]) - if hyp.text[-1] not in decoding.segment_seperators: + if not hyp.text or hyp.text[-1] not in decoding.segment_seperators: segments_count += 1 assert len(hyp.timestep['segment']) == segments_count diff --git a/tests/collections/asr/decoding/test_rnnt_decoding.py b/tests/collections/asr/decoding/test_rnnt_decoding.py index 59da7b11d2863..82b5d00bede6a 100644 --- a/tests/collections/asr/decoding/test_rnnt_decoding.py +++ b/tests/collections/asr/decoding/test_rnnt_decoding.py @@ -35,7 +35,7 @@ def char_vocabulary(): - return [' ', 'a', 'b', 'c', 'd', 'e', 'f'] + return [' ', 'a', 'b', 'c', 'd', 'e', 'f', '.'] @pytest.fixture() @@ -129,11 +129,19 @@ def check_char_timestamps(hyp: rnnt_utils.Hypothesis, decoding: RNNTDecoding): words = list(filter(lambda x: x != '', words)) assert len(hyp.timestep['word']) == len(words) - segments_count = sum([hyp.text.count(seperator) for seperator in decoding.segment_seperators]) - if hyp.text[-1] not in decoding.segment_seperators: - segments_count += 1 + segments = [] + segment = [] - assert len(hyp.timestep['segment']) == segments_count + for word in words: + segment.append(word) + if word[-1] in decoding.segment_seperators: + segments.append(' '.join(segment)) + segment = [] + + if segment: + segments.append(' '.join(segment)) + + assert 
len(hyp.timestep['segment']) == len(segments) def check_subword_timestamps(hyp: rnnt_utils.Hypothesis, decoding: RNNTBPEDecoding): @@ -152,7 +160,7 @@ def check_subword_timestamps(hyp: rnnt_utils.Hypothesis, decoding: RNNTBPEDecodi assert len(chars) == len(all_chars) segments_count = sum([hyp.text.count(seperator) for seperator in decoding.segment_seperators]) - if hyp.text[-1] not in decoding.segment_seperators: + if not hyp.text or hyp.text[-1] not in decoding.segment_seperators: segments_count += 1 assert len(hyp.timestep['segment']) == segments_count From a814e4a5c238ab93f11cb590b71a8fab411270fe Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Sun, 27 Oct 2024 18:33:28 -0400 Subject: [PATCH 007/125] Qwen2 Recipe (#10974) * Add qwen recipe * Apply isort and black reformatting Signed-off-by: suiyoubi * change to TP1 for small models Signed-off-by: Ao Tang --------- Signed-off-by: suiyoubi Signed-off-by: Ao Tang Co-authored-by: suiyoubi --- nemo/collections/llm/gpt/model/qwen2.py | 2 +- nemo/collections/llm/recipes/__init__.py | 10 + nemo/collections/llm/recipes/qwen2.py | 139 +++++++++++++ nemo/collections/llm/recipes/qwen2_1p5b.py | 222 ++++++++++++++++++++ nemo/collections/llm/recipes/qwen2_500m.py | 222 ++++++++++++++++++++ nemo/collections/llm/recipes/qwen2_72b.py | 226 +++++++++++++++++++++ nemo/collections/llm/recipes/qwen2_7b.py | 223 ++++++++++++++++++++ 7 files changed, 1043 insertions(+), 1 deletion(-) create mode 100644 nemo/collections/llm/recipes/qwen2.py create mode 100644 nemo/collections/llm/recipes/qwen2_1p5b.py create mode 100644 nemo/collections/llm/recipes/qwen2_500m.py create mode 100644 nemo/collections/llm/recipes/qwen2_72b.py create mode 100644 nemo/collections/llm/recipes/qwen2_7b.py diff --git a/nemo/collections/llm/gpt/model/qwen2.py b/nemo/collections/llm/gpt/model/qwen2.py index 03dc53ec679ed..75f436aa95369 100644 --- a/nemo/collections/llm/gpt/model/qwen2.py +++ b/nemo/collections/llm/gpt/model/qwen2.py @@ -296,7 +296,7 @@ def _import_qkv_bias(ctx: io.TransformCTX, q, k, v): k = k.view(*new_kv_tensor_shape) v = v.view(*new_kv_tensor_shape) - qkv_bias = torch.empty((0, head_size)) + qkv_bias = torch.empty((0, head_size)).type_as(q) for i in range(num_query_groups): qkv_bias = torch.cat((qkv_bias, q[i * heads_per_group : (i + 1) * heads_per_group, :])) qkv_bias = torch.cat((qkv_bias, k[i : i + 1, :])) diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index 21994b75f60dc..2aa6eb8bf784c 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -50,6 +50,11 @@ nemotron4_22b_16k, nemotron4_22b_64k, nemotron4_340b, + qwen2, + qwen2_1p5b, + qwen2_7b, + qwen2_72b, + qwen2_500m, ) from nemo.collections.llm.recipes.log.default import default_log, default_resume from nemo.collections.llm.recipes.optim import adam @@ -90,6 +95,11 @@ "nemotron4_22b_16k", "nemotron4_22b_64k", "nemotron4_340b", + "qwen2", + "qwen2_500m", + "qwen2_1p5b", + "qwen2_7b", + "qwen2_72b", "gpt3_175b", "adam", "default_log", diff --git a/nemo/collections/llm/recipes/qwen2.py b/nemo/collections/llm/recipes/qwen2.py new file mode 100644 index 0000000000000..ff0c76a714f12 --- /dev/null +++ b/nemo/collections/llm/recipes/qwen2.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.gpt.model.qwen2 import ( + Qwen2Config1P5B, + Qwen2Config7B, + Qwen2Config72B, + Qwen2Config500M, + Qwen2Model, +) +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed, fp16_mixed + + +def qwen2_model(version: str) -> run.Config[pl.LightningModule]: + """ + A function to create a qwen2 models. + + Args: + version (str): The version of the qwen2 model to create. one of ["qwen2_500m", "qwen2_1p5b", + "qwen2_7b", "qwen2_72b"]. + + Returns: + run.Config[pl.LightningModule]: Configuration for the qwen2 model. + """ + config = None + if version == "qwen2_500m": + config = run.Config(Qwen2Config500M) + elif version == "qwen2_1p5b": + config = run.Config(Qwen2Config1P5B) + elif version == "qwen2_7b": + config = run.Config(Qwen2Config7B) + elif version == "qwen2_72b": + config = run.Config(Qwen2Config72B) + + assert config is not None, f"Invalid version: {version}" + return run.Config(Qwen2Model, config=config) + + +def qwen2_trainer( + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 2000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for qwen2 models. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. 
+ """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_include_optimizer=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ) + + precision_plugin = None + if precision == "16-mixed": + precision_plugin = fp16_mixed() + elif precision == "bf16-mixed": + precision_plugin = bf16_mixed() + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + callbacks=callbacks, + devices=num_gpus_per_node, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=precision_plugin, + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=val_check_interval, + ) + + return trainer diff --git a/nemo/collections/llm/recipes/qwen2_1p5b.py b/nemo/collections/llm/recipes/qwen2_1p5b.py new file mode 100644 index 0000000000000..80ed957e3b48d --- /dev/null +++ b/nemo/collections/llm/recipes/qwen2_1p5b.py @@ -0,0 +1,222 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.qwen2 import qwen2_model, qwen2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "qwen2_1p5b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Qwen2 1.5b model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Qwen2 1.5b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=qwen2_1p5b ... 
+ + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return qwen2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 1, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Qwen2 1.5b model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory qwen2_1p5b + $ nemo llm pretrain --factory "qwen2_1p5b(num_nodes=1, name='my_qwen2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="qwen2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. 
+ """ + return run.Partial( + fn, + model=model(), + trainer=qwen2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Qwen2 1.5b model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory qwen2_1p5b + + Python API usage: + >>> recipe = finetune_recipe(name="qwen2_1p5b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "Qwen/Qwen2-1.5B", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/qwen2_500m.py b/nemo/collections/llm/recipes/qwen2_500m.py new file mode 100644 index 0000000000000..677fc066c0478 --- /dev/null +++ b/nemo/collections/llm/recipes/qwen2_500m.py @@ -0,0 +1,222 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.qwen2 import qwen2_model, qwen2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "qwen2_500m" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Qwen2 500m model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Qwen2 500m model. + + Examples: + CLI usage: + $ nemo llm pretrain model=qwen2_500m ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return qwen2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 1, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Qwen2 500m model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. 
+ gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory qwen2_500m + $ nemo llm pretrain --factory "qwen2_500m(num_nodes=1, name='my_qwen2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="qwen2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. + """ + return run.Partial( + fn, + model=model(), + trainer=qwen2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Qwen2 500m model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory qwen2_500m + + Python API usage: + >>> recipe = finetune_recipe(name="qwen2_500m_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. 
For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "Qwen/Qwen2-0.5B", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/qwen2_72b.py b/nemo/collections/llm/recipes/qwen2_72b.py new file mode 100644 index 0000000000000..d93be1b9257a9 --- /dev/null +++ b/nemo/collections/llm/recipes/qwen2_72b.py @@ -0,0 +1,226 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.qwen2 import qwen2_model, qwen2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "qwen2_72b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Qwen2 72b model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Qwen2 72b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=qwen2_72b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return qwen2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 8, + pipeline_parallelism: int = 4, + pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 4, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Qwen2 72b model. 
+ + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory qwen2_72b + $ nemo llm pretrain --factory "qwen2_72b(num_nodes=1, name='my_qwen2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="qwen2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. 
+ """ + return run.Partial( + fn, + model=model(), + trainer=qwen2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Qwen2 72b model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory qwen2_72b + + Python API usage: + >>> recipe = finetune_recipe(name="qwen2_72b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "Qwen/Qwen2-72B", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + assert num_nodes >= 4 + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.trainer.strategy.pipeline_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/qwen2_7b.py b/nemo/collections/llm/recipes/qwen2_7b.py new file mode 100644 index 0000000000000..57ccd48e9fe18 --- /dev/null +++ b/nemo/collections/llm/recipes/qwen2_7b.py @@ -0,0 +1,223 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.qwen2 import qwen2_model, qwen2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "qwen2_7b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Qwen2 7b model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Qwen2 7b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=qwen2_7b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return qwen2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Qwen2 7b model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. 
+ gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory qwen2_7b + $ nemo llm pretrain --factory "qwen2_7b(num_nodes=1, name='my_qwen2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="qwen2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. + """ + return run.Partial( + fn, + model=model(), + trainer=qwen2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Qwen2 7b model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory qwen2_7b + + Python API usage: + >>> recipe = finetune_recipe(name="qwen2_7b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. 
For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "Qwen/Qwen2-7B", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe From 7e036e6efc6cb7a37a6d1fdb8cf653b91a07f9db Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Sun, 27 Oct 2024 17:49:41 -0700 Subject: [PATCH 008/125] Add copyright notice (#11050) * Add copyright notice * Apply isort and black reformatting Signed-off-by: pablo-garay --------- Signed-off-by: pablo-garay Co-authored-by: pablo-garay --- .../multimodal_llm/neva/eval/mixtral_eval.py | 19 ++++++- examples/nlp/rag/rag_generating.py | 14 ++++++ examples/nlp/rag/rag_indexing.py | 14 ++++++ .../adapters/attention_adapter_mixin.py | 14 ++++++ .../common/metrics/perf_metrics.py | 14 ++++++ .../common/parts/perf_metrics_utils.py | 14 ++++++ nemo/collections/common/prompts/canary.py | 14 ++++++ nemo/collections/common/prompts/example.py | 14 ++++++ nemo/collections/common/prompts/fn.py | 14 ++++++ nemo/collections/common/prompts/formatter.py | 14 ++++++ nemo/collections/common/prompts/gemma.py | 14 ++++++ nemo/collections/common/prompts/llama.py | 14 ++++++ nemo/collections/common/prompts/mistral.py | 14 ++++++ nemo/collections/common/prompts/phi2.py | 14 ++++++ .../common/tokenizers/chat_template_mixin.py | 14 ++++++ .../recipes/tp_overlap_configs/userbuffers.py | 14 ++++++ nemo/collections/llm/t5/data/core.py | 14 ++++++ nemo/collections/llm/t5/data/fine_tuning.py | 14 ++++++ nemo/collections/llm/t5/data/pre_training.py | 14 ++++++ nemo/collections/llm/t5/data/squad.py | 14 ++++++ nemo/collections/llm/t5/model/t5.py | 14 ++++++ .../data/neva/neva_energon_dataset.py | 14 ++++++ .../nlp/models/rag/custom_bert_embedder.py | 14 ++++++ .../nlp/models/rag/custom_gpt_llm.py | 14 ++++++ .../modules/common/hyena/fftconv_wrapper.py | 14 ++++++ .../nlp/modules/common/hyena/hyena_filter.py | 14 ++++++ .../nlp/modules/common/hyena/hyena_spec.py | 14 ++++++ nemo/export/trt_llm/qnemo/utils.py | 14 ++++++ nemo/lightning/fabric/conversion.py | 14 ++++++ nemo/lightning/fabric/fabric.py | 14 ++++++ nemo/lightning/fabric/plugins.py | 14 ++++++ nemo/lightning/io/api.py | 14 ++++++ nemo/lightning/io/artifact/base.py | 14 ++++++ nemo/lightning/io/artifact/file.py | 14 ++++++ nemo/lightning/io/artifact/pickle.py | 14 ++++++ nemo/lightning/io/capture.py | 14 ++++++ nemo/lightning/io/connector.py | 14 ++++++ nemo/lightning/io/fdl_torch.py | 14 ++++++ nemo/lightning/io/pl.py | 14 ++++++ nemo/lightning/io/state.py | 14 ++++++ nemo/lightning/pytorch/callbacks/debugging.py | 14 ++++++ .../callbacks/megatron_comm_overlap.py | 14 ++++++ .../pytorch/callbacks/moe_token_drop.py | 14 ++++++ nemo/lightning/pytorch/utils.py | 14 ++++++ nemo/utils/callbacks/s3_checkpoint_io.py | 14 ++++++ nemo/utils/s3_dirpath_utils.py | 14 ++++++ nemo/utils/s3_utils.py | 14 ++++++ nemo/utils/trainer_utils.py | 14 ++++++ .../convert_griffin_hf_to_nemo.py | 50 ++++++++++++------- .../convert_griffin_nemo_to_hf.py | 14 ++++++ .../quantize_model_to_nf4.py | 14 ++++++ .../filter_tarred_audio_dataset.py | 14 ++++++ scripts/speech_recognition/oomptimizer.py | 14 ++++++ 
.../asr/decoding/test_multi_task_decoding.py | 14 ++++++ .../asr/test_asr_lhotse_dataset.py | 14 ++++++ .../common/prompt_formatters/conftest.py | 14 ++++++ .../test_canary_prompt_formatter.py | 14 ++++++ .../test_gemma_prompt_formatter.py | 14 ++++++ .../test_llama2_prompt_formatter.py | 14 ++++++ .../test_mistral_prompt_formatter.py | 14 ++++++ .../test_prompt_formatter_api.py | 14 ++++++ .../common/test_2d_bucketing_constraint.py | 14 ++++++ tests/collections/common/test_data_utils.py | 14 ++++++ .../common/test_lhotse_multirank_rng.py | 14 ++++++ .../common/test_lhotse_nemo_adapters.py | 14 ++++++ tests/collections/common/test_perf_metrics.py | 14 ++++++ .../llm/auto_conf/test_autoconf_utils.py | 14 ++++++ .../llm/auto_conf/test_base_configs.py | 14 ++++++ .../llm/auto_conf/test_generate_configs.py | 14 ++++++ .../llm/bitexact/mixtral/compare_ckpts.py | 14 ++++++ .../bitexact/mixtral/pretrain_mini_mixtral.py | 14 ++++++ .../llm/gpt/model/test_baichuan.py | 14 ++++++ tests/collections/llm/gpt/model/test_base.py | 14 ++++++ .../collections/llm/gpt/model/test_chatglm.py | 14 ++++++ tests/collections/llm/gpt/model/test_gemma.py | 14 ++++++ tests/collections/llm/gpt/model/test_llama.py | 14 ++++++ .../collections/llm/gpt/model/test_mistral.py | 14 ++++++ .../collections/llm/gpt/model/test_mixtral.py | 14 ++++++ .../llm/gpt/model/test_model_import.py | 14 ++++++ .../llm/gpt/model/test_nemotron.py | 14 ++++++ tests/collections/llm/gpt/model/test_qwen2.py | 14 ++++++ tests/collections/llm/gpt/model/test_ssm.py | 14 ++++++ .../llm/gpt/model/test_starcoder.py | 14 ++++++ .../llm/gpt/model/test_starcoder2.py | 14 ++++++ .../collections/llm/megatron_t5_finetuning.py | 14 ++++++ .../llm/recipes/test_llama3_70b.py | 14 ++++++ .../llm/recipes/test_llama3_70b_16k.py | 14 ++++++ .../llm/recipes/test_llama3_70b_64k.py | 14 ++++++ .../collections/llm/recipes/test_llama3_8b.py | 14 ++++++ .../llm/recipes/test_llama3_8b_16k.py | 14 ++++++ .../llm/recipes/test_llama3_8b_64k.py | 14 ++++++ tests/collections/llm/recipes/test_mistral.py | 14 ++++++ .../llm/recipes/test_mixtral_8x22b.py | 14 ++++++ .../llm/recipes/test_mixtral_8x7b_16k.py | 14 ++++++ .../llm/recipes/test_mixtral_8x7b_64k.py | 14 ++++++ .../collections/llm/recipes/test_nemotron.py | 14 ++++++ .../llm/recipes/test_nemotron3_4b.py | 14 ++++++ .../llm/recipes/test_nemotron3_8b.py | 14 ++++++ .../llm/recipes/test_nemotron4_15b.py | 14 ++++++ .../llm/recipes/test_nemotron4_15b_16k.py | 14 ++++++ .../llm/recipes/test_nemotron4_15b_64k.py | 14 ++++++ .../llm/recipes/test_nemotron4_22b.py | 14 ++++++ .../llm/recipes/test_nemotron4_22b_16k.py | 14 ++++++ .../llm/recipes/test_nemotron4_22b_64k.py | 14 ++++++ .../llm/recipes/test_nemotron4_340b.py | 14 ++++++ tests/collections/llm/test_hf_import.py | 14 ++++++ tests/collections/nlp/test_qlora.py | 14 ++++++ tests/core/test_dist_ckpt.py | 14 ++++++ tests/lightning/mcore_microbatch_utils.py | 14 ++++++ .../callbacks/test_model_checkpoint.py | 14 ++++++ .../strategies/test_megatron_strategy.py | 14 ++++++ tests/lightning/test_nemo_run.py | 14 ++++++ tests/lightning/test_state_restoration.py | 14 ++++++ tests/utils/test_trainer_utils.py | 14 ++++++ 114 files changed, 1618 insertions(+), 19 deletions(-) diff --git a/examples/multimodal/multimodal_llm/neva/eval/mixtral_eval.py b/examples/multimodal/multimodal_llm/neva/eval/mixtral_eval.py index d3fd8c644afb7..60a56eb02d56a 100644 --- a/examples/multimodal/multimodal_llm/neva/eval/mixtral_eval.py +++ b/examples/multimodal/multimodal_llm/neva/eval/mixtral_eval.py 
@@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Script to query Mixtral-8x7B as a judge via NGC API for evaluation""" import argparse import json @@ -61,7 +75,10 @@ def get_eval(content: str, max_tokens: int): 'role': 'system', 'content': 'You are a helpful and precise assistant for checking the quality of the answer.', }, - {'role': 'user', 'content': content,}, + { + 'role': 'user', + 'content': content, + }, ], "temperature": 0.2, "top_p": 0.7, diff --git a/examples/nlp/rag/rag_generating.py b/examples/nlp/rag/rag_generating.py index 952dc25321029..fc3915f89dee9 100644 --- a/examples/nlp/rag/rag_generating.py +++ b/examples/nlp/rag/rag_generating.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llama_index.core import Settings, StorageContext, load_index_from_storage from nemo.collections.nlp.models.rag.custom_bert_embedder import NeMoBertEmbeddings diff --git a/examples/nlp/rag/rag_indexing.py b/examples/nlp/rag/rag_indexing.py index ab487c035228f..2130a6c5f85af 100644 --- a/examples/nlp/rag/rag_indexing.py +++ b/examples/nlp/rag/rag_indexing.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex from llama_index.core.node_parser import SentenceSplitter diff --git a/nemo/collections/asr/parts/submodules/adapters/attention_adapter_mixin.py b/nemo/collections/asr/parts/submodules/adapters/attention_adapter_mixin.py index 0c1852773072b..9fee3cea24360 100644 --- a/nemo/collections/asr/parts/submodules/adapters/attention_adapter_mixin.py +++ b/nemo/collections/asr/parts/submodules/adapters/attention_adapter_mixin.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch from nemo.core.classes.mixins import adapter_mixins diff --git a/nemo/collections/common/metrics/perf_metrics.py b/nemo/collections/common/metrics/perf_metrics.py index 2f35171af75d4..d668d29c42ffb 100644 --- a/nemo/collections/common/metrics/perf_metrics.py +++ b/nemo/collections/common/metrics/perf_metrics.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Any, Dict, List, Optional import numpy as np diff --git a/nemo/collections/common/parts/perf_metrics_utils.py b/nemo/collections/common/parts/perf_metrics_utils.py index 1633b1343340d..4fb1e85b60bdd 100644 --- a/nemo/collections/common/parts/perf_metrics_utils.py +++ b/nemo/collections/common/parts/perf_metrics_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import glob import os from typing import List diff --git a/nemo/collections/common/prompts/canary.py b/nemo/collections/common/prompts/canary.py index 0eb3296bcff91..3197bf7efe2c0 100644 --- a/nemo/collections/common/prompts/canary.py +++ b/nemo/collections/common/prompts/canary.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import Any import torch diff --git a/nemo/collections/common/prompts/example.py b/nemo/collections/common/prompts/example.py index 3589efb938f4d..3324b36db496e 100644 --- a/nemo/collections/common/prompts/example.py +++ b/nemo/collections/common/prompts/example.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Implemented following the guide at https://www.promptingguide.ai/models/phi-2#phi-2-usage """ diff --git a/nemo/collections/common/prompts/fn.py b/nemo/collections/common/prompts/fn.py index ce7d2fc8a69a6..a93dabb01f6a5 100644 --- a/nemo/collections/common/prompts/fn.py +++ b/nemo/collections/common/prompts/fn.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Callable, Sequence import torch diff --git a/nemo/collections/common/prompts/formatter.py b/nemo/collections/common/prompts/formatter.py index 6d2c67f5311d5..9fb4992540681 100644 --- a/nemo/collections/common/prompts/formatter.py +++ b/nemo/collections/common/prompts/formatter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from abc import ABC from enum import Enum from functools import lru_cache diff --git a/nemo/collections/common/prompts/gemma.py b/nemo/collections/common/prompts/gemma.py index e3b81c848a3e7..22b5ac5c13b7d 100644 --- a/nemo/collections/common/prompts/gemma.py +++ b/nemo/collections/common/prompts/gemma.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Implemented following the guide at https://www.promptingguide.ai/models/gemma#gemma-7b-prompt-format """ diff --git a/nemo/collections/common/prompts/llama.py b/nemo/collections/common/prompts/llama.py index fdaccfaa846e4..affbc94da904e 100644 --- a/nemo/collections/common/prompts/llama.py +++ b/nemo/collections/common/prompts/llama.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.common.prompts.formatter import BOS_SLOT, EOS_SLOT, Modality, PromptFormatter diff --git a/nemo/collections/common/prompts/mistral.py b/nemo/collections/common/prompts/mistral.py index e882ac5973b11..09398a7b5fcc2 100644 --- a/nemo/collections/common/prompts/mistral.py +++ b/nemo/collections/common/prompts/mistral.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Implemented following the guide at https://www.promptingguide.ai/models/mistral-7b#chat-template-for-mistral-7b-instruct """ diff --git a/nemo/collections/common/prompts/phi2.py b/nemo/collections/common/prompts/phi2.py index 67dad8d5dd82a..fd2f1d7778b2b 100644 --- a/nemo/collections/common/prompts/phi2.py +++ b/nemo/collections/common/prompts/phi2.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """ Implemented following the guide at https://www.promptingguide.ai/models/phi-2#phi-2-usage """ diff --git a/nemo/collections/common/tokenizers/chat_template_mixin.py b/nemo/collections/common/tokenizers/chat_template_mixin.py index 83a5e537519cd..0dad380f0da0b 100644 --- a/nemo/collections/common/tokenizers/chat_template_mixin.py +++ b/nemo/collections/common/tokenizers/chat_template_mixin.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re from functools import cache diff --git a/nemo/collections/llm/recipes/tp_overlap_configs/userbuffers.py b/nemo/collections/llm/recipes/tp_overlap_configs/userbuffers.py index 8be50c08e7685..5c6a54520504c 100644 --- a/nemo/collections/llm/recipes/tp_overlap_configs/userbuffers.py +++ b/nemo/collections/llm/recipes/tp_overlap_configs/userbuffers.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass diff --git a/nemo/collections/llm/t5/data/core.py b/nemo/collections/llm/t5/data/core.py index 11543274c3b9f..fb12a8fdf0362 100644 --- a/nemo/collections/llm/t5/data/core.py +++ b/nemo/collections/llm/t5/data/core.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from pathlib import Path from typing import TYPE_CHECKING, Optional diff --git a/nemo/collections/llm/t5/data/fine_tuning.py b/nemo/collections/llm/t5/data/fine_tuning.py index 9326dabe7b840..4e2a88e5712c9 100644 --- a/nemo/collections/llm/t5/data/fine_tuning.py +++ b/nemo/collections/llm/t5/data/fine_tuning.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from functools import lru_cache from pathlib import Path diff --git a/nemo/collections/llm/t5/data/pre_training.py b/nemo/collections/llm/t5/data/pre_training.py index e6f619972284f..9f6cb27933d55 100644 --- a/nemo/collections/llm/t5/data/pre_training.py +++ b/nemo/collections/llm/t5/data/pre_training.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import warnings from pathlib import Path diff --git a/nemo/collections/llm/t5/data/squad.py b/nemo/collections/llm/t5/data/squad.py index cee0549c80be0..3e413919211cd 100644 --- a/nemo/collections/llm/t5/data/squad.py +++ b/nemo/collections/llm/t5/data/squad.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import shutil from typing import TYPE_CHECKING, List, Optional diff --git a/nemo/collections/llm/t5/model/t5.py b/nemo/collections/llm/t5/model/t5.py index 83dc2029a5131..fa4095d755882 100644 --- a/nemo/collections/llm/t5/model/t5.py +++ b/nemo/collections/llm/t5/model/t5.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import copy from dataclasses import dataclass from typing import TYPE_CHECKING, Callable, Dict, Literal, Optional, Union diff --git a/nemo/collections/multimodal/data/neva/neva_energon_dataset.py b/nemo/collections/multimodal/data/neva/neva_energon_dataset.py index a83e616f248f6..81ec784b19b78 100644 --- a/nemo/collections/multimodal/data/neva/neva_energon_dataset.py +++ b/nemo/collections/multimodal/data/neva/neva_energon_dataset.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import dataclasses from dataclasses import dataclass from typing import List, Optional, Union diff --git a/nemo/collections/nlp/models/rag/custom_bert_embedder.py b/nemo/collections/nlp/models/rag/custom_bert_embedder.py index e2f26fadf247e..d27ee98a14ef4 100644 --- a/nemo/collections/nlp/models/rag/custom_bert_embedder.py +++ b/nemo/collections/nlp/models/rag/custom_bert_embedder.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Any, List import torch diff --git a/nemo/collections/nlp/models/rag/custom_gpt_llm.py b/nemo/collections/nlp/models/rag/custom_gpt_llm.py index bcd52b3f9b169..f26a86cfaaf70 100644 --- a/nemo/collections/nlp/models/rag/custom_gpt_llm.py +++ b/nemo/collections/nlp/models/rag/custom_gpt_llm.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Any from llama_index.core.bridge.pydantic import PrivateAttr diff --git a/nemo/collections/nlp/modules/common/hyena/fftconv_wrapper.py b/nemo/collections/nlp/modules/common/hyena/fftconv_wrapper.py index ca9a44489697b..cb06f91fa37f8 100644 --- a/nemo/collections/nlp/modules/common/hyena/fftconv_wrapper.py +++ b/nemo/collections/nlp/modules/common/hyena/fftconv_wrapper.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math import torch diff --git a/nemo/collections/nlp/modules/common/hyena/hyena_filter.py b/nemo/collections/nlp/modules/common/hyena/hyena_filter.py index bf6752102480c..cb4db1014ae30 100644 --- a/nemo/collections/nlp/modules/common/hyena/hyena_filter.py +++ b/nemo/collections/nlp/modules/common/hyena/hyena_filter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from dataclasses import dataclass from typing import Union diff --git a/nemo/collections/nlp/modules/common/hyena/hyena_spec.py b/nemo/collections/nlp/modules/common/hyena/hyena_spec.py index 58797cf278381..82cc7970a772c 100644 --- a/nemo/collections/nlp/modules/common/hyena/hyena_spec.py +++ b/nemo/collections/nlp/modules/common/hyena/hyena_spec.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn as nn from megatron.core.extensions.transformer_engine import TELayerNormColumnParallelLinear, TERowParallelLinear from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec diff --git a/nemo/export/trt_llm/qnemo/utils.py b/nemo/export/trt_llm/qnemo/utils.py index 58d1d308507f4..a2bd74d3ff4c0 100644 --- a/nemo/export/trt_llm/qnemo/utils.py +++ b/nemo/export/trt_llm/qnemo/utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import os from pathlib import Path diff --git a/nemo/lightning/fabric/conversion.py b/nemo/lightning/fabric/conversion.py index cc2b074940dd8..9ad713ec5261b 100644 --- a/nemo/lightning/fabric/conversion.py +++ b/nemo/lightning/fabric/conversion.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from functools import singledispatch from typing import Any, TypeVar diff --git a/nemo/lightning/fabric/fabric.py b/nemo/lightning/fabric/fabric.py index ddad49f7d2110..b1ca867cab83f 100644 --- a/nemo/lightning/fabric/fabric.py +++ b/nemo/lightning/fabric/fabric.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from copy import deepcopy from pathlib import Path from typing import TYPE_CHECKING, Optional, Protocol, Sequence, Type, TypeVar, Union, runtime_checkable diff --git a/nemo/lightning/fabric/plugins.py b/nemo/lightning/fabric/plugins.py index 4026fb9b549e8..723b48b6b357f 100644 --- a/nemo/lightning/fabric/plugins.py +++ b/nemo/lightning/fabric/plugins.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Generator, Literal, TypeVar diff --git a/nemo/lightning/io/api.py b/nemo/lightning/io/api.py index 7a702edb7f21b..4abc8fa7cca3d 100644 --- a/nemo/lightning/io/api.py +++ b/nemo/lightning/io/api.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from pathlib import Path from typing import Callable, Optional, Type, overload import fiddle as fdl diff --git a/nemo/lightning/io/artifact/base.py b/nemo/lightning/io/artifact/base.py index 7d2d608c4149b..c7243a22af52e 100644 --- a/nemo/lightning/io/artifact/base.py +++ b/nemo/lightning/io/artifact/base.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from abc import ABC, abstractmethod from pathlib import Path from typing import Generic, TypeVar diff --git a/nemo/lightning/io/artifact/file.py b/nemo/lightning/io/artifact/file.py index 1cd63b706c9a8..619effbea58f5 100644 --- a/nemo/lightning/io/artifact/file.py +++ b/nemo/lightning/io/artifact/file.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil from pathlib import Path diff --git a/nemo/lightning/io/artifact/pickle.py b/nemo/lightning/io/artifact/pickle.py index 61a9c82237fc1..941d69e777a12 100644 --- a/nemo/lightning/io/artifact/pickle.py +++ b/nemo/lightning/io/artifact/pickle.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from pathlib import Path from typing import Any diff --git a/nemo/lightning/io/capture.py b/nemo/lightning/io/capture.py index 910506f131472..2379960bf6d99 100644 --- a/nemo/lightning/io/capture.py +++ b/nemo/lightning/io/capture.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import functools import logging from typing import Callable, Generic, Optional, Protocol, TypeVar, runtime_checkable diff --git a/nemo/lightning/io/connector.py b/nemo/lightning/io/connector.py index a0a9b2547d425..be1dcc3c088d8 100644 --- a/nemo/lightning/io/connector.py +++ b/nemo/lightning/io/connector.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import shutil diff --git a/nemo/lightning/io/fdl_torch.py b/nemo/lightning/io/fdl_torch.py index aa46341a105fa..a619e4d4d160d 100644 --- a/nemo/lightning/io/fdl_torch.py +++ b/nemo/lightning/io/fdl_torch.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Fiddle extensions to handle PyTorch code more elegantly. This module provides extensions for better handling of PyTorch types and functions diff --git a/nemo/lightning/io/pl.py b/nemo/lightning/io/pl.py index 51c47ad94dbb4..fb6ef707ab7c3 100644 --- a/nemo/lightning/io/pl.py +++ b/nemo/lightning/io/pl.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import logging from dataclasses import dataclass, field from pathlib import Path diff --git a/nemo/lightning/io/state.py b/nemo/lightning/io/state.py index 7c8add5429abc..6632768ec8dd6 100644 --- a/nemo/lightning/io/state.py +++ b/nemo/lightning/io/state.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect import re from dataclasses import dataclass diff --git a/nemo/lightning/pytorch/callbacks/debugging.py b/nemo/lightning/pytorch/callbacks/debugging.py index 1a3c528c741fc..5f6e722ef89bf 100644 --- a/nemo/lightning/pytorch/callbacks/debugging.py +++ b/nemo/lightning/pytorch/callbacks/debugging.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Callable, Dict, List, Optional, Union import pytorch_lightning as pl diff --git a/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py b/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py index f9181e8ad70e5..2f53babfe5598 100644 --- a/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py +++ b/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, fields import pytorch_lightning as pl diff --git a/nemo/lightning/pytorch/callbacks/moe_token_drop.py b/nemo/lightning/pytorch/callbacks/moe_token_drop.py index fc2aea84f3c16..10483dca50965 100644 --- a/nemo/lightning/pytorch/callbacks/moe_token_drop.py +++ b/nemo/lightning/pytorch/callbacks/moe_token_drop.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytorch_lightning as pl from megatron.core import ModelParallelConfig from pytorch_lightning.callbacks.callback import Callback diff --git a/nemo/lightning/pytorch/utils.py b/nemo/lightning/pytorch/utils.py index 19bdcc1387c42..045cf79b57779 100644 --- a/nemo/lightning/pytorch/utils.py +++ b/nemo/lightning/pytorch/utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch diff --git a/nemo/utils/callbacks/s3_checkpoint_io.py b/nemo/utils/callbacks/s3_checkpoint_io.py index 4ded98a1b6108..7a9f984fee1ba 100644 --- a/nemo/utils/callbacks/s3_checkpoint_io.py +++ b/nemo/utils/callbacks/s3_checkpoint_io.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import time from concurrent.futures import ProcessPoolExecutor diff --git a/nemo/utils/s3_dirpath_utils.py b/nemo/utils/s3_dirpath_utils.py index fd66115d4e5da..360a6bb67942a 100644 --- a/nemo/utils/s3_dirpath_utils.py +++ b/nemo/utils/s3_dirpath_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from pathlib import Path from typing import Optional diff --git a/nemo/utils/s3_utils.py b/nemo/utils/s3_utils.py index 3435a603b05d5..78cb0eb8cae88 100644 --- a/nemo/utils/s3_utils.py +++ b/nemo/utils/s3_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import re import time diff --git a/nemo/utils/trainer_utils.py b/nemo/utils/trainer_utils.py index 790ccb8190695..78a3a0f665841 100644 --- a/nemo/utils/trainer_utils.py +++ b/nemo/utils/trainer_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Mapping _HAS_HYDRA = True diff --git a/scripts/checkpoint_converters/convert_griffin_hf_to_nemo.py b/scripts/checkpoint_converters/convert_griffin_hf_to_nemo.py index 44435cc211356..45eba3babaf56 100644 --- a/scripts/checkpoint_converters/convert_griffin_hf_to_nemo.py +++ b/scripts/checkpoint_converters/convert_griffin_hf_to_nemo.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os from argparse import ArgumentParser @@ -86,9 +100,9 @@ def convert(args): new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_proj.bias'] = hf_model.state_dict()[ f'model.layers.{l}.temporal_block.o_proj.bias' ] - new_state_dict[ - f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight' - ] = hf_model.state_dict()[f'model.layers.{l}.temporal_pre_norm.weight'] + new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_qkv.layer_norm_weight'] = ( + hf_model.state_dict()[f'model.layers.{l}.temporal_pre_norm.weight'] + ) new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_qkv.weight'] = torch.cat( [ hf_model.state_dict()[f'model.layers.{l}.temporal_block.q_proj.weight'], @@ -99,18 +113,18 @@ def convert(args): new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_qkv.bias'] = torch.zeros( new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_qkv.weight'].shape[0] ) - new_state_dict[ - f'model.decoder.layers.{l}.self_attention.linear_proj._extra_state' - ] = nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_proj._extra_state'] - new_state_dict[ - f'model.decoder.layers.{l}.self_attention.linear_qkv._extra_state' - ] = nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_qkv._extra_state'] + new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_proj._extra_state'] = ( + nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_proj._extra_state'] + ) + new_state_dict[f'model.decoder.layers.{l}.self_attention.linear_qkv._extra_state'] = ( + nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.self_attention.linear_qkv._extra_state'] + ) else: - new_state_dict[ - f'model.decoder.layers.{l}.recurrent_layer.linear_in.layer_norm_weight' - ] = hf_model.state_dict()[f'model.layers.{l}.temporal_pre_norm.weight'] + new_state_dict[f'model.decoder.layers.{l}.recurrent_layer.linear_in.layer_norm_weight'] = ( + hf_model.state_dict()[f'model.layers.{l}.temporal_pre_norm.weight'] + ) new_state_dict[f'model.decoder.layers.{l}.recurrent_layer.linear_in.weight'] = torch.cat( [ hf_model.state_dict()[f'model.layers.{l}.temporal_block.linear_x.weight'], @@ -154,12 +168,12 @@ def convert(args): f'model.layers.{l}.temporal_block.rg_lru.recurrent_gate_bias' ] - new_state_dict[ - f'model.decoder.layers.{l}.recurrent_layer.linear_in._extra_state' - ] = nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.recurrent_layer.linear_in._extra_state'] - new_state_dict[ - f'model.decoder.layers.{l}.recurrent_layer.linear_out._extra_state' - ] = nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.recurrent_layer.linear_out._extra_state'] + new_state_dict[f'model.decoder.layers.{l}.recurrent_layer.linear_in._extra_state'] = ( + nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.recurrent_layer.linear_in._extra_state'] + ) + new_state_dict[f'model.decoder.layers.{l}.recurrent_layer.linear_out._extra_state'] = ( + nemo_model_from_hf.state_dict()[f'model.decoder.layers.{l}.recurrent_layer.linear_out._extra_state'] + ) nemo_model_from_hf.load_state_dict(new_state_dict, strict=True) dtype = torch_dtype_from_precision(args.precision) diff --git a/scripts/checkpoint_converters/convert_griffin_nemo_to_hf.py b/scripts/checkpoint_converters/convert_griffin_nemo_to_hf.py index 265af9e55cbd3..202731643f394 100644 --- a/scripts/checkpoint_converters/convert_griffin_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_griffin_nemo_to_hf.py @@ 
-1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from argparse import ArgumentParser diff --git a/scripts/checkpoint_converters/quantize_model_to_nf4.py b/scripts/checkpoint_converters/quantize_model_to_nf4.py index 05d9c4010c026..db3a48aaa16d7 100644 --- a/scripts/checkpoint_converters/quantize_model_to_nf4.py +++ b/scripts/checkpoint_converters/quantize_model_to_nf4.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from argparse import ArgumentParser from typing import List diff --git a/scripts/speech_recognition/filter_tarred_audio_dataset.py b/scripts/speech_recognition/filter_tarred_audio_dataset.py index bbe88c6700f32..238c182f6ab1b 100644 --- a/scripts/speech_recognition/filter_tarred_audio_dataset.py +++ b/scripts/speech_recognition/filter_tarred_audio_dataset.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from functools import partial from io import BytesIO from pathlib import Path diff --git a/scripts/speech_recognition/oomptimizer.py b/scripts/speech_recognition/oomptimizer.py index 165ac5ac692d4..3d5eb5a4dbb19 100755 --- a/scripts/speech_recognition/oomptimizer.py +++ b/scripts/speech_recognition/oomptimizer.py @@ -1,4 +1,18 @@ #!/usr/bin/env python +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import importlib import math import sys diff --git a/tests/collections/asr/decoding/test_multi_task_decoding.py b/tests/collections/asr/decoding/test_multi_task_decoding.py index 906caccad3968..056bb90b2d8c4 100644 --- a/tests/collections/asr/decoding/test_multi_task_decoding.py +++ b/tests/collections/asr/decoding/test_multi_task_decoding.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from unittest.mock import Mock import pytest diff --git a/tests/collections/asr/test_asr_lhotse_dataset.py b/tests/collections/asr/test_asr_lhotse_dataset.py index e7521bfdf7d8c..5a1450e606acb 100644 --- a/tests/collections/asr/test_asr_lhotse_dataset.py +++ b/tests/collections/asr/test_asr_lhotse_dataset.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest import torch from lhotse import CutSet, SupervisionSegment diff --git a/tests/collections/common/prompt_formatters/conftest.py b/tests/collections/common/prompt_formatters/conftest.py index 60b67da5fe0f2..e22d8849fa83f 100644 --- a/tests/collections/common/prompt_formatters/conftest.py +++ b/tests/collections/common/prompt_formatters/conftest.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import pytest from nemo.collections.common.tokenizers import CanaryTokenizer, SentencePieceTokenizer diff --git a/tests/collections/common/prompt_formatters/test_canary_prompt_formatter.py b/tests/collections/common/prompt_formatters/test_canary_prompt_formatter.py index f86f70404b1f2..bc0bc4f823f36 100644 --- a/tests/collections/common/prompt_formatters/test_canary_prompt_formatter.py +++ b/tests/collections/common/prompt_formatters/test_canary_prompt_formatter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.common.prompts.canary import CanaryPromptFormatter diff --git a/tests/collections/common/prompt_formatters/test_gemma_prompt_formatter.py b/tests/collections/common/prompt_formatters/test_gemma_prompt_formatter.py index 9be17cf5dd813..1a75f8d48147d 100644 --- a/tests/collections/common/prompt_formatters/test_gemma_prompt_formatter.py +++ b/tests/collections/common/prompt_formatters/test_gemma_prompt_formatter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.common.prompts.gemma import GemmaPromptFormatter diff --git a/tests/collections/common/prompt_formatters/test_llama2_prompt_formatter.py b/tests/collections/common/prompt_formatters/test_llama2_prompt_formatter.py index 0374d48e3eed4..1733bbbabb918 100644 --- a/tests/collections/common/prompt_formatters/test_llama2_prompt_formatter.py +++ b/tests/collections/common/prompt_formatters/test_llama2_prompt_formatter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from nemo.collections.common.prompts.llama import Llama2PromptFormatter diff --git a/tests/collections/common/prompt_formatters/test_mistral_prompt_formatter.py b/tests/collections/common/prompt_formatters/test_mistral_prompt_formatter.py index 0cc1d108c5627..610800a846903 100644 --- a/tests/collections/common/prompt_formatters/test_mistral_prompt_formatter.py +++ b/tests/collections/common/prompt_formatters/test_mistral_prompt_formatter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.common.prompts.mistral import MistralPromptFormatter diff --git a/tests/collections/common/prompt_formatters/test_prompt_formatter_api.py b/tests/collections/common/prompt_formatters/test_prompt_formatter_api.py index 26ade7da14150..0d2f624d528b1 100644 --- a/tests/collections/common/prompt_formatters/test_prompt_formatter_api.py +++ b/tests/collections/common/prompt_formatters/test_prompt_formatter_api.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest from nemo.collections.common.prompts.canary import PromptFormatter diff --git a/tests/collections/common/test_2d_bucketing_constraint.py b/tests/collections/common/test_2d_bucketing_constraint.py index ba67d2e1fabb4..1bef5cf14ff7a 100644 --- a/tests/collections/common/test_2d_bucketing_constraint.py +++ b/tests/collections/common/test_2d_bucketing_constraint.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import numpy as np import pytest from lhotse import CutSet, Seconds, SupervisionSegment diff --git a/tests/collections/common/test_data_utils.py b/tests/collections/common/test_data_utils.py index 4e4b8d519c1fe..543096f8034c9 100644 --- a/tests/collections/common/test_data_utils.py +++ b/tests/collections/common/test_data_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass import pytest diff --git a/tests/collections/common/test_lhotse_multirank_rng.py b/tests/collections/common/test_lhotse_multirank_rng.py index d7b883625aa7d..1b5884e3cc8ba 100644 --- a/tests/collections/common/test_lhotse_multirank_rng.py +++ b/tests/collections/common/test_lhotse_multirank_rng.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from io import BytesIO from pathlib import Path diff --git a/tests/collections/common/test_lhotse_nemo_adapters.py b/tests/collections/common/test_lhotse_nemo_adapters.py index a76116b10dd79..3d75dcdb69566 100644 --- a/tests/collections/common/test_lhotse_nemo_adapters.py +++ b/tests/collections/common/test_lhotse_nemo_adapters.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np import pytest from lhotse import AudioSource, CutSet, MonoCut, Recording, SupervisionSegment diff --git a/tests/collections/common/test_perf_metrics.py b/tests/collections/common/test_perf_metrics.py index 0708bca0c24df..43ae347b6c2c9 100644 --- a/tests/collections/common/test_perf_metrics.py +++ b/tests/collections/common/test_perf_metrics.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest import yaml diff --git a/tests/collections/llm/auto_conf/test_autoconf_utils.py b/tests/collections/llm/auto_conf/test_autoconf_utils.py index 0faa86c130160..62b8f62e49114 100644 --- a/tests/collections/llm/auto_conf/test_autoconf_utils.py +++ b/tests/collections/llm/auto_conf/test_autoconf_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.llm.tools.auto_configurator.core.base_config import _estimate_training_time, calculate_model_size diff --git a/tests/collections/llm/auto_conf/test_base_configs.py b/tests/collections/llm/auto_conf/test_base_configs.py index bfcf40315a637..d12f065d8168d 100644 --- a/tests/collections/llm/auto_conf/test_base_configs.py +++ b/tests/collections/llm/auto_conf/test_base_configs.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import torch diff --git a/tests/collections/llm/auto_conf/test_generate_configs.py b/tests/collections/llm/auto_conf/test_generate_configs.py index efb3bcf9a0ba4..f10425631f985 100644 --- a/tests/collections/llm/auto_conf/test_generate_configs.py +++ b/tests/collections/llm/auto_conf/test_generate_configs.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import nemo_run as run from nemo.collections.llm import ( diff --git a/tests/collections/llm/bitexact/mixtral/compare_ckpts.py b/tests/collections/llm/bitexact/mixtral/compare_ckpts.py index b6f9cf9283fd2..94815415bad08 100644 --- a/tests/collections/llm/bitexact/mixtral/compare_ckpts.py +++ b/tests/collections/llm/bitexact/mixtral/compare_ckpts.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import torch diff --git a/tests/collections/llm/bitexact/mixtral/pretrain_mini_mixtral.py b/tests/collections/llm/bitexact/mixtral/pretrain_mini_mixtral.py index 1e7de13666aa6..b4f95879bad51 100644 --- a/tests/collections/llm/bitexact/mixtral/pretrain_mini_mixtral.py +++ b/tests/collections/llm/bitexact/mixtral/pretrain_mini_mixtral.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from pathlib import Path diff --git a/tests/collections/llm/gpt/model/test_baichuan.py b/tests/collections/llm/gpt/model/test_baichuan.py index 3e57afd6354bc..a797dfc83efb6 100644 --- a/tests/collections/llm/gpt/model/test_baichuan.py +++ b/tests/collections/llm/gpt/model/test_baichuan.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn.functional as F from nemo.collections.llm.gpt.model.baichuan import Baichuan2Config, Baichuan2Config7B diff --git a/tests/collections/llm/gpt/model/test_base.py b/tests/collections/llm/gpt/model/test_base.py index 4cc9f03593c59..0ed728994a6c5 100644 --- a/tests/collections/llm/gpt/model/test_base.py +++ b/tests/collections/llm/gpt/model/test_base.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.llm.gpt.model.base import ( GPTConfig5B, GPTConfig7B, diff --git a/tests/collections/llm/gpt/model/test_chatglm.py b/tests/collections/llm/gpt/model/test_chatglm.py index 61ef2ec523fa6..8b9539e67c055 100644 --- a/tests/collections/llm/gpt/model/test_chatglm.py +++ b/tests/collections/llm/gpt/model/test_chatglm.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn.functional as F from nemo.collections.llm.gpt.model.chatglm import ChatGLM2Config6B, ChatGLM3Config6B, ChatGLMConfig diff --git a/tests/collections/llm/gpt/model/test_gemma.py b/tests/collections/llm/gpt/model/test_gemma.py index 22419250261c3..db8da54180cb6 100644 --- a/tests/collections/llm/gpt/model/test_gemma.py +++ b/tests/collections/llm/gpt/model/test_gemma.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.llm.fn.activation import openai_gelu from nemo.collections.llm.gpt.model.gemma import ( CodeGemmaConfig2B, diff --git a/tests/collections/llm/gpt/model/test_llama.py b/tests/collections/llm/gpt/model/test_llama.py index e53ffc8e15f6d..d0081386f2c0e 100644 --- a/tests/collections/llm/gpt/model/test_llama.py +++ b/tests/collections/llm/gpt/model/test_llama.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import torch.nn.functional as F from nemo.collections.llm.gpt.model.llama import ( diff --git a/tests/collections/llm/gpt/model/test_mistral.py b/tests/collections/llm/gpt/model/test_mistral.py index 025ea35dd6e93..bc6ed545e09fa 100644 --- a/tests/collections/llm/gpt/model/test_mistral.py +++ b/tests/collections/llm/gpt/model/test_mistral.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn.functional as F from nemo.collections.llm.gpt.model.mistral import MistralConfig7B, MistralNeMoConfig12B, MistralNeMoConfig123B diff --git a/tests/collections/llm/gpt/model/test_mixtral.py b/tests/collections/llm/gpt/model/test_mixtral.py index 15146dcdd40cb..35d81ed7d5304 100644 --- a/tests/collections/llm/gpt/model/test_mixtral.py +++ b/tests/collections/llm/gpt/model/test_mixtral.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch import torch.nn.functional as F diff --git a/tests/collections/llm/gpt/model/test_model_import.py b/tests/collections/llm/gpt/model/test_model_import.py index 3f65cc22654f8..9edc235e454fb 100644 --- a/tests/collections/llm/gpt/model/test_model_import.py +++ b/tests/collections/llm/gpt/model/test_model_import.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch torch.set_grad_enabled(False) diff --git a/tests/collections/llm/gpt/model/test_nemotron.py b/tests/collections/llm/gpt/model/test_nemotron.py index d32a3d3406736..5fdd1547a75d4 100644 --- a/tests/collections/llm/gpt/model/test_nemotron.py +++ b/tests/collections/llm/gpt/model/test_nemotron.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.llm.fn.activation import squared_relu from nemo.collections.llm.gpt.model.nemotron import ( Nemotron3Config4B, diff --git a/tests/collections/llm/gpt/model/test_qwen2.py b/tests/collections/llm/gpt/model/test_qwen2.py index 0b41a524eebd1..b76b071d9eb64 100644 --- a/tests/collections/llm/gpt/model/test_qwen2.py +++ b/tests/collections/llm/gpt/model/test_qwen2.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn.functional as F from nemo.collections.llm.gpt.model.qwen2 import ( diff --git a/tests/collections/llm/gpt/model/test_ssm.py b/tests/collections/llm/gpt/model/test_ssm.py index 9a4df79e18f5e..a38ed3b6b71d9 100644 --- a/tests/collections/llm/gpt/model/test_ssm.py +++ b/tests/collections/llm/gpt/model/test_ssm.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from nemo.collections.llm.gpt.model.ssm import ( BaseMambaConfig1_3B, BaseMambaConfig2_7B, diff --git a/tests/collections/llm/gpt/model/test_starcoder.py b/tests/collections/llm/gpt/model/test_starcoder.py index 862e99715e634..5bd410e12f35c 100644 --- a/tests/collections/llm/gpt/model/test_starcoder.py +++ b/tests/collections/llm/gpt/model/test_starcoder.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import torch.nn.functional as F from nemo.collections.llm.gpt.model.starcoder import StarcoderConfig, StarcoderConfig15B diff --git a/tests/collections/llm/gpt/model/test_starcoder2.py b/tests/collections/llm/gpt/model/test_starcoder2.py index 5b6829270c2d5..9ba46a4185f03 100644 --- a/tests/collections/llm/gpt/model/test_starcoder2.py +++ b/tests/collections/llm/gpt/model/test_starcoder2.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch.nn.functional as F from nemo.collections.llm.gpt.model.starcoder2 import ( diff --git a/tests/collections/llm/megatron_t5_finetuning.py b/tests/collections/llm/megatron_t5_finetuning.py index f54e858cfb432..e8f4947c96749 100644 --- a/tests/collections/llm/megatron_t5_finetuning.py +++ b/tests/collections/llm/megatron_t5_finetuning.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + ## NOTE: This script is present for github-actions testing only. ## There are no guarantees that this script is up-to-date with latest NeMo. diff --git a/tests/collections/llm/recipes/test_llama3_70b.py b/tests/collections/llm/recipes/test_llama3_70b.py index d47b674b7b709..0adf1ada3f245 100644 --- a/tests/collections/llm/recipes/test_llama3_70b.py +++ b/tests/collections/llm/recipes/test_llama3_70b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_llama3_70b_16k.py b/tests/collections/llm/recipes/test_llama3_70b_16k.py index 17f0ec5ebd999..f135aff9beeb5 100644 --- a/tests/collections/llm/recipes/test_llama3_70b_16k.py +++ b/tests/collections/llm/recipes/test_llama3_70b_16k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_llama3_70b_64k.py b/tests/collections/llm/recipes/test_llama3_70b_64k.py index e9f496dfdd2ee..7bcd44b7cdf90 100644 --- a/tests/collections/llm/recipes/test_llama3_70b_64k.py +++ b/tests/collections/llm/recipes/test_llama3_70b_64k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_llama3_8b.py b/tests/collections/llm/recipes/test_llama3_8b.py index 88fab6d6325a4..2c0d016872aab 100644 --- a/tests/collections/llm/recipes/test_llama3_8b.py +++ b/tests/collections/llm/recipes/test_llama3_8b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_llama3_8b_16k.py b/tests/collections/llm/recipes/test_llama3_8b_16k.py index fe75f01236ab6..1c6b6f7f42489 100644 --- a/tests/collections/llm/recipes/test_llama3_8b_16k.py +++ b/tests/collections/llm/recipes/test_llama3_8b_16k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_llama3_8b_64k.py b/tests/collections/llm/recipes/test_llama3_8b_64k.py index 0316b736341a0..cc70df9916764 100644 --- a/tests/collections/llm/recipes/test_llama3_8b_64k.py +++ b/tests/collections/llm/recipes/test_llama3_8b_64k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_mistral.py b/tests/collections/llm/recipes/test_mistral.py index a7d83edcc370f..90476787c534e 100644 --- a/tests/collections/llm/recipes/test_mistral.py +++ b/tests/collections/llm/recipes/test_mistral.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_mixtral_8x22b.py b/tests/collections/llm/recipes/test_mixtral_8x22b.py index 0edd56054d4f0..01999c869cb52 100644 --- a/tests/collections/llm/recipes/test_mixtral_8x22b.py +++ b/tests/collections/llm/recipes/test_mixtral_8x22b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_mixtral_8x7b_16k.py b/tests/collections/llm/recipes/test_mixtral_8x7b_16k.py index 62d6e0e31917b..19cd464da7adb 100644 --- a/tests/collections/llm/recipes/test_mixtral_8x7b_16k.py +++ b/tests/collections/llm/recipes/test_mixtral_8x7b_16k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_mixtral_8x7b_64k.py b/tests/collections/llm/recipes/test_mixtral_8x7b_64k.py index 9ff93a89f438f..8e2994f2583c0 100644 --- a/tests/collections/llm/recipes/test_mixtral_8x7b_64k.py +++ b/tests/collections/llm/recipes/test_mixtral_8x7b_64k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch diff --git a/tests/collections/llm/recipes/test_nemotron.py b/tests/collections/llm/recipes/test_nemotron.py index 9123df0f4d51b..44afe02b05e12 100644 --- a/tests/collections/llm/recipes/test_nemotron.py +++ b/tests/collections/llm/recipes/test_nemotron.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron3_4b.py b/tests/collections/llm/recipes/test_nemotron3_4b.py index a4c757a5cd163..3a42434d34bfd 100644 --- a/tests/collections/llm/recipes/test_nemotron3_4b.py +++ b/tests/collections/llm/recipes/test_nemotron3_4b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron3_8b.py b/tests/collections/llm/recipes/test_nemotron3_8b.py index dd38ac100f211..2cf1c440080a2 100644 --- a/tests/collections/llm/recipes/test_nemotron3_8b.py +++ b/tests/collections/llm/recipes/test_nemotron3_8b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_15b.py b/tests/collections/llm/recipes/test_nemotron4_15b.py index 8319691ad9e07..e8374c9e117d3 100644 --- a/tests/collections/llm/recipes/test_nemotron4_15b.py +++ b/tests/collections/llm/recipes/test_nemotron4_15b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_15b_16k.py b/tests/collections/llm/recipes/test_nemotron4_15b_16k.py index 6c1f5d90e1605..f1297efa0831a 100644 --- a/tests/collections/llm/recipes/test_nemotron4_15b_16k.py +++ b/tests/collections/llm/recipes/test_nemotron4_15b_16k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_15b_64k.py b/tests/collections/llm/recipes/test_nemotron4_15b_64k.py index 8ed35fb818933..482596024f668 100644 --- a/tests/collections/llm/recipes/test_nemotron4_15b_64k.py +++ b/tests/collections/llm/recipes/test_nemotron4_15b_64k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_22b.py b/tests/collections/llm/recipes/test_nemotron4_22b.py index 81bdcaa7813a4..fef24c2167663 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b.py +++ b/tests/collections/llm/recipes/test_nemotron4_22b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_22b_16k.py b/tests/collections/llm/recipes/test_nemotron4_22b_16k.py index 6b4a581348e08..313d838067fbb 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b_16k.py +++ b/tests/collections/llm/recipes/test_nemotron4_22b_16k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_22b_64k.py b/tests/collections/llm/recipes/test_nemotron4_22b_64k.py index 68a238a933387..5ac90e971cbb8 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b_64k.py +++ b/tests/collections/llm/recipes/test_nemotron4_22b_64k.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_nemotron4_340b.py b/tests/collections/llm/recipes/test_nemotron4_340b.py index 7ae25c63ad080..603c166964292 100644 --- a/tests/collections/llm/recipes/test_nemotron4_340b.py +++ b/tests/collections/llm/recipes/test_nemotron4_340b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/test_hf_import.py b/tests/collections/llm/test_hf_import.py index 53232eb02bb2f..b49e0bf6607b2 100644 --- a/tests/collections/llm/test_hf_import.py +++ b/tests/collections/llm/test_hf_import.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from pathlib import Path diff --git a/tests/collections/nlp/test_qlora.py b/tests/collections/nlp/test_qlora.py index bc00cc20c6ca3..94d95608009af 100644 --- a/tests/collections/nlp/test_qlora.py +++ b/tests/collections/nlp/test_qlora.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest import torch from torch import nn diff --git a/tests/core/test_dist_ckpt.py b/tests/core/test_dist_ckpt.py index 8fe21a3168540..0a483c0f58abf 100644 --- a/tests/core/test_dist_ckpt.py +++ b/tests/core/test_dist_ckpt.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import os import types from pathlib import Path diff --git a/tests/lightning/mcore_microbatch_utils.py b/tests/lightning/mcore_microbatch_utils.py index 39b3baee446cb..e771fec4e7af0 100644 --- a/tests/lightning/mcore_microbatch_utils.py +++ b/tests/lightning/mcore_microbatch_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import contextlib diff --git a/tests/lightning/pytorch/callbacks/test_model_checkpoint.py b/tests/lightning/pytorch/callbacks/test_model_checkpoint.py index 7e047515b2b59..802f2b28c25c3 100644 --- a/tests/lightning/pytorch/callbacks/test_model_checkpoint.py +++ b/tests/lightning/pytorch/callbacks/test_model_checkpoint.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from contextlib import contextmanager from pathlib import Path diff --git a/tests/lightning/pytorch/strategies/test_megatron_strategy.py b/tests/lightning/pytorch/strategies/test_megatron_strategy.py index fc9bf3816db01..ac7b1298ddfcd 100644 --- a/tests/lightning/pytorch/strategies/test_megatron_strategy.py +++ b/tests/lightning/pytorch/strategies/test_megatron_strategy.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from unittest.mock import patch from nemo.lightning.pytorch.strategies import MegatronStrategy diff --git a/tests/lightning/test_nemo_run.py b/tests/lightning/test_nemo_run.py index 934eaa853bf0b..2d63031a5ad06 100644 --- a/tests/lightning/test_nemo_run.py +++ b/tests/lightning/test_nemo_run.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from functools import partial import pytest diff --git a/tests/lightning/test_state_restoration.py b/tests/lightning/test_state_restoration.py index 076a2f931f57a..44e0673a1a393 100644 --- a/tests/lightning/test_state_restoration.py +++ b/tests/lightning/test_state_restoration.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math import random diff --git a/tests/utils/test_trainer_utils.py b/tests/utils/test_trainer_utils.py index ed13b0c4ac38a..55eee92a523c6 100644 --- a/tests/utils/test_trainer_utils.py +++ b/tests/utils/test_trainer_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from omegaconf import OmegaConf from pytorch_lightning.strategies import DDPStrategy From 63fab9779b354aa351ab2d5ba8aed09e78bb1721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 28 Oct 2024 09:12:23 +0100 Subject: [PATCH 009/125] =?UTF-8?q?[=F0=9F=A4=A0]:=20Howdy=20folks,=20let'?= =?UTF-8?q?s=20bump=20`Dockerfile.ci`=20to=20d357c18=20!=20(#11054)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: pablo-garay <7166088+pablo-garay@users.noreply.github.com> --- Dockerfile.ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 74cfd75eb042a..103e47dc753a1 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea ARG MODELOPT_VERSION=0.17.0 -ARG MCORE_TAG=397e9da9511a09ae8badba30129c7e4934b06118 +ARG MCORE_TAG=d357c188323b6928cbcbd6f7e06af04c1694382f ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c RUN \ From 1ba98fde28db9bb8ac459439e5340bbd60c2dee0 Mon Sep 17 00:00:00 2001 From: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:19:41 +0200 Subject: [PATCH 010/125] add deprecation note (#11024) * add deprecation note Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Co-authored-by: dimapihtar --- examples/nlp/dialogue/dialogue.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/nlp/dialogue/dialogue.py b/examples/nlp/dialogue/dialogue.py index de91b60d1ed32..4284fed42d223 100644 --- a/examples/nlp/dialogue/dialogue.py +++ b/examples/nlp/dialogue/dialogue.py @@ -63,10 +63,14 @@ @hydra_runner(config_path="conf", config_name="dialogue_config") def main(cfg: DictConfig) -> None: pl.seed_everything(42) + logging.warning('This script is no longer supported in NeMo and is scheduled for removal in the 23.11 release.') logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') try: - strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=True,) + strategy = NLPDDPStrategy( + no_ddp_communication_hook=True, + find_unused_parameters=True, + ) except (ImportError, ModuleNotFoundError): strategy = 'auto' From f27a982ed192f8039d5a30b51208847c9c1b0fc3 Mon Sep 17 00:00:00 2001 From: Huy Vu <86480512+huvunvidia@users.noreply.github.com> Date: Mon, 28 Oct 2024 13:05:32 -0400 Subject: [PATCH 011/125] Generalizing Inference pipeline in NeMo 2.0 to support encoder-decoder models (#10924) * initial commit * adding example t5_generate.py * workable inference code * updating code * update cpde * workable solution for T5 tokenizer (we add 100 sentinel tokens when initializing tokenizer throug setting config, instead of adding after initialization) * separate autokenizer's changes to another PR * cleaning code * addressing Marc's comments * addressing Marc's reviews * update code after merge * small fix * Apply isort and black reformatting Signed-off-by: huvunvidia --------- Signed-off-by: huvunvidia Co-authored-by: Huy Vu2 Co-authored-by: root Co-authored-by: huvunvidia --- .../tokenizers/huggingface/auto_tokenizer.py | 7 +- nemo/collections/llm/api.py | 4 + nemo/collections/llm/gpt/model/base.py | 22 ++++ 
nemo/collections/llm/inference/base.py | 63 +++++----- nemo/collections/llm/t5/model/t5.py | 22 ++++ scripts/llm/t5_generate.py | 108 ++++++++++++++++++ 6 files changed, 196 insertions(+), 30 deletions(-) create mode 100644 scripts/llm/t5_generate.py diff --git a/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py b/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py index 439322b8e8108..43d377b73f347 100644 --- a/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py +++ b/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py @@ -224,9 +224,12 @@ def text_to_ids(self, text): ids = self.tokens_to_ids(tokens) return ids - def ids_to_text(self, ids): + def ids_to_text(self, ids, remove_special_tokens=True): tokens = self.ids_to_tokens(ids) - tokens_clean = [t for t in tokens if t not in self.tokenizer.all_special_tokens] + if remove_special_tokens: + tokens_clean = [t for t in tokens if t not in self.tokenizer.all_special_tokens] + else: + tokens_clean = tokens text = self.tokens_to_text(tokens_clean) return text diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py index a9b3d4361f5be..c4913e07da9b6 100644 --- a/nemo/collections/llm/api.py +++ b/nemo/collections/llm/api.py @@ -437,7 +437,9 @@ def generate( path: Union[Path, str], prompts: list[str], trainer: nl.Trainer, + encoder_prompts: Optional[list[str]] = None, params_dtype: torch.dtype = torch.bfloat16, + add_BOS: bool = False, max_batch_size: int = 4, random_seed: Optional[int] = None, inference_batch_times_seqlen_threshold: int = 1000, @@ -456,6 +458,8 @@ def generate( model=inference_wrapped_model, tokenizer=mcore_tokenizer, prompts=prompts, + encoder_prompts=encoder_prompts, + add_BOS=add_BOS, max_batch_size=max_batch_size, random_seed=random_seed, inference_params=inference_params, diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py index c7a6e01c673e5..a7823c9bee80e 100644 --- a/nemo/collections/llm/gpt/model/base.py +++ b/nemo/collections/llm/gpt/model/base.py @@ -18,6 +18,8 @@ import pytorch_lightning as L import torch import torch.distributed +from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper +from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig from megatron.core.optimizer import OptimizerConfig from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_config import TransformerConfig @@ -310,6 +312,26 @@ def validation_step(self, batch, batch_idx=None) -> torch.Tensor: return self.forward_step(batch) + def get_inference_wrapper(self, params_dtype, inference_batch_times_seqlen_threshold) -> torch.Tensor: + # This is to get the MCore model required in GPTInferenceWrapper. 
+ mcore_model = self.module + while mcore_model: + if type(mcore_model) is MCoreGPTModel: + break + mcore_model = getattr(mcore_model, "module", None) + if mcore_model is None or type(mcore_model) is not MCoreGPTModel: + raise ValueError("Exact McoreGPTModel instance not found in the model structure.") + + inference_wrapper_config = InferenceWrapperConfig( + hidden_size=mcore_model.config.hidden_size, + params_dtype=params_dtype, + inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, + padded_vocab_size=self.tokenizer.vocab_size, + ) + + model_inference_wrapper = GPTInferenceWrapper(mcore_model, inference_wrapper_config) + return model_inference_wrapper + @property def training_loss_reduction(self) -> MaskedTokenLossReduction: if not self._training_loss_reduction: diff --git a/nemo/collections/llm/inference/base.py b/nemo/collections/llm/inference/base.py index 9c4da7940b704..f3d202451c60c 100644 --- a/nemo/collections/llm/inference/base.py +++ b/nemo/collections/llm/inference/base.py @@ -21,12 +21,16 @@ import torch.distributed from megatron.core.inference.common_inference_params import CommonInferenceParams from megatron.core.inference.engines.mcore_engine import MCoreEngine -from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper -from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig +from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import ( + AbstractModelInferenceWrapper, +) +from megatron.core.inference.text_generation_controllers.encoder_decoder_text_generation_controller import ( + EncoderDecoderTextGenerationController, +) from megatron.core.inference.text_generation_controllers.simple_text_generation_controller import ( SimpleTextGenerationController, ) -from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel +from megatron.core.transformer.module import MegatronModule from pytorch_lightning.trainer.states import TrainerFn import nemo.lightning as nl @@ -37,19 +41,31 @@ from nemo.lightning.pytorch.strategies.utils import RestoreConfig -# We need this wrapper since mcore generate uses tokenizer.detokenize, tokenizer.tokenize to encode and decode prompts +# We need this wrapper since mcore generate uses methods/properties such as tokenizer.detokenize, tokenizer.tokenize, tokenizer.bos, tokenizer.pad, etc. to encode and decode prompts class MCoreTokenizerWrappper: def __init__(self, tokenizer): self.tokenizer = tokenizer self.eod = tokenizer.eod self.vocab_size = tokenizer.vocab_size - def detokenize(self, tokens): - return self.tokenizer.ids_to_text(tokens) + def detokenize(self, tokens, remove_special_tokens=False): + return self.tokenizer.ids_to_text(tokens, remove_special_tokens) def tokenize(self, prompt): return self.tokenizer.text_to_ids(prompt) + @property + def additional_special_tokens_ids(self): + return self.tokenizer.additional_special_tokens_ids + + @property + def bos(self): + return self.tokenizer.bos_id + + @property + def pad(self): + return self.tokenizer.pad_id + # TODO: Move to lightning Fabric API. 
def _setup_trainer_and_restore_model(path: Path, trainer: nl.Trainer, model: pl.LightningModule): @@ -101,41 +117,30 @@ def setup_model_and_tokenizer( trainer: nl.Trainer, params_dtype: torch.dtype = torch.bfloat16, inference_batch_times_seqlen_threshold: int = 1000, -) -> tuple[MCoreGPTModel, MCoreTokenizerWrappper]: +) -> tuple[MegatronModule, MCoreTokenizerWrappper]: model: io.TrainerContext = io.load_context(path=ckpt_to_context_subdir(path), subpath="model") _setup_trainer_and_restore_model(path=path, trainer=trainer, model=model) - # This is to get the MCore model required in GPTInferenceWrapper. - mcore_model = model - while mcore_model: - if type(mcore_model) is MCoreGPTModel: - break - mcore_model = getattr(mcore_model, "module", None) - if mcore_model is None or type(mcore_model) is not MCoreGPTModel: - raise ValueError("Exact McoreGPTModel instance not found in the model structure.") - - inference_wrapped_model = GPTInferenceWrapper( - mcore_model, - InferenceWrapperConfig( - hidden_size=mcore_model.config.hidden_size, - params_dtype=params_dtype, - inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, - padded_vocab_size=model.tokenizer.vocab_size, - ), - ) - + inference_wrapped_model = model.get_inference_wrapper(params_dtype, inference_batch_times_seqlen_threshold) return inference_wrapped_model, MCoreTokenizerWrappper(model.tokenizer) def generate( - model: GPTInferenceWrapper, + model: AbstractModelInferenceWrapper, tokenizer: MCoreTokenizerWrappper, prompts: list[str], + encoder_prompts: Optional[list[str]] = None, + add_BOS: bool = False, max_batch_size: int = 4, random_seed: Optional[int] = None, inference_params: Optional[CommonInferenceParams] = None, ) -> dict: - text_generation_controller = SimpleTextGenerationController(inference_wrapped_model=model, tokenizer=tokenizer) + if encoder_prompts is not None: + text_generation_controller = EncoderDecoderTextGenerationController( + inference_wrapped_model=model, tokenizer=tokenizer + ) + else: + text_generation_controller = SimpleTextGenerationController(inference_wrapped_model=model, tokenizer=tokenizer) mcore_engine = MCoreEngine( text_generation_controller=text_generation_controller, max_batch_size=max_batch_size, random_seed=random_seed ) @@ -144,6 +149,8 @@ def generate( results = mcore_engine.generate( prompts=prompts, + add_BOS=add_BOS, + encoder_prompts=encoder_prompts, common_inference_params=common_inference_params, ) diff --git a/nemo/collections/llm/t5/model/t5.py b/nemo/collections/llm/t5/model/t5.py index fa4095d755882..e6970cba3dd88 100644 --- a/nemo/collections/llm/t5/model/t5.py +++ b/nemo/collections/llm/t5/model/t5.py @@ -19,6 +19,8 @@ import pytorch_lightning as L import torch import torch.distributed +from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig +from megatron.core.inference.model_inference_wrappers.t5.t5_inference_wrapper import T5InferenceWrapper from megatron.core.optimizer import OptimizerConfig from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_config import TransformerConfig @@ -258,6 +260,26 @@ def validation_step(self, batch, batch_idx=None) -> torch.Tensor: return self.forward_step(batch) + def get_inference_wrapper(self, params_dtype, inference_batch_times_seqlen_threshold) -> torch.Tensor: + # This is to get the MCore model required in T5InferenceWrapper. 
+        mcore_model = self.module
+        while mcore_model:
+            if type(mcore_model) is MCoreT5Model:
+                break
+            mcore_model = getattr(mcore_model, "module", None)
+        if mcore_model is None or type(mcore_model) is not MCoreT5Model:
+            raise ValueError("Exact MCoreT5Model instance not found in the model structure.")
+
+        inference_wrapper_config = InferenceWrapperConfig(
+            hidden_size=mcore_model.config.hidden_size,
+            params_dtype=params_dtype,
+            inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold,
+            padded_vocab_size=self.tokenizer.vocab_size,
+        )
+
+        model_inference_wrapper = T5InferenceWrapper(mcore_model, inference_wrapper_config)
+        return model_inference_wrapper
+
     @property
     def training_loss_reduction(self) -> MaskedTokenLossReduction:
         if not self._training_loss_reduction:
diff --git a/scripts/llm/t5_generate.py b/scripts/llm/t5_generate.py
new file mode 100644
index 0000000000000..917fca6e1dfe7
--- /dev/null
+++ b/scripts/llm/t5_generate.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NOTE: This script is just an example of using NeMo checkpoints for generating outputs and is subject to change without notice.
+
+import argparse
+import torch
+import torch.distributed
+from megatron.core.inference.common_inference_params import CommonInferenceParams
+
+import nemo.lightning as nl
+from nemo.collections.llm import api
+
+
+def get_args():
+    parser = argparse.ArgumentParser(description='Generate text with a trained T5 model using NeMo 2.0')
+    parser.add_argument('--devices', type=int, help="Number of devices to use for inference.")
+    parser.add_argument('--checkpoint-path', type=str, help="Path to trained model.")
+    parser.add_argument("--temperature", type=float, default=1.0, help='Sampling temperature.')
+    parser.add_argument("--top_k", type=int, default=1, help='Top k sampling.')
+    parser.add_argument("--top_p", type=float, default=0.0, help='Top p sampling.')
+    parser.add_argument(
+        "--num-tokens-to-generate", type=int, default=30, help='Number of tokens to generate for each prompt.'
+    )
+    parser.add_argument(
+        "--prompts",
+        metavar='N',
+        type=str,
+        nargs='+',
+        help='Prompts with each prompt within quotes and separated by space.',
+    )
+    parser.add_argument(
+        "--encoder-prompts",
+        metavar='N',
+        type=str,
+        nargs='+',
+        help='Encoder input prompts with each prompt within quotes and separated by space.',
+    )
+    parser.add_argument("--max-batch-size", type=int, default=1, help='Max number of prompts to process at once.')
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+
+    args = get_args()
+
+    strategy = nl.MegatronStrategy(
+        tensor_model_parallel_size=1,
+        pipeline_model_parallel_size=1,
+        context_parallel_size=1,
+        sequence_parallel=False,
+        setup_optimizers=False,
+        store_optimizer_states=False,
+    )
+
+    trainer = nl.Trainer(
+        accelerator="gpu",
+        devices=args.devices,
+        num_nodes=1,
+        strategy=strategy,
+        plugins=nl.MegatronMixedPrecision(
+            precision="bf16-mixed",
+            params_dtype=torch.bfloat16,
+            pipeline_dtype=torch.bfloat16,
+            autocast_enabled=False,
+            grad_reduce_in_fp32=False,
+        ),
+    )
+    prompts = [
+        "",
+        "",
+        "",
+    ]
+    encoder_prompts = [
+        "Hello, how are ?",
+        "How many r's are in the 'strawberry'?",
+        "Which number is ? 10.119 10.19?",
+    ]
+
+    results = api.generate(
+        path=args.checkpoint_path,
+        prompts=prompts,
+        encoder_prompts=encoder_prompts,
+        trainer=trainer,
+        add_BOS=True,
+        inference_params=CommonInferenceParams(
+            temperature=args.temperature, top_k=args.top_k, num_tokens_to_generate=args.num_tokens_to_generate
+        ),
+        text_only=True,
+    )
+    if torch.distributed.get_rank() == 0:
+        for i, r in enumerate(results):
+            print(prompts[i])
+            print("*" * 50)
+            print(r)
+            print("\n\n")

From 869625e59e1058a87fd6cd5947e36bcf21d3da4e Mon Sep 17 00:00:00 2001
From: guyueh1 <140554423+guyueh1@users.noreply.github.com>
Date: Mon, 28 Oct 2024 10:56:16 -0700
Subject: [PATCH 012/125] [Bug fix] use default_factory to provide default value in dataclasses in energon MultiModalSampleConfig (#11041)

Signed-off-by: Guyue Huang
Co-authored-by: Guyue Huang
---
 nemo/collections/multimodal/data/energon/config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/multimodal/data/energon/config.py b/nemo/collections/multimodal/data/energon/config.py
index ab8ecf9fc06d3..45ca8e9db8006 100644
--- a/nemo/collections/multimodal/data/energon/config.py
+++ b/nemo/collections/multimodal/data/energon/config.py
@@ -64,7 +64,7 @@ class LLaVATemplateConfig(BaseConversationTemplateConfig):
 
 @dataclass
 class MultiModalSampleConfig:
-    image_token: ImageToken = ImageToken()
+    image_token: ImageToken = field(default_factory=ImageToken)
     ignore_place_holder: int = -100
-    conversation_template_config: LLaVATemplateConfig = LLaVATemplateConfig()
+    conversation_template_config: LLaVATemplateConfig = field(default_factory=LLaVATemplateConfig)
     image_following_text: bool = True

From 60cce8d4470bd7ab0f8fb84e150fe082d45c5b6a Mon Sep 17 00:00:00 2001
From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
Date: Tue, 29 Oct 2024 00:48:16 +0530
Subject: [PATCH 013/125] Update ModelOpt Width Pruning example defaults (#10902)

* update width pruning example defaults

Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>

* Update Dockerfile.ci

Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>

* Undo CI version update

Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>

---------

Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
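For illustration only (not part of the original patch): a minimal Python sketch of how the updated example defaults below are turned into the export_config constraints, mirroring the dict comprehension in megatron_gpt_prune.py. Keys left as null in the YAML are simply dropped, so only the populated constraints reach the pruning API. The values come from the new megatron_gpt_prune.yaml defaults; everything else is hypothetical.

    # Sketch: constraint filtering with the new example defaults.
    prune_cfg = {
        "ffn_hidden_size": 9216,      # pruned FFN size (new default)
        "num_attention_heads": None,  # null in YAML -> no head pruning
        "num_query_groups": None,     # null in YAML -> no query-group pruning
        "hidden_size": 3072,          # pruned embedding size (new default)
        "num_layers": None,           # null in YAML -> no depth pruning
    }
    export_config = {k: v for k, v in prune_cfg.items() if v is not None}
    assert export_config == {"ffn_hidden_size": 9216, "hidden_size": 3072}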
--- .../conf/megatron_gpt_prune.yaml | 19 +++++++----- .../language_modeling/megatron_gpt_prune.py | 29 ++++++++++--------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_prune.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_prune.yaml index cb26d5744b5b4..f174aafed0ee2 100644 --- a/examples/nlp/language_modeling/conf/megatron_gpt_prune.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_prune.yaml @@ -23,19 +23,22 @@ trainer: model: tensor_model_parallel_size: 1 # Pruning currently only supports tensor_model_parallel_size=1 pipeline_model_parallel_size: 1 - restore_from_path: llama3.1-8b-base.nemo # Nemo file path + sequence_parallel: false # Sequence parallelism is not supported with pipeline parallelism + restore_from_path: llama3.1-8b-instruct.nemo # Nemo file path ## Activation Checkpoint activations_checkpoint_granularity: null # 'selective' or 'full' activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective' prune: - calib_dataset: cnn_dailymail # wikitext, cnn_dailymail, or a local dataset - num_calib_size: 512 # number of samples used for calibration - ffn_hidden_size: 3584 # ffn_hidden_size in the pruned model, ffn_hidden_size // 4 - num_attention_heads: 8 # num_attention_heads in the pruned model, num_attention_heads // 4 - num_query_groups: 4 # num_query_groups in the pruned model, num_query_groups // 2 - hidden_size: 2048 # hidden_size in the pruned model, hidden_size // 2 + calib_dataset: wikitext # wikitext, cnn_dailymail, or a local dataset + num_calib_size: 1024 # number of samples used for calibration + # pruning constraints (null means no pruning) + ffn_hidden_size: 9216 # ffn_hidden_size in the pruned model + num_attention_heads: null # num_attention_heads in the pruned model + num_query_groups: null # num_query_groups in the pruned model + hidden_size: 3072 # hidden_size (embedding size) in the pruned model + num_layers: null # num_layers (depth) in the pruned model export: - save_path: llama3.1-8b-base-pruned.nemo # Path where the pruned model will be saved + save_path: llama3.1-8b-instruct-pruned.nemo # Path where the pruned model will be saved diff --git a/examples/nlp/language_modeling/megatron_gpt_prune.py b/examples/nlp/language_modeling/megatron_gpt_prune.py index b9bf8edbfb1ab..de12b861a1c0f 100644 --- a/examples/nlp/language_modeling/megatron_gpt_prune.py +++ b/examples/nlp/language_modeling/megatron_gpt_prune.py @@ -36,23 +36,23 @@ Example usage: ``` python examples/nlp/language_modeling/megatron_gpt_prune.py \ - model.restore_from_path=llama3.1-8b-base.nemo \ + model.restore_from_path=llama3.1-8b-instruct.nemo \ model.tensor_model_parallel_size=1 \ model.pipeline_model_parallel_size=8 \ trainer.num_nodes=1 \ trainer.precision=bf16 \ trainer.devices=8 \ - prune.ffn_hidden_size=3584 \ - prune.num_attention_heads=8 \ - prune.num_query_groups=4 \ - prune.hidden_size=2048 \ - export.save_path=llama3.1-8b-base-pruned.nemo + prune.ffn_hidden_size=9216 \ + prune.num_attention_heads=null \ + prune.num_query_groups=null \ + prune.hidden_size=3072 \ + export.save_path=llama3.1-8b-instruct-pruned.nemo ``` where tensor_model_parallel_size must be 1 because of the current prune API limitation """ -def get_calib_data_iter(data="cnn_dailymail", batch_size=64, calib_size=512, max_sequence_length=512): +def get_calib_data_iter(data="wikitext", batch_size=64, calib_size=512, max_sequence_length=512): if data == "wikitext": dataset = load_dataset("wikitext", 
"wikitext-103-v1", split="train") text_column = "text" @@ -73,18 +73,12 @@ def get_calib_data_iter(data="cnn_dailymail", batch_size=64, calib_size=512, max @hydra_runner(config_path="conf", config_name="megatron_gpt_prune") def main(cfg) -> None: - if not torch.cuda.is_available(): - raise EnvironmentError("GPU is required for the pruning.") - # Overwrite model config with the one from the model checkpoint and apply pruning modifications model_cfg = load_config(cfg.model.restore_from_path) model_cfg.update(cfg.model) model_cfg.name = "modelopt" # Use modelopt transformer spec for pruning assert cfg.model.tensor_model_parallel_size == 1, "Pruning currently only supports tensor_model_parallel_size=1" - assert ( - not hasattr(cfg.model, "sequence_parallel") or not cfg.model.sequence_parallel - ), "Pruning currently does not support sequence parallelism" trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer) model = MegatronGPTModel.restore_from( @@ -112,7 +106,13 @@ def forward_loop(model): constraints={ "export_config": { k: cfg.prune.get(k) - for k in ["ffn_hidden_size", "num_attention_heads", "num_query_groups", "hidden_size"] + for k in [ + "ffn_hidden_size", + "num_attention_heads", + "num_query_groups", + "hidden_size", + "num_layers", + ] if cfg.prune.get(k) is not None }, }, @@ -121,6 +121,7 @@ def forward_loop(model): ) model_pruned.save_to(cfg.export.save_path) + print(f"Pruned model saved to {cfg.export.save_path}") if __name__ == '__main__': From 5d3dadb419463a1feea6cb1f517d24c708c8f9ea Mon Sep 17 00:00:00 2001 From: Michal Futrega Date: Mon, 28 Oct 2024 22:34:32 +0100 Subject: [PATCH 014/125] fix: Resolve mutable default issue in MultiModalSampleConfig dataclass (#11061) From 6597517659435c1782f88e8228f3143fff83e4f2 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Mon, 28 Oct 2024 19:51:21 -0400 Subject: [PATCH 015/125] SC1/SC2 Recipe (#10971) * Add recipe for sc1/sc2 * Apply isort and black reformatting Signed-off-by: suiyoubi * typo fixed --------- Signed-off-by: suiyoubi Co-authored-by: suiyoubi --- nemo/collections/llm/recipes/__init__.py | 10 + nemo/collections/llm/recipes/starcoder.py | 310 ++++++++++++++++++ nemo/collections/llm/recipes/starcoder2.py | 135 ++++++++ .../collections/llm/recipes/starcoder2_15b.py | 223 +++++++++++++ nemo/collections/llm/recipes/starcoder2_3b.py | 223 +++++++++++++ nemo/collections/llm/recipes/starcoder2_7b.py | 223 +++++++++++++ 6 files changed, 1124 insertions(+) create mode 100644 nemo/collections/llm/recipes/starcoder.py create mode 100644 nemo/collections/llm/recipes/starcoder2.py create mode 100644 nemo/collections/llm/recipes/starcoder2_15b.py create mode 100644 nemo/collections/llm/recipes/starcoder2_3b.py create mode 100644 nemo/collections/llm/recipes/starcoder2_7b.py diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index 2aa6eb8bf784c..c1af8907a002b 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -55,6 +55,11 @@ qwen2_7b, qwen2_72b, qwen2_500m, + starcoder, + starcoder2, + starcoder2_3b, + starcoder2_7b, + starcoder2_15b, ) from nemo.collections.llm.recipes.log.default import default_log, default_resume from nemo.collections.llm.recipes.optim import adam @@ -95,6 +100,11 @@ "nemotron4_22b_16k", "nemotron4_22b_64k", "nemotron4_340b", + "starcoder", + "starcoder2", + "starcoder2_3b", + "starcoder2_7b", + "starcoder2_15b", "qwen2", "qwen2_500m", "qwen2_1p5b", diff --git a/nemo/collections/llm/recipes/starcoder.py 
b/nemo/collections/llm/recipes/starcoder.py new file mode 100644 index 0000000000000..b90cec0fbd7eb --- /dev/null +++ b/nemo/collections/llm/recipes/starcoder.py @@ -0,0 +1,310 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.gpt.model.starcoder import StarcoderConfig15B, StarcoderModel +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed, fp16_mixed +from nemo.utils.exp_manager import TimingCallback + +NAME = "starcoder_15b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Starcoder 15B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Starcoder 15B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=starcoder_15b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return run.Config(StarcoderModel, config=run.Config(StarcoderConfig15B)) + + +def starcoder_trainer( + tensor_parallelism: int = 4, + pipeline_parallelism: int = 2, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 2000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for Starcoder 15B models. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. 
+ max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_include_optimizer=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ) + + precision_plugin = None + if precision == "16-mixed": + precision_plugin = fp16_mixed() + elif precision == "bf16-mixed": + precision_plugin = bf16_mixed() + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + callbacks=callbacks, + devices=num_gpus_per_node, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=precision_plugin, + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=val_check_interval, + ) + + return trainer + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 1000, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Starcoder 15B model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. 
+ max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory starcoder_15b + $ nemo llm pretrain --factory "starcoder_15b(num_nodes=1, name='my_starcoder2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="starcoder2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. + """ + return run.Partial( + fn, + model=model(), + trainer=starcoder_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Starcoder 15B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. 
+ + Examples: + CLI usage: + $ nemo llm finetune --factory starcoder_15b + + Python API usage: + >>> recipe = finetune_recipe(name="starcoder_15b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "bigcode/starcoder", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/starcoder2.py b/nemo/collections/llm/recipes/starcoder2.py new file mode 100644 index 0000000000000..c3a19326585cb --- /dev/null +++ b/nemo/collections/llm/recipes/starcoder2.py @@ -0,0 +1,135 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from pytorch_lightning.callbacks.callback import Callback +from nemo import lightning as nl +from nemo.collections.llm.gpt.model.starcoder2 import ( + Starcoder2Config3B, + Starcoder2Config7B, + Starcoder2Config15B, + Starcoder2Model, +) +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed, fp16_mixed + + +def starcoder2_model(version: str) -> run.Config[pl.LightningModule]: + """ + A function to create a Starcoder2 models. + + Args: + version (str): The version of the Starcoder2 model to create. one of ["starcoder2_3b", "starcoder2_7b", + "starcoder2_15b"]. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Starcoder2 model. + """ + config = None + if version == "starcoder2_3b": + config = run.Config(Starcoder2Config3B) + elif version == "starcoder2_7b": + config = run.Config(Starcoder2Config7B) + elif version == "starcoder2_15b": + config = run.Config(Starcoder2Config15B) + + assert config is not None, f"Invalid version: {version}" + return run.Config(Starcoder2Model, config=config) + + +def starcoder2_trainer( + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 2000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for Starcoder2 models. 
+ + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_include_optimizer=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ) + + precision_plugin = None + if precision == "16-mixed": + precision_plugin = fp16_mixed() + elif precision == "bf16-mixed": + precision_plugin = bf16_mixed() + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + callbacks=callbacks, + devices=num_gpus_per_node, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=precision_plugin, + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=val_check_interval, + ) + + return trainer diff --git a/nemo/collections/llm/recipes/starcoder2_15b.py b/nemo/collections/llm/recipes/starcoder2_15b.py new file mode 100644 index 0000000000000..5faebb9460f3b --- /dev/null +++ b/nemo/collections/llm/recipes/starcoder2_15b.py @@ -0,0 +1,223 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.starcoder2 import starcoder2_model, starcoder2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "starcoder2_15b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Starcoder2 15B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Starcoder2 15b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=starcoder2_15b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return starcoder2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 4, + pipeline_parallelism: int = 2, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 1000, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Starcoder2 15B model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. 
+ seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory starcoder2_15b + $ nemo llm pretrain --factory "starcoder2_15b(num_nodes=1, name='my_starcoder2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="starcoder2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. + """ + return run.Partial( + fn, + model=model(), + trainer=starcoder2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Starcoder2 15B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory starcoder2_15b + + Python API usage: + >>> recipe = finetune_recipe(name="starcoder2_15b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
+ """ + recipe = default_finetune_recipe(model(), "bigcode/starcoder2-15b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/starcoder2_3b.py b/nemo/collections/llm/recipes/starcoder2_3b.py new file mode 100644 index 0000000000000..232f5842ff842 --- /dev/null +++ b/nemo/collections/llm/recipes/starcoder2_3b.py @@ -0,0 +1,223 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.starcoder2 import starcoder2_model, starcoder2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "starcoder2_3b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Starcoder2 3B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Starcoder2 3b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=starcoder2_3b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return starcoder2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 1000, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Starcoder2 3B model. 
+ + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory starcoder2_3b + $ nemo llm pretrain --factory "starcoder2_3b(num_nodes=1, name='my_starcoder2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="starcoder2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. 
+ """ + return run.Partial( + fn, + model=model(), + trainer=starcoder2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Starcoder2 3B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory starcoder2_3b + + Python API usage: + >>> recipe = finetune_recipe(name="starcoder2_3b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "bigcode/starcoder2-3b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/starcoder2_7b.py b/nemo/collections/llm/recipes/starcoder2_7b.py new file mode 100644 index 0000000000000..ee6dacdc98e9a --- /dev/null +++ b/nemo/collections/llm/recipes/starcoder2_7b.py @@ -0,0 +1,223 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.collections.llm.recipes.starcoder2 import starcoder2_model, starcoder2_trainer +from nemo.utils.exp_manager import TimingCallback + +NAME = "starcoder2_7b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Starcoder2 7b model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Starcoder2 7b model. + + Examples: + CLI usage: + $ nemo llm pretrain model=starcoder2_7b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return starcoder2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 300000, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 1000, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for Starcoder2 7B model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. 
+ accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory starcoder2_7b + $ nemo llm pretrain --factory "starcoder2_7b(num_nodes=1, name='my_starcoder2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="starcoder2_pretrain", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses a mock dataset, look for the finetune examples to see how to change the dataset. + """ + return run.Partial( + fn, + model=model(), + trainer=starcoder2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Starcoder2 7B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. 
+ + Examples: + CLI usage: + $ nemo llm finetune --factory starcoder2_7b + + Python API usage: + >>> recipe = finetune_recipe(name="starcoder2_7b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "bigcode/starcoder2-7b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe From aa6824632214fec35f4cadb6d19bce782b3d7ef9 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Mon, 28 Oct 2024 17:14:08 -0700 Subject: [PATCH 016/125] Add copyright notice (#11066) * Add copyright notice * Add copyright notice --- nemo/collections/nlp/modules/common/hyena/hyena.py | 14 ++++++++++++++ nemo/lightning/fabric/strategies.py | 14 ++++++++++++++ tests/collections/llm/recipes/test_mixtral_8x7b.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/nemo/collections/nlp/modules/common/hyena/hyena.py b/nemo/collections/nlp/modules/common/hyena/hyena.py index f1b4fe20f5373..4808bf1eb92ce 100644 --- a/nemo/collections/nlp/modules/common/hyena/hyena.py +++ b/nemo/collections/nlp/modules/common/hyena/hyena.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Implementation of Hyena operator # # Michael Poli and Stefano Massaroli and Eric Nguyen and Daniel Y Fu and Tri Dao and Stephen Baccus and diff --git a/nemo/lightning/fabric/strategies.py b/nemo/lightning/fabric/strategies.py index 695595bca4d09..7445413b612e6 100644 --- a/nemo/lightning/fabric/strategies.py +++ b/nemo/lightning/fabric/strategies.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from contextlib import ExitStack, contextmanager from datetime import timedelta from typing import ( diff --git a/tests/collections/llm/recipes/test_mixtral_8x7b.py b/tests/collections/llm/recipes/test_mixtral_8x7b.py index 409dc26a8aa40..62dc0db3d8840 100644 --- a/tests/collections/llm/recipes/test_mixtral_8x7b.py +++ b/tests/collections/llm/recipes/test_mixtral_8x7b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest import torch From 100e093741eb0a440f26e09cd7349101d3869e71 Mon Sep 17 00:00:00 2001 From: Farhad Ramezanghorbani Date: Mon, 28 Oct 2024 18:24:55 -0600 Subject: [PATCH 017/125] Wrap batch_sampler with _IndexBatchSamplerWrapper (#10934) * wrap batch_sampler Signed-off-by: Farhad Ramezanghorbani * Apply isort and black reformatting Signed-off-by: farhadrgh * pass dataloader mode * Apply isort and black reformatting Signed-off-by: farhadrgh * pass dataloader mode Signed-off-by: Farhad Ramezanghorbani * pass dataloader mode Signed-off-by: Farhad Ramezanghorbani * resolve conflict Signed-off-by: Farhad Ramezanghorbani * change import Signed-off-by: Farhad Ramezanghorbani --------- Signed-off-by: Farhad Ramezanghorbani Signed-off-by: farhadrgh Signed-off-by: Chen Cui Co-authored-by: Chen Cui --- nemo/lightning/data.py | 6 ++++++ nemo/lightning/pytorch/plugins/data_sampler.py | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/nemo/lightning/data.py b/nemo/lightning/data.py index e5acb1b5b8bfa..6c7fd128e530e 100644 --- a/nemo/lightning/data.py +++ b/nemo/lightning/data.py @@ -19,6 +19,7 @@ from typing import List, Literal, Optional import torch +from pytorch_lightning.overrides.distributed import _IndexBatchSamplerWrapper from torch.utils.data import DataLoader, Dataset @@ -139,6 +140,7 @@ def add_megatron_sampler( dataloader_type: Literal["single", "cyclic", "batch"] = "single", drop_last: bool = True, pad_samples_to_global_batch_size: bool = False, + dataloader_mode: Literal["train", "validation", "test", "predict"] = "train", rank: int = 0, world_size: int = 1, # data_sharding: bool = False @@ -170,6 +172,7 @@ def add_megatron_sampler( pad_samples_to_global_batch_size (bool, optional): Whether to pad the last incomplete batch to the `global_batch_size` (defaults to False, only applies when `drop_last` is False). + dataloader_mode (Literal["train", "validation", "test", "predict"]): The mode of dataloader. Returns: DataLoader: A new DataLoader instance with the configured Megatron sampler. 
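As context for the hunk that follows, an illustrative sketch (not part of the patch): _IndexBatchSamplerWrapper re-yields the batches of the wrapped sampler while recording the sample indices it produced, which is what lets Lightning's predict loop map outputs back to dataset samples. The attribute name used below reflects recent PyTorch Lightning versions and is an assumption here.

    # Hypothetical illustration of the wrapper applied to test/predict dataloaders.
    from pytorch_lightning.overrides.distributed import _IndexBatchSamplerWrapper
    from torch.utils.data import BatchSampler, SequentialSampler

    wrapped = _IndexBatchSamplerWrapper(
        BatchSampler(SequentialSampler(range(6)), batch_size=2, drop_last=False)
    )
    batches = list(wrapped)               # [[0, 1], [2, 3], [4, 5]]
    indices = wrapped.seen_batch_indices  # captured indices for prediction writers (assumed attribute)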
@@ -214,6 +217,9 @@ def add_megatron_sampler( else: raise Exception(f'{dataloader_type} dataloader type is not supported.') + if dataloader_mode in ["test", "predict"]: + batch_sampler = _IndexBatchSamplerWrapper(batch_sampler) # BatchSampler wrapper to capture its indices + return DataLoader( dataloader.dataset, batch_sampler=batch_sampler, diff --git a/nemo/lightning/pytorch/plugins/data_sampler.py b/nemo/lightning/pytorch/plugins/data_sampler.py index 52ba9e3220acb..f37fd38adf531 100644 --- a/nemo/lightning/pytorch/plugins/data_sampler.py +++ b/nemo/lightning/pytorch/plugins/data_sampler.py @@ -44,7 +44,6 @@ def __init__( init_consumed_samples: int = 0, init_global_step: int = 0, output_log: bool = True, - drop_last: bool = True, ): self.seq_len = seq_len self.output_log = output_log @@ -57,7 +56,6 @@ def __init__( self.if_first_step = 0 self.prev_global_batch_size = None self.init_global_step = init_global_step - self.drop_last = drop_last def setup(self, global_rank: int) -> None: from nemo.lightning.data import setup_microbatch_calculator @@ -80,7 +78,8 @@ def transform_dataloader(self, dataloader: DataLoader, consumed_samples: int = 0 rampup_batch_size=self.rampup_batch_size, consumed_samples=self.init_consumed_samples if mode == 'train' else 0, dataloader_type=self.dataloader_type, - drop_last=self.drop_last, + drop_last=mode not in ["test", "predict"], # don't drop the incomplete batch in test and predict methods + dataloader_mode=mode, # dataloader wrapped with nemo.lightning.data.WrappedDataLoader has mode attribute rank=data_parallel_rank, world_size=data_parallel_size, ) From ce9f1dd81588ec1f0e10d65e0eaa723355bac501 Mon Sep 17 00:00:00 2001 From: Valerie Sarge Date: Mon, 28 Oct 2024 23:29:41 -0400 Subject: [PATCH 018/125] Performance fine-tuning recipes for llama3 8b + 70b (#11046) * llama3 finetuning perf recipes progress capture Signed-off-by: Valerie Sarge * Small syntax fix Signed-off-by: Valerie Sarge * syntax Signed-off-by: Valerie Sarge * Apply isort and black reformatting Signed-off-by: vysarge * Correct ddp setting Signed-off-by: Valerie Sarge * Fix hasattr check Signed-off-by: Valerie Sarge * bf16 grad Signed-off-by: Valerie Sarge * Update configs for 8b + 70b Signed-off-by: Valerie Sarge * Set wgrad_deferral_limit Signed-off-by: Valerie Sarge --------- Signed-off-by: Valerie Sarge Signed-off-by: vysarge Co-authored-by: vysarge --- nemo/collections/llm/gpt/model/llama.py | 1 - nemo/collections/llm/recipes/llama3_70b.py | 100 ++++++++++++++++++++- nemo/collections/llm/recipes/llama3_8b.py | 91 ++++++++++++++++++- 3 files changed, 189 insertions(+), 3 deletions(-) diff --git a/nemo/collections/llm/gpt/model/llama.py b/nemo/collections/llm/gpt/model/llama.py index b48f99e061c97..5bc45b1049f32 100644 --- a/nemo/collections/llm/gpt/model/llama.py +++ b/nemo/collections/llm/gpt/model/llama.py @@ -56,7 +56,6 @@ class LlamaConfig(GPTConfig): persist_layer_norm: bool = True bias_dropout_fusion: bool = True apply_rope_fusion: bool = True - cross_entropy_loss_fusion: bool = False @dataclass diff --git a/nemo/collections/llm/recipes/llama3_70b.py b/nemo/collections/llm/recipes/llama3_70b.py index 6e9da5c5116d5..5b721c7d531ed 100644 --- a/nemo/collections/llm/recipes/llama3_70b.py +++ b/nemo/collections/llm/recipes/llama3_70b.py @@ -24,6 +24,7 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs from 
nemo.collections.llm.gpt.model.llama import Llama3Config70B, LlamaModel from nemo.collections.llm.peft.lora import LoRA from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe @@ -31,6 +32,7 @@ from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed from nemo.collections.llm.recipes.tp_overlap_configs.userbuffers import userbuffers_bf16_h100_h8192_tp4_mbs1_seqlen8192 +from nemo.lightning.pytorch.callbacks.garbage_collection import GarbageCollectionCallback from nemo.lightning.pytorch.callbacks.megatron_comm_overlap import MegatronCommOverlapCallback from nemo.utils.exp_manager import TimingCallback @@ -245,7 +247,9 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', - packed_sequence: bool = False, + seq_length: Optional[int] = None, + packed_sequence: Optional[bool] = None, + performance_mode: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Llama3 70B model. @@ -260,6 +264,9 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + seq_length (int): Maximum number of tokens per microbatch. + packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency. By default, this value equals performance_mode. + performance_mode (bool): If true, enables optimizations for maximum performance. Returns: run.Partial: Partial configuration for fine-tuning. @@ -277,6 +284,15 @@ def finetune_recipe( This recipe uses the SQuAD dataset for fine-tuning. Be aware that fine-tuning a 70B model requires substantial computational resources. """ + # Default to unpacked data in normal mode and packed data in performance mode + # once packing recipe is well tested, change this default to true + if packed_sequence is None: + packed_sequence = performance_mode + + # For unpacked sequence, most samples in SQuAD dataset are shorter than 2K + if seq_length is None: + seq_length = 4096 if packed_sequence else 2048 + recipe = default_finetune_recipe( model(), "meta-llama/Meta-Llama-3-70B", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) @@ -287,8 +303,90 @@ def finetune_recipe( recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': recipe.peft = run.Config(LoRA) + recipe.peft.dim = 16 + recipe.peft.alpha = 32 + recipe.peft.target_modules = ['linear_qkv'] + + # some settings currently do not function correctly with LoRA + recipe.model.config.cross_entropy_loss_fusion = False + recipe.trainer.strategy.tensor_model_parallel_size = 8 recipe.optim.config.lr = 1e-4 else: raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # Sequence length settings in the model and dataset must agree + recipe.model.config.seq_length = seq_length + recipe.data.seq_length = seq_length + if packed_sequence: + recipe.data.pad_to_max_length = True + recipe.data.packed_sequence_specs = run.Config(PackedSequenceSpecs, packed_sequence_size=seq_length) + + if performance_mode: + recipe = finetune_performance_optimizations(recipe, peft_scheme) + + return recipe + + +def finetune_performance_optimizations( + recipe: run.Partial, + peft_scheme: str, +) -> run.Partial: + """ + Modify the given recipe to optimize settings for performance. 
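A hypothetical way to exercise the new keyword arguments of the 70B recipe above; the module path mirrors the file being patched, while the assertions assume that nemo_run Config objects expose their configured fields as attributes.

    from nemo.collections.llm.recipes import llama3_70b

    # Defaults: unpacked SQuAD data, seq_length=2048, no performance-specific overrides.
    recipe = llama3_70b.finetune_recipe(peft_scheme="lora")

    # performance_mode turns on sequence packing (seq_length=4096) and the tuned parallel layout.
    fast = llama3_70b.finetune_recipe(peft_scheme="lora", performance_mode=True)
    assert fast.data.packed_sequence_specs.packed_sequence_size == 4096
    assert fast.trainer.strategy.sequence_parallel is True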
+ + This method enables performance optimizations that may not be suitable for all use cases. + Intended to build upon the standard fine-tuning recipe. + + Args: + recipe (run.Partial): Base fine-tuning recipe to which performance optimizations will be added + peft_scheme (str): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for performance-optimized fine-tuning. + + Note: + Use this method with caution and only when you need maximum performance. + It may not be suitable for all hardware configurations or use cases. + """ + + if not hasattr(recipe.trainer, "callbacks"): + recipe.trainer.callbacks = [] + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.trainer.strategy.pipeline_model_parallel_size = 4 + recipe.trainer.strategy.virtual_pipeline_model_parallel_size = 5 + recipe.trainer.plugins.grad_reduce_in_fp32 = False + recipe.trainer.strategy.ddp = run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=False, + overlap_grad_reduce=True, + overlap_param_gather=True, + average_in_collective=True, + ) + recipe.trainer.callbacks.append( + run.Config( + MegatronCommOverlapCallback, + tp_comm_overlap=True, + defer_embedding_wgrad_compute=True, + wgrad_deferral_limit=22, + ) + ) + else: + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.trainer.strategy.pipeline_model_parallel_size = 4 + + recipe.trainer.strategy.sequence_parallel = True + + recipe.trainer.callbacks.append(run.Config(TimingCallback)) + recipe.trainer.callbacks.append( + run.Config( + GarbageCollectionCallback, + 100, + 100, + ) + ) + return recipe diff --git a/nemo/collections/llm/recipes/llama3_8b.py b/nemo/collections/llm/recipes/llama3_8b.py index 394a7718b8bd7..29c5c25f94fe0 100644 --- a/nemo/collections/llm/recipes/llama3_8b.py +++ b/nemo/collections/llm/recipes/llama3_8b.py @@ -24,6 +24,7 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.gpt.data.packed_sequence import PackedSequenceSpecs from nemo.collections.llm.gpt.data.squad import SquadDataModule from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel from nemo.collections.llm.peft.lora import LoRA @@ -31,6 +32,7 @@ from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.lightning.pytorch.callbacks.garbage_collection import GarbageCollectionCallback from nemo.lightning.pytorch.callbacks.megatron_comm_overlap import MegatronCommOverlapCallback from nemo.utils.exp_manager import TimingCallback @@ -233,7 +235,9 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', - packed_sequence: bool = False, # once packing recipe is well tested, change this default to true + seq_length: Optional[int] = None, + packed_sequence: Optional[bool] = None, + performance_mode: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Llama3 8B model. @@ -248,6 +252,9 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. 
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + seq_length (int): Maximum number of tokens per microbatch. + packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency. By default, this value equals performance_mode. + performance_mode (bool): If true, enables optimizations for maximum performance. Returns: run.Partial: Partial configuration for fine-tuning. @@ -265,6 +272,15 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ + # Default to unpacked data in normal mode and packed data in performance mode + # once packing recipe is well tested, change this default to true + if packed_sequence is None: + packed_sequence = performance_mode + + # For unpacked sequence, most samples in SQuAD dataset are shorter than 2K + if seq_length is None: + seq_length = 4096 if packed_sequence else 2048 + recipe = default_finetune_recipe( model(), "meta-llama/Meta-Llama-3-8B", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) @@ -273,7 +289,80 @@ def finetune_recipe( recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': recipe.peft = run.Config(LoRA) + recipe.peft.dim = 8 + recipe.peft.alpha = 16 + recipe.peft.target_modules = ['linear_qkv'] + + # some settings currently do not function correctly with LoRA + recipe.model.config.cross_entropy_loss_fusion = False + recipe.optim.config.lr = 1e-4 else: raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # Sequence length settings in the model and dataset must agree + recipe.model.config.seq_length = seq_length + recipe.data.seq_length = seq_length + if packed_sequence: + recipe.data.pad_to_max_length = True + recipe.data.packed_sequence_specs = run.Config(PackedSequenceSpecs, packed_sequence_size=seq_length) + + if performance_mode: + recipe = finetune_performance_optimizations(recipe, peft_scheme) + + return recipe + + +def finetune_performance_optimizations( + recipe: run.Partial, + peft_scheme: str, +) -> run.Partial: + """ + Modify the given recipe to optimize settings for performance. + + This method enables performance optimizations that may not be suitable for all use cases. + Intended to build upon the standard fine-tuning recipe. + + Args: + recipe (run.Partial): Base fine-tuning recipe to which performance optimizations will be added + peft_scheme (str): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for performance-optimized fine-tuning. + + Note: + Use this method with caution and only when you need maximum performance. + It may not be suitable for all hardware configurations or use cases. 
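Both performance paths in this patch (70B above, 8B below) configure data-parallel communication the same way; a short annotated sketch of that DDP config follows. The field values are the ones used in these recipes, while the import path is an assumption about where Megatron-Core exposes the dataclass.

    from megatron.core.distributed import DistributedDataParallelConfig

    ddp = DistributedDataParallelConfig(
        check_for_nan_in_grad=True,   # fail fast if a gradient turns into NaN
        grad_reduce_in_fp32=False,    # reduce gradients in bf16 to cut communication volume
        overlap_grad_reduce=True,     # overlap the gradient reduction with the backward pass
        overlap_param_gather=True,    # overlap the parameter all-gather with the forward pass
        average_in_collective=True,   # average inside the collective instead of rescaling afterwards
    )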
+ """ + recipe.trainer.strategy.tensor_model_parallel_size = 1 + + if not hasattr(recipe.trainer, "callbacks"): + recipe.trainer.callbacks = [] + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.plugins.grad_reduce_in_fp32 = False + recipe.trainer.strategy.ddp = run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=False, + overlap_grad_reduce=True, + overlap_param_gather=True, + average_in_collective=True, + ) + recipe.trainer.callbacks.append( + run.Config( + MegatronCommOverlapCallback, + tp_comm_overlap=False, + ) + ) + + recipe.trainer.callbacks.append(run.Config(TimingCallback)) + recipe.trainer.callbacks.append( + run.Config( + GarbageCollectionCallback, + 100, + 100, + ) + ) + return recipe From 7f3da35e1b2aebd24c4ee4ef613303d17321776f Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Tue, 29 Oct 2024 09:52:58 +0530 Subject: [PATCH 019/125] Set TE spec name for NeMo to HF checkpoint converters (#11036) * Set TE spec name for NeMo to HF checkpoint converters Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> * Apply isort and black reformatting Signed-off-by: kevalmorabia97 * Update convert_falcon_nemo_to_hf.py Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --------- Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Signed-off-by: kevalmorabia97 Co-authored-by: kevalmorabia97 --- .../convert_baichuan2_nemo_to_hf.py | 7 ++++- .../convert_chatglm_nemo_to_hf.py | 25 +++++++++++++++--- .../convert_falcon_nemo_to_hf.py | 5 +++- .../convert_llama_nemo_to_hf.py | 26 ++++++++++++++++--- .../convert_mistral_7b_nemo_to_hf.py | 1 + .../convert_mixtral_nemo_to_hf.py | 1 + .../convert_nemotron_nemo_to_hf.py | 1 + .../convert_qwen2_nemo_to_hf.py | 1 + .../convert_starcoder2_nemo_to_hf.py | 1 + 9 files changed, 58 insertions(+), 10 deletions(-) diff --git a/scripts/checkpoint_converters/convert_baichuan2_nemo_to_hf.py b/scripts/checkpoint_converters/convert_baichuan2_nemo_to_hf.py index 18ddb89359420..ec048e4b6f190 100644 --- a/scripts/checkpoint_converters/convert_baichuan2_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_baichuan2_nemo_to_hf.py @@ -50,7 +50,11 @@ def get_args(): parser = ArgumentParser() parser.add_argument( - "--input_name_or_path", type=str, default=None, required=True, help="Path to .nemo file", + "--input_name_or_path", + type=str, + default=None, + required=True, + help="Path to .nemo file", ) parser.add_argument("--output_path", type=str, default=None, required=True, help="Path to HF .bin file") parser.add_argument( @@ -94,6 +98,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True) model_config.use_cpu_initialization = True model_config.tensor_model_parallel_size = 1 + model_config.name = "te_gpt" else: map_location, model_config = None, None diff --git a/scripts/checkpoint_converters/convert_chatglm_nemo_to_hf.py b/scripts/checkpoint_converters/convert_chatglm_nemo_to_hf.py index 59bc0a64bbe99..5a8e52ee8be51 100644 --- a/scripts/checkpoint_converters/convert_chatglm_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_chatglm_nemo_to_hf.py @@ -50,7 +50,11 @@ def get_args(): parser = ArgumentParser() parser.add_argument( - "--input_name_or_path", type=str, default=None, required=True, help="Path to .nemo file", + 
"--input_name_or_path", + type=str, + default=None, + required=True, + help="Path to .nemo file", ) parser.add_argument("--output_path", type=str, default=None, required=True, help="Path to HF .bin file") parser.add_argument( @@ -90,6 +94,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True) model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 + model_config.name = "te_gpt" if cpu_only: map_location = torch.device('cpu') model_config.use_cpu_initialization = True @@ -168,9 +173,21 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) qkv_bias_base_name = f'transformer.encoder.layers.{l}.self_attention.query_key_value.bias' - q_bias = param_to_weights(qkv_bias[q_slice].reshape(-1,)) - k_bias = param_to_weights(qkv_bias[k_slice].reshape(-1,)) - v_bias = param_to_weights(qkv_bias[v_slice].reshape(-1,)) + q_bias = param_to_weights( + qkv_bias[q_slice].reshape( + -1, + ) + ) + k_bias = param_to_weights( + qkv_bias[k_slice].reshape( + -1, + ) + ) + v_bias = param_to_weights( + qkv_bias[v_slice].reshape( + -1, + ) + ) checkpoint[qkv_bias_base_name] = torch.cat((q_bias, k_bias, v_bias)) # attention dense diff --git a/scripts/checkpoint_converters/convert_falcon_nemo_to_hf.py b/scripts/checkpoint_converters/convert_falcon_nemo_to_hf.py index 997f0ac23835a..da8f15b92649c 100644 --- a/scripts/checkpoint_converters/convert_falcon_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_falcon_nemo_to_hf.py @@ -51,7 +51,10 @@ def get_args(): parser = ArgumentParser() parser.add_argument( - "--input_name_or_path", type=str, required=True, help="Path to .nemo file", + "--input_name_or_path", + type=str, + required=True, + help="Path to .nemo file", ) parser.add_argument("--output_path", type=str, required=True, help="Path to HF .bin file") parser.add_argument( diff --git a/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py b/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py index 8da15148dfd87..a3c40676a9807 100644 --- a/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_llama_nemo_to_hf.py @@ -53,7 +53,11 @@ def get_args(): parser = ArgumentParser() parser.add_argument( - "--input_name_or_path", type=str, default=None, required=True, help="Path to .nemo file or extracted folder", + "--input_name_or_path", + type=str, + default=None, + required=True, + help="Path to .nemo file or extracted folder", ) parser.add_argument("--output_path", type=str, default=None, required=True, help="Path to HF .bin file") parser.add_argument( @@ -105,6 +109,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True) model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 + model_config.name = "te_gpt" if cpu_only: map_location = torch.device('cpu') model_config.use_cpu_initialization = True @@ -226,13 +231,26 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> def replace_hf_weights_and_tokenizer( - weights_file, dtype, input_hf_path, output_hf_path, tokenizer_path, output_hf_tokenizer, + weights_file, + dtype, + input_hf_path, + output_hf_path, + tokenizer_path, + output_hf_tokenizer, 
): - model = AutoModelForCausalLM.from_pretrained(input_hf_path, local_files_only=True, torch_dtype=dtype,) + model = AutoModelForCausalLM.from_pretrained( + input_hf_path, + local_files_only=True, + torch_dtype=dtype, + ) nemo_exported = torch.load(weights_file) if tokenizer_path: - tokenizer = LlamaTokenizer.from_pretrained(tokenizer_path, local_files_only=True, legacy=False,) + tokenizer = LlamaTokenizer.from_pretrained( + tokenizer_path, + local_files_only=True, + legacy=False, + ) tmp_tokenizer = convert_slow_tokenizer.convert_slow_tokenizer(tokenizer) fast_tokenizer = LlamaTokenizerFast(tokenizer_object=tmp_tokenizer) tokenizer_length = len(fast_tokenizer) diff --git a/scripts/checkpoint_converters/convert_mistral_7b_nemo_to_hf.py b/scripts/checkpoint_converters/convert_mistral_7b_nemo_to_hf.py index 796819c38ba44..b8c30a1b929d2 100644 --- a/scripts/checkpoint_converters/convert_mistral_7b_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_mistral_7b_nemo_to_hf.py @@ -81,6 +81,7 @@ def convert(in_file, precision=None, cpu_only=True) -> None: model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 model_config.sequence_parallel = False + model_config.name = "te_gpt" if cpu_only: map_location = torch.device('cpu') model_config.use_cpu_initialization = True diff --git a/scripts/checkpoint_converters/convert_mixtral_nemo_to_hf.py b/scripts/checkpoint_converters/convert_mixtral_nemo_to_hf.py index 58311d0324c2a..2bac2eaad616b 100644 --- a/scripts/checkpoint_converters/convert_mixtral_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_mixtral_nemo_to_hf.py @@ -83,6 +83,7 @@ def convert(in_file, precision=None) -> None: model_config = MegatronGPTModel.restore_from(in_file, trainer=dummy_trainer, return_config=True) model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 + model_config.name = "te_gpt" cpu_only = True if cpu_only: map_location = torch.device('cpu') diff --git a/scripts/checkpoint_converters/convert_nemotron_nemo_to_hf.py b/scripts/checkpoint_converters/convert_nemotron_nemo_to_hf.py index 7a58573278afe..fc0f660cbd425 100644 --- a/scripts/checkpoint_converters/convert_nemotron_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_nemotron_nemo_to_hf.py @@ -140,6 +140,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> model_config.pipeline_model_parallel_size = 1 model_config.sequence_parallel = False model_config.transformer_engine = True + model_config.name = "te_gpt" if cpu_only: map_location = torch.device("cpu") model_config.use_cpu_initialization = True diff --git a/scripts/checkpoint_converters/convert_qwen2_nemo_to_hf.py b/scripts/checkpoint_converters/convert_qwen2_nemo_to_hf.py index c6a218020c213..6080499ffdf8c 100644 --- a/scripts/checkpoint_converters/convert_qwen2_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_qwen2_nemo_to_hf.py @@ -108,6 +108,7 @@ def convert(input_nemo_file, output_hf_file, precision=None, cpu_only=False) -> model_config = MegatronGPTModel.restore_from(input_nemo_file, trainer=dummy_trainer, return_config=True) model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 + model_config.name = "te_gpt" if cpu_only: map_location = torch.device('cpu') model_config.use_cpu_initialization = True diff --git a/scripts/checkpoint_converters/convert_starcoder2_nemo_to_hf.py b/scripts/checkpoint_converters/convert_starcoder2_nemo_to_hf.py index 043d1fd35261e..4b65533b74ec4 100644 --- 
a/scripts/checkpoint_converters/convert_starcoder2_nemo_to_hf.py +++ b/scripts/checkpoint_converters/convert_starcoder2_nemo_to_hf.py @@ -89,6 +89,7 @@ def convert(in_file, precision=None, cpu_only=True) -> None: model_config = MegatronGPTModel.restore_from(in_file, trainer=dummy_trainer, return_config=True) model_config.tensor_model_parallel_size = 1 model_config.pipeline_model_parallel_size = 1 + model_config.name = "te_gpt" if cpu_only: map_location = torch.device('cpu') model_config.use_cpu_initialization = True From 3209aea8aecb4053421f29885a8646b3d3063f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 29 Oct 2024 08:16:34 +0100 Subject: [PATCH 020/125] ci: Re-add secrets detector (#11038) Signed-off-by: Oliver Koenig --- .github/workflows/cicd-main.yml | 1 + .github/workflows/secrets-detector.yml | 64 ++++++++++--------- .../.secrets.baseline => .secrets.baseline | 18 +++--- 3 files changed, 43 insertions(+), 40 deletions(-) rename .github/workflows/config/.secrets.baseline => .secrets.baseline (99%) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 6b39d2a9082ee..ef1c65e77a6c4 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -31,6 +31,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true + jobs: pre-flight: runs-on: ubuntu-latest diff --git a/.github/workflows/secrets-detector.yml b/.github/workflows/secrets-detector.yml index 0cf73e961bd49..cf8ccc189ab6d 100644 --- a/.github/workflows/secrets-detector.yml +++ b/.github/workflows/secrets-detector.yml @@ -1,35 +1,37 @@ -# # Copyright (c) 2020-2021, NVIDIA CORPORATION. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. -# name: Secrets detector +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: Secrets detector -# on: -# pull_request: +on: + pull_request: + branches: + - 'main' -# jobs: -# main: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout repository -# uses: actions/checkout@v4 -# with: -# path: ${{ github.run_id }} -# fetch-depth: 0 +jobs: + main: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + path: ${{ github.run_id }} + fetch-depth: 0 -# - name: Install secrets detector -# run: pip install detect-secrets + - name: Install secrets detector + run: pip install detect-secrets -# - name: Run on change-set -# run: | -# cd ${{ github.run_id }} -# git diff --name-only --diff-filter=d --merge-base origin/${{ github.base_ref }} -z | xargs -0 detect-secrets-hook --baseline .github/workflows/config/.secrets.baseline \ No newline at end of file + - name: Run on change-set + run: | + cd ${{ github.run_id }} + git diff --name-only --diff-filter=d --merge-base origin/main -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline \ No newline at end of file diff --git a/.github/workflows/config/.secrets.baseline b/.secrets.baseline similarity index 99% rename from .github/workflows/config/.secrets.baseline rename to .secrets.baseline index 4a56aaad3c58e..c26f70775c5a2 100644 --- a/.github/workflows/config/.secrets.baseline +++ b/.secrets.baseline @@ -123,13 +123,13 @@ } ], "results": { - ".github/workflows/cicd-main.yml": [ + ".github/workflows/node-reboot.yml": [ { - "type": "Base64 High Entropy String", - "filename": ".github/workflows/cicd-main.yml", - "hashed_secret": "593951c440200143335452427205ae7c8580d463", + "type": "Secret Keyword", + "filename": ".github/workflows/node-reboot.yml", + "hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0", "is_verified": false, - "line_number": 1503 + "line_number": 52 } ], "docs/source/nlp/question_answering.rst": [ @@ -1229,9 +1229,9 @@ { "type": "Base64 High Entropy String", "filename": "tests/infer_data_path.py", - "hashed_secret": "e3fb89ccb261c88146519164f7e8a47786d33fee", + "hashed_secret": "8e0937151cfd9750db688fbe66be37d0c53ed6ab", "is_verified": false, - "line_number": 271 + "line_number": 63 } ], "tutorials/asr/Multilang_ASR.ipynb": [ @@ -1902,7 +1902,7 @@ "filename": "tutorials/multimodal/Multimodal Data Preparation.ipynb", "hashed_secret": "b641cbe299c9e27b480cc8a823bb020d45962236", "is_verified": false, - "line_number": 660 + "line_number": 658 } ], "tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb": [ @@ -2083,5 +2083,5 @@ } ] }, - "generated_at": "2024-09-08T19:00:15Z" + "generated_at": "2024-10-25T13:43:17Z" } From c51cb679d75c46c428329f370602958ff729ca32 Mon Sep 17 00:00:00 2001 From: Dmytro Pykhtar <37850217+dimapihtar@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:08:49 +0200 Subject: [PATCH 021/125] switch to NeMo 2.0 recipes (#10948) * switch to NeMo 2.0 recipes Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * add NeMo 2.0 recipe support Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * fix unit tests Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * fix auto conf ci test Signed-off-by: dimapihtar * remove unused imports Signed-off-by: dimapihtar * Apply isort and black reformatting Signed-off-by: dimapihtar * fix stlye Signed-off-by: dimapihtar * fix typo Signed-off-by: dimapihtar --------- Signed-off-by: dimapihtar Signed-off-by: dimapihtar Co-authored-by: dimapihtar --- .github/workflows/cicd-main.yml | 16 +- 
examples/llm/auto_configurator/auto_config.py | 71 ++-- examples/nlp/dialogue/dialogue.py | 2 +- .../tools/auto_configurator/core/__init__.py | 13 - .../auto_configurator/core/base_config.py | 208 ----------- .../core/calculate_performance.py | 218 +++++------ .../auto_configurator/core/training_config.py | 27 +- .../llm/tools/auto_configurator/core/utils.py | 41 +- .../llm/tools/auto_configurator/runner.py | 69 ++-- .../llm/auto_conf/test_base_configs.py | 353 ------------------ .../llm/auto_conf/test_generate_configs.py | 110 ++---- 11 files changed, 262 insertions(+), 866 deletions(-) delete mode 100644 nemo/collections/llm/tools/auto_configurator/core/__init__.py delete mode 100644 tests/collections/llm/auto_conf/test_base_configs.py diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index ef1c65e77a6c4..5f8dc98e3948b 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -2590,27 +2590,19 @@ jobs: mkdir examples/llm/auto_configurator/auto_conf_logs python examples/llm/auto_configurator/auto_config.py \ - --logs_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ - --data_path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \ - --tokenizer_path=/home/TestData/nlp/gpt2_tokenizer \ + --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ --run_number=1 python examples/llm/auto_configurator/auto_config.py \ - --logs_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ - --data_path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \ - --tokenizer_path=/home/TestData/nlp/gpt2_tokenizer \ + --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ --run_number=2 python examples/llm/auto_configurator/auto_config.py \ - --logs_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ - --data_path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \ - --tokenizer_path=/home/TestData/nlp/gpt2_tokenizer \ + --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ --run_number=3 python examples/llm/auto_configurator/auto_config.py \ - --logs_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ - --data_path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \ - --tokenizer_path=/home/TestData/nlp/gpt2_tokenizer \ + --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \ --get_results AFTER_SCRIPT: | rm -rf examples/llm/auto_configurator/auto_conf_logs diff --git a/examples/llm/auto_configurator/auto_config.py b/examples/llm/auto_configurator/auto_config.py index 777e0d290fcbb..b9b9c7b023d5d 100644 --- a/examples/llm/auto_configurator/auto_config.py +++ b/examples/llm/auto_configurator/auto_config.py @@ -14,48 +14,76 @@ import argparse import os +from dataclasses import dataclass +from functools import partial import fiddle as fdl import nemo_run as run -from nemo.collections.llm import GPTConfig126M +from nemo.collections import llm +from nemo.collections.llm.gpt.model.llama import Llama3Config, LlamaModel from nemo.collections.llm.tools.auto_configurator import AutoConfigurator, generate_configs, get_results def get_args(): parser = argparse.ArgumentParser() parser.add_argument("--run_number", type=int, help="Number of config to run") - parser.add_argument("--logs_dir", type=str, help="Path where to save training logs") - parser.add_argument("--data_path", type=str, help="Path to the dataset") - parser.add_argument("--tokenizer_path", 
type=str, help="Path to the tokenizer") + parser.add_argument("--log_dir", type=str, help="Path where to save training logs") parser.add_argument("--get_results", action="store_true") return parser.parse_args() +@dataclass +class Llama3Config145M(Llama3Config): + num_layers: int = 12 + hidden_size: int = 768 + num_attention_heads: int = 16 + num_query_groups: int = 8 + ffn_hidden_size: int = 2688 + + +@run.cli.factory(target=llm.pretrain, name="llama3_145m") +def llama3_145m(num_nodes=1, num_gpus_per_node=1): + # Setup Llama3 145M config + recipe = partial(llm.llama3_8b.pretrain_recipe, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)() + recipe.data.global_batch_size = 16 + recipe.data.seq_length = 2048 + + recipe.trainer.strategy.context_parallel_size = 1 + recipe.model.config.seq_length = recipe.data.seq_length + + recipe = run.Partial( + llm.pretrain, + model=run.Config(LlamaModel, config=run.Config(Llama3Config145M)), + trainer=recipe.trainer, + data=recipe.data, + log=recipe.log, + optim=recipe.optim, + resume=None, + ) + + return recipe + + def train_config(args): - # GPT-3 126M + # Llama3 145M # This example will generate 3 configs. - # It is expected that this script will be run 3 times with changing --run_number flag for each run from 0 to 2. + # It is expected that this script will be run 3 times with changing --run_number flag for each run from 1 to 3. # After all configurations are trained, please trigger the script using --get_results flag. + + # Get Auto Conf runner runner = AutoConfigurator( - model=run.Config(GPTConfig126M), - num_nodes=1, - gpus_per_node=1, + recipe=partial(llama3_145m)(), gpu_memory_gb=40, - global_batch_size=16, - seq_length=512, tensor_parallel_sizes=[1], pipeline_parallel_sizes=[1], micro_batch_sizes=[1, 2, 4], max_training_days=1, - max_steps_per_run=25, + max_steps_per_run=10, num_tokens_in_b=10, - vocab_size=51200, - tokenizer_type="autotokenizer", - tokenizer_path=args.tokenizer_path, - data_paths=args.data_path, - path_to_logs=args.logs_dir, + vocab_size=32000, + path_to_logs=args.log_dir, ) base_cfg, configs = generate_configs(runner) @@ -65,14 +93,13 @@ def train_config(args): names = list(configs.keys()) # Run pre-training - partial = partials[args.run_number - 1] - partial.log.log_dir = os.path.join(args.logs_dir, names[args.run_number - 1]) - pretrain = fdl.build(partial) + pretrain_cfg = partials[args.run_number - 1] # partial(llama3_145m)() # + pretrain = fdl.build(pretrain_cfg) pretrain() else: # # Get Auto Configurator results - get_results(base_cfg, runner, args.logs_dir) - print(f"The results were successfully saved to {args.logs_dir}.") + get_results(base_cfg, runner, args.log_dir) + print(f"The results were successfully saved to {args.log_dir}.") def main(): diff --git a/examples/nlp/dialogue/dialogue.py b/examples/nlp/dialogue/dialogue.py index 4284fed42d223..578895a2ad434 100644 --- a/examples/nlp/dialogue/dialogue.py +++ b/examples/nlp/dialogue/dialogue.py @@ -63,7 +63,7 @@ @hydra_runner(config_path="conf", config_name="dialogue_config") def main(cfg: DictConfig) -> None: pl.seed_everything(42) - logging.warning('This script is no longer supported in NeMo and is scheduled for removal in the 23.11 release.') + logging.warning('This script is no longer supported in NeMo and is scheduled for removal in the 24.11 release.') logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') try: diff --git a/nemo/collections/llm/tools/auto_configurator/core/__init__.py b/nemo/collections/llm/tools/auto_configurator/core/__init__.py deleted 
file mode 100644 index d9155f923f186..0000000000000 --- a/nemo/collections/llm/tools/auto_configurator/core/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/nemo/collections/llm/tools/auto_configurator/core/base_config.py b/nemo/collections/llm/tools/auto_configurator/core/base_config.py index a82823c71248f..b621b0567c050 100644 --- a/nemo/collections/llm/tools/auto_configurator/core/base_config.py +++ b/nemo/collections/llm/tools/auto_configurator/core/base_config.py @@ -12,214 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import torch -from megatron.core.optimizer import OptimizerConfig -from pytorch_lightning.loggers import TensorBoardLogger - -from nemo import lightning as nl -from nemo.collections.common.tokenizers import AutoTokenizer, SentencePieceTokenizer -from nemo.collections.llm import PreTrainingDataModule -from nemo.collections.llm.utils import Config -from nemo.lightning.pytorch.optim import CosineAnnealingScheduler, MegatronOptimizerModule -from nemo.utils.exp_manager import TimingCallback - -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class BaseConfig: - def __init__(self, config=None): - """ - Args: - config (AutoConfigurator): auto configurator runner config. - """ - - self.config = config - - self.model = self.get_model() - self.optim = self.get_optim() - self.trainer = self.get_trainer() - self.data = self.get_data() - self.log = self.get_logger() - self.run = self.get_run_config() - self.tokenizer = self.get_tokenizer(config.tokenizer_type, config.tokenizer_path) - - def get_model(self): - """Function that returns model config. - - Returns: - Config: model config. - """ - - self.config.model.seq_length = self.config.seq_length - - return self.config.model - - def get_optim(self) -> Config[OptimizerConfig]: - """Function that returns optimizer config. - - Returns: - Config[OptimizerConfig]: optimizer config. 
- """ - optim_params = { - "optimizer": "adam", - "lr": 1e-4, - "min_lr": 1e-5, - "use_distributed_optimizer": True, - "bf16": True, - "adam_beta1": 0.9, - "adam_beta2": 0.95, - "clip_grad": 1.0, - "adam_eps": 1e-5, - } - - optim_config = Config( - OptimizerConfig, - **optim_params, - ) - - sched = Config( - CosineAnnealingScheduler, - warmup_steps=10, - constant_steps=0, - min_lr=optim_config.min_lr, - ) - - return Config( - MegatronOptimizerModule, - config=optim_config, - lr_scheduler=sched, - ) - - def get_trainer(self) -> Config[nl.Trainer]: - """Function that returns config for PTL trainer. - - Returns: - Config[nl.Trainer]: trainer config. - """ - - trainer_config = { - "accelerator": "gpu", - "enable_checkpointing": False, - "use_distributed_sampler": False, - "max_epochs": None, - "log_every_n_steps": 1, - "limit_val_batches": 1, - "limit_test_batches": 1, - "accumulate_grad_batches": 1, - "num_nodes": self.config.num_nodes, - "devices": self.config.num_gpus, - "max_steps": self.config.max_steps_per_run, - "val_check_interval": self.config.max_steps_per_run, - } - - strategy = Config( - nl.MegatronStrategy, - pipeline_dtype=torch.bfloat16, - ) - - return Config( - nl.Trainer, - **trainer_config, - strategy=strategy, - plugins=Config(nl.MegatronMixedPrecision, precision="bf16-mixed"), - callbacks=[Config(TimingCallback)], - ) - - def get_tokenizer(self, tokenizer_type: str, tokenizer_path: str) -> Config: - """Function that returns the tokenizer config. - - Args: - tokenizer_type (str): tokenizer type. - tokenizer_path (str): path to the tokenizer. - - Returns: - Config: tokenizer config. - """ - - if tokenizer_type == "sentencepiece": - return Config(SentencePieceTokenizer, model_path=tokenizer_path) - else: - return Config(AutoTokenizer, pretrained_model_name=tokenizer_path) - - def get_data(self) -> Config[PreTrainingDataModule]: - """Function that returns dataset config. - - Returns: - Config[PreTrainingDataModule]: data config. - """ - - # Data config - data_config = { - "paths": self.config.data_paths, - "seq_length": self.config.seq_length, - "global_batch_size": self.config.global_batch_size, - "num_workers": 2, - "index_mapping_dir": None, - } - - # Define the tokenizer - tokenizer = self.get_tokenizer( - self.config.tokenizer_type, - self.config.tokenizer_path, - ) - - return Config( - PreTrainingDataModule, - **data_config, - tokenizer=tokenizer, - ) - - def get_logger(self) -> Config[nl.NeMoLogger]: - """Function that returns the training strategy. - - Returns: - Config[nl.NeMoLogger]: NeMo Logger config. - """ - - # Define TensorBoard Logger - tb_logger = Config(TensorBoardLogger, save_dir="tb_logs") - - ckpt = Config( - nl.ModelCheckpoint, - monitor="reduced_train_loss", - save_last=False, - save_top_k=0, - ) - - return Config( - nl.NeMoLogger, - ckpt=ckpt, - tensorboard=tb_logger, - wandb=None, - log_dir=self.config.path_to_logs, - ) - - def get_run_config(self) -> dict: - """Function that returns config for cluster job. - - Returns: - dict: cluster job config. 
- """ - - run_config = { - "name": self.config.model.__class__.__name__, - "time_limit": f"0-00:{self.config.max_minutes_per_run}:00", - } - - return run_config - def calculate_model_size( gpu_count: int, diff --git a/nemo/collections/llm/tools/auto_configurator/core/calculate_performance.py b/nemo/collections/llm/tools/auto_configurator/core/calculate_performance.py index 5b7ac0ebc4d3c..1620c608e549f 100644 --- a/nemo/collections/llm/tools/auto_configurator/core/calculate_performance.py +++ b/nemo/collections/llm/tools/auto_configurator/core/calculate_performance.py @@ -42,11 +42,11 @@ def get_results( vocab_size = train_config.vocab_size num_nodes = train_config.num_nodes - gpus_per_node = train_config.gpus_per_node + gpus_per_node = train_config.num_gpus - layers = base_config.model.num_layers - hs = base_config.model.hidden_size - ffn_hs = base_config.model.ffn_hidden_size + layers = base_config.model.config.num_layers + hs = base_config.model.config.hidden_size + ffn_hs = base_config.model.config.ffn_hidden_size training_logs = path_to_save final_result_logs = path_to_save @@ -60,9 +60,7 @@ def get_results( "CP", "EP", "MBS", - "Act Ckpt Layers", - "Act Ckpt Micro Bathes", - "Act Ckpt Layers per Pipeline", + "VP", "Num Layers", "Hidden Size", "FFN Hidden Size", @@ -83,9 +81,7 @@ def get_results( "CP", "EP", "MBS", - "Act Ckpt Layers", - "Act Ckpt Micro Bathes", - "Act Ckpt Layers per Pipeline", + "VP", "Num Layers", "Hidden Size", "FFN Hidden Size", @@ -96,105 +92,96 @@ def get_results( ] result = [] errors = [] + training_logs = os.path.abspath(training_logs) + error_files = find_tb_logs(training_logs, "nemo_error_log") + tb_files = find_tb_logs(training_logs, "events") dirs = [f.path for f in os.scandir(training_logs) if f.is_dir()] - for candidate_dir in dirs: - logs_dir = os.path.join(training_logs, candidate_dir, "tb_logs/lightning_logs") - logs_folder = [f.path for f in os.scandir(logs_dir) if f.is_dir()][0] - tp, pp, cp, ep, mbs, act_ckpt, num_mbs_act, act_per_pipe = get_config(candidate_dir) - - for f in os.listdir(logs_folder): - if f.endswith("0.txt"): - error_file = os.path.join(logs_folder, f) - error = find_error(error_file) - if error: - errors.append( - [ - model_name, - model_size, - seq_length, - tp, - pp, - cp, - ep, - mbs, - act_ckpt, - num_mbs_act, - act_per_pipe, - layers, - hs, - ffn_hs, - global_batch_size, - num_nodes, - gpus_per_node, - error, - ] - ) - - files = os.listdir(logs_folder) - for f in files: - if f.startswith("events"): - event_file = os.path.join(logs_folder, f) - ea = event_accumulator.EventAccumulator(event_file) - ea.Reload() - try: - timing_list = ea.Scalars("train_step_timing in s") - if len(timing_list) <= 6: - continue - timing_list = [x.value for x in timing_list[5:]] - avg_global_step_time = round(sum(timing_list) / len(timing_list), 4) - samples_per_s = round(global_batch_size / avg_global_step_time, 2) - m_tflops, m_tflops_gpu = calculate_tflops( - model_name=model_name, - gbs=global_batch_size, - enc_seq_len=seq_length, - dec_seq_len=seq_length, - hs=hs, - ffn_hs=ffn_hs, - layers=layers, - vocab=vocab_size, - nodes=num_nodes, - gpus_per_node=gpus_per_node, - time_per_step=avg_global_step_time, - ) - config_name = f"tp{tp}_pp{pp}_cp{cp}_ep{ep}_mbs{mbs}_act_{act_ckpt}_num_mbs_act_{num_mbs_act}_act_per_pipe_{act_per_pipe}" - result.append( - [ - model_name, - model_size, - seq_length, - tp, - pp, - cp, - ep, - mbs, - act_ckpt, - num_mbs_act, - act_per_pipe, - layers, - hs, - ffn_hs, - global_batch_size, - num_nodes, - gpus_per_node, - 
avg_global_step_time, - samples_per_s, - m_tflops_gpu, - m_tflops, - ] - ) - finally: - continue - result.sort(key=lambda x: x[17]) + for error_file, tb_file, candidate_dir in zip(error_files, tb_files, dirs): + tp, pp, cp, ep, mbs, vp = get_config(candidate_dir) + error = find_error(error_file) + if error: + errors.append( + [ + model_name, + model_size, + seq_length, + tp, + pp, + cp, + ep, + mbs, + vp, + layers, + hs, + ffn_hs, + global_batch_size, + num_nodes, + gpus_per_node, + error, + ] + ) + + ea = event_accumulator.EventAccumulator(tb_file) + ea.Reload() + try: + timing_list = ea.Scalars("train_step_timing in s") + if len(timing_list) < 10: + continue + timing_list = [x.value for x in timing_list[1:]] + print(timing_list) + avg_global_step_time = round(sum(timing_list) / len(timing_list), 2) + samples_per_s = round(global_batch_size / avg_global_step_time, 2) + print(samples_per_s) + m_tflops, m_tflops_gpu = calculate_tflops( + model_name=model_name, + gbs=global_batch_size, + enc_seq_len=seq_length, + dec_seq_len=seq_length, + hs=hs, + ffn_hs=ffn_hs, + layers=layers, + vocab=vocab_size, + nodes=num_nodes, + gpus_per_node=gpus_per_node, + time_per_step=avg_global_step_time, + ) + result.append( + [ + model_name, + model_size, + seq_length, + tp, + pp, + cp, + ep, + mbs, + vp, + layers, + hs, + ffn_hs, + global_batch_size, + num_nodes, + gpus_per_node, + avg_global_step_time, + samples_per_s, + m_tflops_gpu, + m_tflops, + ] + ) + finally: + continue + + result.sort(key=lambda x: x[15]) print(f"Top {min(output_top_n, len(result))} configs sorted from fastest to slowest:") for i, res in enumerate(result): - print(f"Config #{i+1}: {res[-1]} with {res[17]:.4f}s per global step.") + print(f"Config #{i+1}: {res[-1]} with {res[15]:.4f}s per global step.") if i + 1 == output_top_n: break - top_config = f"{model_name}_{model_size}b_{num_nodes}nodes_tp_{result[0][3]}_pp_{result[0][4]}_cp_{result[0][5]}_ep_{result[0][6]}_mbs_{result[0][7]}_act_ckpt_{result[0][8]}_num_mbs_act_{result[0][9]}_act_per_pipe_{result[0][10]}" + top_config = f"{model_name}_{model_size}b_{num_nodes}nodes_tp_{result[0][3]}_pp_{result[0][4]}_cp_{result[0][5]}_ep_{result[0][6]}_mbs_{result[0][7]}_vp_{result[0][8]}" print("\n==================================================") - print(f"Optimal config: {top_config} with {result[0][17]:.4f}s per global step.") + print(f"Optimal config: {top_config} with {result[0][15]:.4f}s per global step.") print("==================================================\n") # Save results as a CSV file. @@ -310,7 +297,8 @@ def get_config(run_name: str) -> tuple: Returns: tuple: model parallelism parameters. """ - pattern = r'_(tp|pp|cp|ep|mbs|act_ckpt|num_mbs_act|act_per_pipe)_([^_]+)' + + pattern = r'_(tp|pp|cp|ep|mbs|vp)_([^_]+)' # Find all matches in the input string matches = re.findall(pattern, run_name) @@ -324,11 +312,31 @@ def get_config(run_name: str) -> tuple: params["cp"], params["ep"], params["mbs"], - params["act_ckpt"], - params["num_mbs_act"], - params["act_per_pipe"], + params["vp"], ) +def find_tb_logs(logs_dir: str, tb_prefix: str) -> list: + """Function that finds tensorboard logs + + Args: + logs_dir (str): results directory. + + Returns: + list: list of tensorboard files. 
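A quick illustration of the run-name parsing done by get_config above. The regex is the one added in this patch; the run name is made up but follows the f-string format that modify_cfg now produces.

    import re

    pattern = r'_(tp|pp|cp|ep|mbs|vp)_([^_]+)'
    run_name = "gpt3_0.126b_1nodes_tp_1_pp_1_cp_1_ep_1_mbs_2_vp_None"
    print(dict(re.findall(pattern, run_name)))
    # {'tp': '1', 'pp': '1', 'cp': '1', 'ep': '1', 'mbs': '2', 'vp': 'None'}
    # every captured value is a string, including the 'None' for virtual pipelining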
+ """ + + tb_files = [] + # Walk through all directories and subdirectories + for root, dirs, files in os.walk(logs_dir): + for file in files: + # Check if the file starts with the tb prefix + if file.startswith(tb_prefix): + absolute_path = os.path.abspath(os.path.join(root, file)) + tb_files.append(absolute_path) + + return tb_files + + if __name__ == "__main__": main() diff --git a/nemo/collections/llm/tools/auto_configurator/core/training_config.py b/nemo/collections/llm/tools/auto_configurator/core/training_config.py index 087bf3c6fb0e8..f7bf4d30427db 100644 --- a/nemo/collections/llm/tools/auto_configurator/core/training_config.py +++ b/nemo/collections/llm/tools/auto_configurator/core/training_config.py @@ -49,17 +49,22 @@ def generate_grid_search_configs( model_name = train_cfg.model_type model_size_in_b = train_cfg.model_size_in_b + path_to_logs = train_cfg.path_to_logs # 2 * num_layers is needed because of encoder/decoder architecture. multiplier = 1 if model_name in GPT_BASED_MODELS else 2 - seq_length = base_cfg.model.seq_length - num_layers = base_cfg.model.num_layers if model_name in GPT_BASED_MODELS else base_cfg.model.encoder.num_layers + seq_length = base_cfg.model.config.seq_length + num_layers = ( + base_cfg.model.config.num_layers + if model_name in GPT_BASED_MODELS + else base_cfg.model.config.encoder.num_layers + ) if model_name in GPT_BASED_MODELS: act_method = None else: - act_method = base_cfg.model.encoder.activations_checkpoint_method + act_method = base_cfg.model.config.encoder.activations_checkpoint_method params = _calculate_tp_pp_mbs_grid( model_size_in_b=model_size_in_b, @@ -69,7 +74,6 @@ def generate_grid_search_configs( train_cfg=train_cfg, ) - max_minutes = train_cfg.max_minutes_per_run max_steps = train_cfg.max_steps_per_run num_nodes = train_cfg.num_nodes @@ -82,11 +86,11 @@ def generate_grid_search_configs( num_gpus = base_cfg.trainer.num_nodes * base_cfg.trainer.devices base_cfg.data.global_batch_size = params.gbs if model_name in GPT_BASED_MODELS: - att_heads = base_cfg.model.num_attention_heads - num_layers = base_cfg.model.num_layers + att_heads = base_cfg.model.config.num_attention_heads + num_layers = base_cfg.model.config.num_layers else: - att_heads = base_cfg.model.encoder.num_attention_heads - num_layers = base_cfg.model.encoder.num_layers + att_heads = base_cfg.model.config.encoder.num_attention_heads + num_layers = base_cfg.model.config.encoder.num_layers model_parallelism = (tp * pp * cp * ep) if (cp and ep) else (tp * pp) mod_gbs = params.gbs % (mbs * num_gpus / model_parallelism) mod_att_heads = att_heads % tp @@ -134,9 +138,9 @@ def generate_grid_search_configs( "ep": ep, "virtual_pipelines": virtual_pipelines, "mbs": mbs, - "max_minutes": max_minutes, "max_steps": max_steps, "num_nodes": num_nodes, + "path_to_logs": path_to_logs, "model_name": model_name, "model_size": model_size_in_b, } @@ -151,12 +155,11 @@ def generate_grid_search_configs( kwargs["act_per_pipe"] = act_per_pipe new_cfg = utils.modify_cfg(**kwargs) if new_cfg: # Save candidate cfg. - configs[new_cfg["run"]["name"]] = new_cfg + configs[new_cfg["name"]] = new_cfg else: new_cfg = utils.modify_cfg(**kwargs) if new_cfg: # Save candidate cfg. - config_name = new_cfg["run"]["name"] - new_cfg.pop("run") + config_name = new_cfg["name"] configs[config_name] = new_cfg print(f"\nAll candidate configurations created correctly. 
Total number of configs: {len(configs)}.\n") diff --git a/nemo/collections/llm/tools/auto_configurator/core/utils.py b/nemo/collections/llm/tools/auto_configurator/core/utils.py index 3441c7cdbf9ba..aeb23c0cafcea 100644 --- a/nemo/collections/llm/tools/auto_configurator/core/utils.py +++ b/nemo/collections/llm/tools/auto_configurator/core/utils.py @@ -338,7 +338,7 @@ def generic_base_config(config) -> dict: AutoConfigurator: config object for the Auto Configurator tool. """ - from nemo.collections.llm.tools.auto_configurator.core.base_config import BaseConfig, calculate_model_size + from nemo.collections.llm.tools.auto_configurator.core.base_config import calculate_model_size default_model = False if config.model_size_in_b else True @@ -350,7 +350,7 @@ def generic_base_config(config) -> dict: config.num_tokens_in_b, config.model_type, ) - base_cfg = BaseConfig(config) + base_cfg = config.recipe if default_model: params = ModelSizeParams( @@ -362,14 +362,14 @@ def generic_base_config(config) -> dict: params.init_params() if config.model_type in GPT_BASED_MODELS: - base_cfg.model.num_layers = params.layers - base_cfg.model.hidden_size = params.hs - base_cfg.model.num_attention_heads = params.att_h - base_cfg.model.kv_channels = params.kv + base_cfg.model.config.num_layers = params.layers + base_cfg.model.config.hidden_size = params.hs + base_cfg.model.config.num_attention_heads = params.att_h + base_cfg.model.config.kv_channels = params.kv if not params.ffn: - base_cfg.model.ffn_hidden_size = params.hs * 4 + base_cfg.model.config.ffn_hidden_size = params.hs * 4 else: - base_cfg.model.ffn_hidden_size = params.ffn + base_cfg.model.config.ffn_hidden_size = params.ffn config.model_size_in_b = model_size_in_b @@ -387,10 +387,10 @@ def modify_cfg( ep: int, virtual_pipelines: int, mbs: int, - max_minutes: int, max_steps: int, num_nodes: int, model_name: str, + path_to_logs: str, model_size, ) -> dict: """Modify the base configuration for the model with the new parameters that are specific to the current model, which the Auto Configurator tool heuristics selected. @@ -406,7 +406,6 @@ def modify_cfg( ep (int): Expert Parallelism (EP) value to be set for the model. virtual_pipelines (int): Virtual Pipelines value to be set for the model. mbs (int): Micro Batch Size (MBS) value to be set for the model. - max_minutes (int): maximum amount of time to run this model for. max_steps (int): maximum number of steps to run this model for. num_nodes (int): number of nodes to use for the training run. model_name (str): name of the model, i.e. gpt3, t5, mt5... 
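The validity check in the next hunk keeps a candidate only if the batch and model dimensions divide evenly across the chosen parallel layout; a toy example of that arithmetic, with made-up numbers, is shown below.

    # Toy values; only the three divisibility conditions mirror the Auto Configurator code.
    num_gpus, gbs, mbs = 8, 16, 2
    tp, pp, cp, ep = 2, 2, 1, 1
    att_heads, num_layers = 16, 12

    model_parallelism = tp * pp * cp * ep
    valid = (
        gbs % (mbs * num_gpus / model_parallelism) == 0  # every data-parallel rank gets whole micro-batches
        and att_heads % tp == 0                          # attention heads split evenly across TP ranks
        and num_layers % pp == 0                         # layers split evenly across PP stages
    )
    print(valid)  # True for this layout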
@@ -416,18 +415,18 @@ def modify_cfg( """ if model_name in GPT_BASED_MODELS: - att_heads = base_cfg.model.num_attention_heads - num_layers = base_cfg.model.num_layers + att_heads = base_cfg.model.config.num_attention_heads + num_layers = base_cfg.model.config.num_layers else: - att_heads = base_cfg.model.encoder.num_attention_heads - num_layers = base_cfg.model.encoder.num_layers + att_heads = base_cfg.model.config.encoder.num_attention_heads + num_layers = base_cfg.model.config.encoder.num_layers # gbs = mbs * num_gpus * accumulate_grad_batches / (tp * pp) num_gpus = base_cfg.trainer.num_nodes * base_cfg.trainer.devices gbs = base_cfg.data.global_batch_size - seq_len = base_cfg.model.seq_length + seq_len = base_cfg.model.config.seq_length - new_cfg = dict(run=base_cfg.run) + new_cfg = {} # dict(run=base_cfg.run) if act is not None: if model_name in GPT_BASED_MODELS: new_cfg["activations_checkpoint_num_layers"] = act @@ -448,6 +447,8 @@ def modify_cfg( new_cfg["pipeline_model_parallel_size"] = pp new_cfg["micro_batch_size"] = mbs new_cfg["global_batch_size"] = gbs + new_cfg["max_steps"] = max_steps + new_cfg["path_to_logs"] = path_to_logs if cp is not None: new_cfg["context_parallel_size"] = cp @@ -460,11 +461,11 @@ def modify_cfg( mod_layers = num_layers % pp if mod_gbs == 0 and mod_att_heads == 0 and mod_layers == 0: # Valid config - new_cfg["run"][ - "name" - ] = f"{model_name}_{str(model_size)}b_{num_nodes}nodes_tp_{tp}_pp_{pp}_cp_{cp}_ep_{ep}_mbs_{mbs}_act_ckpt_{act}_num_mbs_act_{num_mbs_act}_act_per_pipe_{act_per_pipe}" + new_cfg["name"] = ( + f"{model_name}_{str(model_size)}b_{num_nodes}nodes_tp_{tp}_pp_{pp}_cp_{cp}_ep_{ep}_mbs_{mbs}_vp_{virtual_pipelines}" + ) print( - f"Valid config: SeqLen={seq_len}, GBS={gbs}, MBS={mbs}, TP={tp}, PP={pp}, CP={cp}, EP={ep}, act_ckpt_layers={act}, num_mbs_act={num_mbs_act}, act_per_pipe={act_per_pipe}. Adding to directory." + f"Valid config: SeqLen={seq_len}, GBS={gbs}, MBS={mbs}, TP={tp}, PP={pp}, CP={cp}, EP={ep}, VP={virtual_pipelines}. Adding to directory." ) return new_cfg return None diff --git a/nemo/collections/llm/tools/auto_configurator/runner.py b/nemo/collections/llm/tools/auto_configurator/runner.py index 0c80c9a21a9e6..7afefaa3170ee 100644 --- a/nemo/collections/llm/tools/auto_configurator/runner.py +++ b/nemo/collections/llm/tools/auto_configurator/runner.py @@ -13,8 +13,8 @@ # limitations under the License. 
import copy +import os import re - from typing import List, Optional from nemo.collections.llm import GPTModel @@ -33,28 +33,15 @@ "nemotron", ] -SUPPORTED_TOKENIZERS = [ - "autotokenizer", - "sentencepiece", - "huggingface", -] - class AutoConfigurator: """Auto Configurator runner config class.""" def __init__( self, - model: Config = None, - num_nodes: int = None, - data_paths: List = None, + recipe: Partial = None, path_to_logs: str = None, - tokenizer_type: Optional[str] = "autotokenizer", - tokenizer_path: Optional[str] = "GPT2BPETokenizer", - gpus_per_node: Optional[int] = 8, gpu_memory_gb: Optional[int] = 80, - seq_length: Optional[int] = 2048, - global_batch_size: Optional[int] = "auto", tensor_parallel_sizes: Optional[List[int]] = "auto", pipeline_parallel_sizes: Optional[List[int]] = "auto", micro_batch_sizes: Optional[List[int]] = "auto", @@ -62,26 +49,18 @@ def __init__( expert_parallel_sizes: Optional[List[int]] = [1], min_model_parallel_size: Optional[int] = "auto", max_model_parallel_size: Optional[int] = "auto", - num_tokens_in_b: Optional[int] = 300, + num_tokens_in_b: Optional[int] = 1400, tflops_per_gpu: Optional[int] = 140, max_minutes_per_run: Optional[int] = 30, max_training_days: Optional[int] = 2, max_steps_per_run: Optional[int] = 50, - vocab_size: Optional[int] = 51200, + vocab_size: Optional[int] = 32000, ): """ Args: - model_type (Config): model type to be used for training. - num_nodes (int): number of nodes to be used for training. - data_paths (List): list of datafiles to be used for training. + recipe (Partial): recipe to be used for training. path_to_logs (str): path to the directory where the logs will be stored. - tokenizer_type (Optional[str]): tokenizer type. - tokenizer_path (Optional[str]): path to the tokenizer model. - model_size (Optional[int]): size of model to be trained. - gpus_per_node (Optional[int]): number of GPUs per node to be used. gpu_memory_gb (Optional[int]): memory per GPU, in GB. Currently 40GB and 80GB A100s/H100s supported. - seq_length (Optional[int]): model sequence length. Available seq_length list for GPT-based models: [2048, 4096, 8192, 16384, 32768]. - global_batch_size (Optional[int]): model global batch size. Set to "auto" if you want auto configurator to find optimal gbs. tensor_parallel_sizes (Optional[List[int]]): set to "auto" to use our recommendation, or a list, such as [1, 2, 4, 8]. pipeline_parallel_sizes (Optional[List[int]]): set to "auto" to use our recommendation, or a list, such as [1, 2, 4, 8]. micro_batch_sizes (Optional[List[int]]): set to "auto" to use our recommendation, or a list, such as [1, 2, 4, 8]. @@ -104,13 +83,20 @@ def __init__( setattr(self, key, value) logging.info(self._get_message(config)) - model_type = self._get_model_type(model) + model_type = self._get_model_type(recipe.model.config) assert model_type in SUPPORTED_MODELS, f"model_type must be set to one of {SUPPORTED_MODELS}." - assert tokenizer_type in SUPPORTED_TOKENIZERS, f"tokenizer_type must be set to one of {SUPPORTED_TOKENIZERS}." - assert num_nodes, "num_nodes value must be specified." - assert data_paths, "training data must be specified." + assert recipe.data.seq_length in [ + 2048, + 4096, + 8192, + 16384, + 32768, + ], "Available seq_length list for GPT-based models: [2048, 4096, 8192, 16384, 32768]." assert path_to_logs, f"path_to_logs parameter must be specified." 
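# --- Editor's sketch (illustration only; not part of this patch) ---------------------------
# The runner is now driven by a NeMo-Run recipe instead of raw model/tokenizer/data arguments.
# Based on the updated tests later in this patch, a typical invocation looks roughly like the
# following (the log directory and the parallelism bounds are hypothetical):
from functools import partial

from nemo.collections import llm
from nemo.collections.llm.tools.auto_configurator import AutoConfigurator, generate_configs

# Build a pre-training recipe and override the data settings the heuristics should target.
recipe = partial(llm.llama3_70b.pretrain_recipe, num_nodes=128, num_gpus_per_node=8)()
recipe.data.global_batch_size = 2048

runner = AutoConfigurator(
    recipe=recipe,
    path_to_logs="/results/auto_conf",  # hypothetical log directory
    micro_batch_sizes=[1],
    min_model_parallel_size=16,
    max_model_parallel_size=64,
)

# Returns the base config plus a dict of run.Partial pretrain jobs, one per parallelism candidate.
base_config, configs = generate_configs(runner)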
- gpu_count = num_nodes * gpus_per_node + + self.num_gpus = recipe.trainer.devices + self.num_nodes = recipe.trainer.num_nodes + gpu_count = self.num_nodes * self.num_gpus assert gpu_count > 0, "num_nodes * gpus_per_node must be an int larger than zero." assert gpu_memory_gb in ( 40, @@ -119,9 +105,10 @@ def __init__( assert max_minutes_per_run >= 10, "max_minutes_per_run must be an int and be at least 10 minutes." self.model_type = model_type - self.model_size_in_b = self._get_model_size(model) + self.model_size_in_b = self._get_model_size(recipe.model.config) self.gpu_count = gpu_count - self.num_gpus = gpus_per_node + self.seq_length = recipe.data.seq_length + self.global_batch_size = recipe.data.global_batch_size def _get_message(self, config: dict) -> str: """ @@ -203,13 +190,10 @@ def generate_configs(runner_config: AutoConfigurator = None) -> dict: """ # Generate base config for the given model size - base_cfg, train_cfg = generic_base_config(runner_config) + base_config, train_config = generic_base_config(runner_config) # Launch grid search for training constraints - base_config, train_configs = generate_grid_search_configs(base_cfg, train_cfg) - - tokenizer = base_config.tokenizer - model = Config(GPTModel, config=base_config.model, tokenizer=tokenizer) + base_config, train_configs = generate_grid_search_configs(base_config, train_config) configs = {} for name, config in train_configs.items(): @@ -231,11 +215,16 @@ def generate_configs(runner_config: AutoConfigurator = None) -> dict: ) if config.get("tensor_model_parallel_size") > 1: trainer.strategy.sequence_parallel = True + trainer.max_steps = config.get("max_steps") + trainer.log_every_n_steps = 1 + + log.log_dir = os.path.join(config.get("path_to_logs"), name) + log.ckpt.save_last = False # Set the directory where to save the logs configs[name] = Partial( pretrain, - model=model, + model=base_config.model, trainer=trainer, data=data, optim=base_config.optim, @@ -243,4 +232,4 @@ def generate_configs(runner_config: AutoConfigurator = None) -> dict: resume=None, ) - return base_cfg, configs + return base_config, configs diff --git a/tests/collections/llm/auto_conf/test_base_configs.py b/tests/collections/llm/auto_conf/test_base_configs.py deleted file mode 100644 index d12f065d8168d..0000000000000 --- a/tests/collections/llm/auto_conf/test_base_configs.py +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import nemo_run as run -import torch - -from megatron.core.optimizer import OptimizerConfig -from pytorch_lightning.loggers import TensorBoardLogger - -from nemo import lightning as nl -from nemo.collections.common.tokenizers import AutoTokenizer -from nemo.collections.llm import ( - GemmaConfig2B, - GPTConfig126M, - Llama3Config8B, - MistralConfig7B, - MixtralConfig8x3B, - Nemotron4Config22B, - PreTrainingDataModule, -) -from nemo.collections.llm.tools.auto_configurator import AutoConfigurator -from nemo.collections.llm.tools.auto_configurator.core.base_config import BaseConfig -from nemo.lightning.pytorch.optim import CosineAnnealingScheduler, MegatronOptimizerModule -from nemo.utils.exp_manager import TimingCallback - - -def get_tokenizer() -> run.Config: - return run.Config(AutoTokenizer, pretrained_model_name="GPT2BPETokenizer") - - -def get_data(seq_length, global_batch_size) -> run.Config[PreTrainingDataModule]: - config = { - "paths": "/", - "seq_length": seq_length, - "global_batch_size": global_batch_size, - "num_workers": 2, - "index_mapping_dir": None, - } - - return run.Config( - PreTrainingDataModule, - **config, - tokenizer=get_tokenizer(), - ) - - -def get_trainer(num_nodes) -> run.Config[nl.Trainer]: - trainer_config = { - "accelerator": "gpu", - "enable_checkpointing": False, - "use_distributed_sampler": False, - "max_epochs": None, - "log_every_n_steps": 1, - "limit_val_batches": 1, - "limit_test_batches": 1, - "accumulate_grad_batches": 1, - "num_nodes": num_nodes, - "devices": 8, - "max_steps": 50, - "val_check_interval": 50, - } - - strategy = run.Config( - nl.MegatronStrategy, - pipeline_dtype=torch.bfloat16, - ) - - return run.Config( - nl.Trainer, - **trainer_config, - strategy=strategy, - plugins=run.Config(nl.MegatronMixedPrecision, precision="bf16-mixed"), - callbacks=[run.Config(TimingCallback)], - ) - - -def get_optim() -> run.Config[OptimizerConfig]: - optim_params = { - "optimizer": "adam", - "lr": 1e-4, - "min_lr": 1e-5, - "use_distributed_optimizer": True, - "bf16": True, - "adam_beta1": 0.9, - "adam_beta2": 0.95, - "clip_grad": 1.0, - "adam_eps": 1e-5, - } - - optim_config = run.Config( - OptimizerConfig, - **optim_params, - ) - - sched = run.Config( - CosineAnnealingScheduler, - warmup_steps=10, - constant_steps=0, - min_lr=optim_config.min_lr, - ) - - return run.Config( - MegatronOptimizerModule, - config=optim_config, - lr_scheduler=sched, - ) - - -def get_logger() -> run.Config[nl.NeMoLogger]: - tb_logger = run.Config(TensorBoardLogger, save_dir="tb_logs") - - ckpt = run.Config( - nl.ModelCheckpoint, - monitor="reduced_train_loss", - save_last=False, - save_top_k=0, - ) - - return run.Config( - nl.NeMoLogger, - ckpt=ckpt, - tensorboard=tb_logger, - wandb=None, - log_dir="/", - ) - - -class TestBaseConfigs: - def test_gpt3_base_config(self): - # GPT3 7B - model_config = run.Config(GPTConfig126M) - runner = AutoConfigurator(model=model_config, num_nodes=8, path_to_logs="/", data_paths="/") - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(2048, 'auto') - trainer_config = get_trainer(8) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 0.126, f"0.126 is expected size for {model_config} but got {model_size}" - assert model_type == "gpt3", f"gpt3 is expected model type for 
{model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" - - def test_llama_base_config(self): - # Llama3 8B - model_config = run.Config(Llama3Config8B) - runner = AutoConfigurator( - model=model_config, - num_nodes=16, - path_to_logs="/", - data_paths="/", - seq_length=8192, - global_batch_size=2048, - ) - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(8192, 2048) - trainer_config = get_trainer(16) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 8, f"8 is expected size for {model_config} but got {model_size}" - assert model_type == "llama", f"llama is expected model type for {model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" - - def test_mistral_base_config(self): - # Mistral 7B - model_config = run.Config(MistralConfig7B) - runner = AutoConfigurator( - model=model_config, - num_nodes=16, - path_to_logs="/", - data_paths="/", - seq_length=32768, - global_batch_size=2048, - ) - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(32768, 2048) - trainer_config = get_trainer(16) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 7, f"7 is expected size for {model_config} but got {model_size}" - assert model_type == "mistral", f"mistral is expected model type for {model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" - - def test_mixtral_base_config(self): - # Mixtral 8x3B - model_config = 
run.Config(MixtralConfig8x3B) - runner = AutoConfigurator( - model=model_config, - num_nodes=16, - path_to_logs="/", - data_paths="/", - seq_length=4096, - global_batch_size=2048, - ) - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(4096, 2048) - trainer_config = get_trainer(16) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 3, f"3 is expected size for {model_config} but got {model_size}" - assert model_type == "mixtral", f"mixtral is expected model type for {model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" - - def test_gemma_base_config(self): - # Gemma 2B - model_config = run.Config(GemmaConfig2B) - runner = AutoConfigurator( - model=model_config, - num_nodes=8, - path_to_logs="/", - data_paths="/", - seq_length=4096, - global_batch_size=1024, - ) - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(4096, 1024) - trainer_config = get_trainer(8) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 2, f"2 is expected size for {model_config} but got {model_size}" - assert model_type == "gemma", f"gemma is expected model type for {model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" - - def test_nemotron_base_config(self): - # Nemotron 22B - model_config = run.Config(Nemotron4Config22B) - runner = AutoConfigurator( - model=model_config, - num_nodes=64, - path_to_logs="/", - data_paths="/", - seq_length=4096, - global_batch_size=2048, - ) - base_config = BaseConfig(runner) - model_size = runner._get_model_size(model_config) - model_type = runner._get_model_type(model_config) - data_config = get_data(4096, 2048) - trainer_config = get_trainer(64) - optim_config = get_optim() - logger_config = get_logger() - - assert ( - base_config.model == model_config - ), f"{model_config} is expected class object but got {base_config.model}" - assert model_size == 22, f"22 is expected size for {model_config} but got {model_size}" - assert model_type == "nemotron", 
f"nemotron is expected model type for {model_config} but got {model_type}" - assert ( - base_config.data == data_config - ), f"f{data_config} is expected data config for {model_config} but got {base_config.data}" - assert ( - base_config.trainer == trainer_config - ), f"f{trainer_config} is expected trainer config for {model_config} but got {base_config.trainer}" - assert ( - base_config.optim == optim_config - ), f"f{optim_config} is expected trainer config for {model_config} but got {base_config.optim}" - assert ( - base_config.log == logger_config - ), f"f{logger_config} is expected trainer config for {model_config} but got {logger_config}" diff --git a/tests/collections/llm/auto_conf/test_generate_configs.py b/tests/collections/llm/auto_conf/test_generate_configs.py index f10425631f985..0d3e230e39fac 100644 --- a/tests/collections/llm/auto_conf/test_generate_configs.py +++ b/tests/collections/llm/auto_conf/test_generate_configs.py @@ -12,16 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import nemo_run as run - -from nemo.collections.llm import ( - GemmaConfig7B, - GPTConfig5B, - Llama3Config70B, - MistralConfig7B, - MixtralConfig8x22B, - Nemotron3Config8B, -) +from functools import partial + +from nemo.collections import llm from nemo.collections.llm.tools.auto_configurator import AutoConfigurator, generate_configs @@ -42,58 +35,12 @@ def get_auto_configs(configs): class TestGenerateConfgis: - def test_gpt_model(self): - # GPT3 126M - runner = AutoConfigurator( - model=run.Config(GPTConfig5B), - num_nodes=16, - seq_length=2048, - global_batch_size=2048, - tensor_parallel_sizes=[4], - pipeline_parallel_sizes=[2], - micro_batch_sizes=[1, 2], - context_parallel_sizes=[1], - expert_parallel_sizes=[1], - min_model_parallel_size=8, - max_model_parallel_size=8, - data_paths="/", - path_to_logs="/", - ) - - _, configs = generate_configs(runner) - - mbs = [1, 2] - for run_name, config, mb in zip(configs.keys(), configs.values(), mbs): - assert config.data.micro_batch_size == mb - assert config.data.seq_length == 2048 - assert config.data.global_batch_size == 2048 - - assert len(configs) == 2, f"{len(configs)} configurations were generated but 2 were expected." - - auto_configs = get_auto_configs(configs) - assert auto_configs[0] == [ - 4, - 2, - 1, - 1, - 1, - ], f"[4, 2, 1, 1, 1] is expected configuration output but got {auto_configs[0]}." - - assert auto_configs[1] == [ - 4, - 2, - 1, - 1, - 2, - ], f"[4, 2, 1, 1, 2] is expected configuration output but got {auto_configs[1]}." 
- def test_llama_model(self): # Llama3 70B + recipe = partial(llm.llama3_70b.pretrain_recipe, num_nodes=128, num_gpus_per_node=8)() + recipe.data.global_batch_size = 2048 runner = AutoConfigurator( - model=run.Config(Llama3Config70B), - num_nodes=128, - seq_length=8192, - global_batch_size=2048, + recipe=recipe, tensor_parallel_sizes="auto", pipeline_parallel_sizes="auto", micro_batch_sizes=[1], @@ -101,7 +48,6 @@ def test_llama_model(self): expert_parallel_sizes=[1], min_model_parallel_size=16, max_model_parallel_size=64, - data_paths="/", path_to_logs="/", ) @@ -142,11 +88,13 @@ def test_llama_model(self): def test_mistral_model(self): # Mistral 7B + recipe = partial(llm.mistral_7b.pretrain_recipe, num_nodes=16, num_gpus_per_node=8)() + recipe.data.seq_length = 4096 + recipe.data.global_batch_size = 2048 + recipe.model.config.seq_length = recipe.data.seq_length + runner = AutoConfigurator( - model=run.Config(MistralConfig7B), - num_nodes=16, - seq_length=4096, - global_batch_size=2048, + recipe=recipe, tensor_parallel_sizes=[4], pipeline_parallel_sizes=[1, 2], micro_batch_sizes=[1], @@ -154,7 +102,6 @@ def test_mistral_model(self): expert_parallel_sizes=[1], min_model_parallel_size=4, max_model_parallel_size=8, - data_paths="/", path_to_logs="/", ) @@ -187,11 +134,13 @@ def test_mistral_model(self): def test_mixtral_model(self): # Mixtral 8x22B + recipe = partial(llm.mixtral_8x22b.pretrain_recipe, num_nodes=16, num_gpus_per_node=8)() + recipe.data.seq_length = 4096 + recipe.data.global_batch_size = 2048 + recipe.model.config.seq_length = recipe.data.seq_length + runner = AutoConfigurator( - model=run.Config(MixtralConfig8x22B), - num_nodes=16, - seq_length=4096, - global_batch_size=2048, + recipe=recipe, tensor_parallel_sizes=[4], pipeline_parallel_sizes=[1], micro_batch_sizes=[1], @@ -199,7 +148,6 @@ def test_mixtral_model(self): expert_parallel_sizes=[1, 2], min_model_parallel_size=4, max_model_parallel_size=8, - data_paths="/", path_to_logs="/", ) @@ -232,11 +180,13 @@ def test_mixtral_model(self): def test_gemma_model(self): # Gemma 7B + recipe = partial(llm.gemma_7b.pretrain_recipe, num_nodes=16, num_gpus_per_node=8)() + recipe.data.seq_length = 8192 + recipe.data.global_batch_size = 2048 + recipe.model.config.seq_length = recipe.data.seq_length + runner = AutoConfigurator( - model=run.Config(GemmaConfig7B), - num_nodes=16, - seq_length=8192, - global_batch_size=2048, + recipe=recipe, tensor_parallel_sizes=[2], pipeline_parallel_sizes=[2], micro_batch_sizes=[1, 2], @@ -244,7 +194,6 @@ def test_gemma_model(self): expert_parallel_sizes=[1], min_model_parallel_size=4, max_model_parallel_size=8, - data_paths="/", path_to_logs="/", ) @@ -277,11 +226,13 @@ def test_gemma_model(self): def test_nemotron_model(self): # Nemotron3 8B + recipe = partial(llm.nemotron3_8b.pretrain_recipe, num_nodes=16, num_gpus_per_node=8)() + recipe.data.seq_length = 4096 + recipe.data.global_batch_size = 2048 + recipe.model.config.seq_length = recipe.data.seq_length + runner = AutoConfigurator( - model=run.Config(Nemotron3Config8B), - num_nodes=16, - seq_length=4096, - global_batch_size=2048, + recipe=recipe, tensor_parallel_sizes=[1], pipeline_parallel_sizes=[4], micro_batch_sizes=[1, 2], @@ -289,7 +240,6 @@ def test_nemotron_model(self): expert_parallel_sizes=[1], min_model_parallel_size=4, max_model_parallel_size=8, - data_paths="/", path_to_logs="/", ) From 540e40dae42f5e1723ae68f568cdca6f7cc6be7f Mon Sep 17 00:00:00 2001 From: Huy Vu <86480512+huvunvidia@users.noreply.github.com> Date: Tue, 29 Oct 2024 
09:44:05 -0400 Subject: [PATCH 022/125] Adding nemo-run recipes for NeMo 2.0 T5 (#10964) * initial commit * adding all recipes and tests * add tests for 220m, 3b, 11b; runnable with example_nemorun_t5.py * restore test_llama3_70b.py * remove unused libraties * remove unused libraries * address Ali's comments * address Ali's comments * edit test_t5_3b.py * Apply isort and black reformatting Signed-off-by: huvunvidia --------- Signed-off-by: huvunvidia Co-authored-by: Huy Vu2 Co-authored-by: huvunvidia --- nemo/collections/llm/recipes/__init__.py | 6 + nemo/collections/llm/recipes/t5_11b.py | 204 ++++++++++++++++++ nemo/collections/llm/recipes/t5_220m.py | 203 +++++++++++++++++ nemo/collections/llm/recipes/t5_3b.py | 204 ++++++++++++++++++ nemo/collections/llm/t5/data/mock.py | 189 ++++++++++++++++ nemo/collections/llm/t5/model/t5.py | 32 +++ tests/collections/llm/recipes/test_t5_11b.py | 92 ++++++++ tests/collections/llm/recipes/test_t5_220m.py | 91 ++++++++ tests/collections/llm/recipes/test_t5_3b.py | 92 ++++++++ 9 files changed, 1113 insertions(+) create mode 100644 nemo/collections/llm/recipes/t5_11b.py create mode 100644 nemo/collections/llm/recipes/t5_220m.py create mode 100644 nemo/collections/llm/recipes/t5_3b.py create mode 100644 nemo/collections/llm/t5/data/mock.py create mode 100644 tests/collections/llm/recipes/test_t5_11b.py create mode 100644 tests/collections/llm/recipes/test_t5_220m.py create mode 100644 tests/collections/llm/recipes/test_t5_3b.py diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index c1af8907a002b..b02547acfffe5 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -60,6 +60,9 @@ starcoder2_3b, starcoder2_7b, starcoder2_15b, + t5_3b, + t5_11b, + t5_220m, ) from nemo.collections.llm.recipes.log.default import default_log, default_resume from nemo.collections.llm.recipes.optim import adam @@ -100,6 +103,9 @@ "nemotron4_22b_16k", "nemotron4_22b_64k", "nemotron4_340b", + "t5_220m", + "t5_3b", + "t5_11b", "starcoder", "starcoder2", "starcoder2_3b", diff --git a/nemo/collections/llm/recipes/t5_11b.py b/nemo/collections/llm/recipes/t5_11b.py new file mode 100644 index 0000000000000..09d4698793649 --- /dev/null +++ b/nemo/collections/llm/recipes/t5_11b.py @@ -0,0 +1,204 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
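# --- Editor's sketch (illustration only; not part of this patch) ---------------------------
# The new T5 recipes follow the same factory pattern as the existing GPT recipes in this
# directory. Based on the tests added later in this patch, a recipe can be built and adjusted
# in Python before launching (node counts below are hypothetical):
from nemo.collections.llm.recipes import t5_11b

# Build the default pre-training recipe (a run.Partial around llm.pretrain).
recipe = t5_11b.pretrain_recipe(name="t5_11b_pretrain", num_nodes=2, num_gpus_per_node=8)

# Individual pieces can be overridden, e.g. the trainer's parallelism layout.
recipe.trainer = t5_11b.trainer(
    tensor_parallelism=2, pipeline_parallelism=2, num_nodes=2, num_gpus_per_node=8
)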
+ + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from megatron.core.distributed import DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config11B, T5Model +from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler +from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule +from nemo.utils.exp_manager import TimingCallback + +NAME = "t5_11b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a T5 11B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the T5 11B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=t5_11b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + return run.Config(T5Model, config=run.Config(T5Config11B)) + + +def trainer( + tensor_parallelism: int = 4, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 20, + num_gpus_per_node: int = 8, + max_steps: int = 1000000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for T5 model. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + + Examples: + CLI usage: + $ nemo llm pretrain trainer=t5_11b ... + + Python API usage: + >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8) + >>> print(trainer_config) + + Note: + For more information on distributed training strategies, refer to the + NeMo documentation on multi-GPU and multi-node training. 
+ """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ddp=run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=True, + overlap_grad_reduce=True, + overlap_param_gather=True, + ), + ) + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + accumulate_grad_batches=1, + callbacks=callbacks, + devices=num_gpus_per_node, + limit_test_batches=50, + limit_val_batches=32, + log_every_n_steps=10, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=bf16_mixed(), + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=2000, + ) + + return trainer + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + dir: Optional[str] = None, name: str = "default", num_nodes: int = 20, num_gpus_per_node: int = 8, fn=pretrain +) -> run.Partial: + """ + Create a pre-training recipe for T5 11b model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory t5_11b + $ nemo llm pretrain --factory "t5_11b(num_nodes=2, name='my_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="t5_11b_pretrain", num_nodes=2) + >>> print(recipe) + + Note: + For more details on pre-training LLMs with NeMo, see the pre-training + guide in the `examples/llm/pretrain/` directory. + """ + + opt_config = OptimizerConfig( + optimizer='adam', + lr=0.0001, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=None, + warmup_ratio=0.01, + max_steps=1000000, + min_lr=0.00001, + ) + + return run.Partial( + fn, + model=model(), + trainer=trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=1920, micro_batch_size=24 + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=default_resume(), + ) diff --git a/nemo/collections/llm/recipes/t5_220m.py b/nemo/collections/llm/recipes/t5_220m.py new file mode 100644 index 0000000000000..a3b2b761b65b6 --- /dev/null +++ b/nemo/collections/llm/recipes/t5_220m.py @@ -0,0 +1,203 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from megatron.core.distributed import DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config220M, T5Model +from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler +from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule +from nemo.utils.exp_manager import TimingCallback + +NAME = "t5_220m" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a T5 220M model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the T5 220M model. + + Examples: + CLI usage: + $ nemo llm pretrain model=t5_220m ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + return run.Config(T5Model, config=run.Config(T5Config220M)) + + +def trainer( + tensor_parallelism: int = 1, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1000000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for T5 model. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + + Examples: + CLI usage: + $ nemo llm pretrain trainer=t5_220m ... + + Python API usage: + >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8) + >>> print(trainer_config) + + Note: + For more information on distributed training strategies, refer to the + NeMo documentation on multi-GPU and multi-node training. 
+ """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ddp=run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=True, + overlap_grad_reduce=True, + overlap_param_gather=True, + ), + ) + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + accumulate_grad_batches=1, + callbacks=callbacks, + devices=num_gpus_per_node, + limit_test_batches=50, + limit_val_batches=32, + log_every_n_steps=10, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=bf16_mixed(), + strategy=strategy, + use_distributed_sampler=False, + # DEBUGGING + val_check_interval=2000, + ) + + return trainer + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + dir: Optional[str] = None, name: str = "default", num_nodes: int = 1, num_gpus_per_node: int = 8, fn=pretrain +) -> run.Partial: + """ + Create a pre-training recipe for T5 220m model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory t5_220m + $ nemo llm pretrain --factory "t5_220m(num_nodes=2, name='my_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="t5_220m_pretrain", num_nodes=2) + >>> print(recipe) + + Note: + For more details on pre-training LLMs with NeMo, see the pre-training + guide in the `examples/llm/pretrain/` directory. + """ + + opt_config = OptimizerConfig( + optimizer='adam', + lr=0.0001, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=None, + warmup_ratio=0.01, + max_steps=1000000, + min_lr=0.00001, + ) + + return run.Partial( + fn, + model=model(), + trainer=trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config(MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=512, micro_batch_size=1), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=default_resume(), + ) diff --git a/nemo/collections/llm/recipes/t5_3b.py b/nemo/collections/llm/recipes/t5_3b.py new file mode 100644 index 0000000000000..08bcae895c3e6 --- /dev/null +++ b/nemo/collections/llm/recipes/t5_3b.py @@ -0,0 +1,204 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from megatron.core.distributed import DistributedDataParallelConfig +from megatron.core.optimizer import OptimizerConfig +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config3B, T5Model +from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler +from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule +from nemo.utils.exp_manager import TimingCallback + +NAME = "t5_3b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a T5 3B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the T5 3B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=t5_3b ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + return run.Config(T5Model, config=run.Config(T5Config3B)) + + +def trainer( + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 20, + num_gpus_per_node: int = 8, + max_steps: int = 1000000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for T5 model. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. + + Examples: + CLI usage: + $ nemo llm pretrain trainer=t5_3b ... + + Python API usage: + >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8) + >>> print(trainer_config) + + Note: + For more information on distributed training strategies, refer to the + NeMo documentation on multi-GPU and multi-node training. 
+ """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ddp=run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=True, + overlap_grad_reduce=True, + overlap_param_gather=True, + ), + ) + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + accumulate_grad_batches=1, + callbacks=callbacks, + devices=num_gpus_per_node, + limit_test_batches=50, + limit_val_batches=32, + log_every_n_steps=10, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=bf16_mixed(), + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=2000, + ) + + return trainer + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + dir: Optional[str] = None, name: str = "default", num_nodes: int = 20, num_gpus_per_node: int = 8, fn=pretrain +) -> run.Partial: + """ + Create a pre-training recipe for T5 3b model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory t5_3b + $ nemo llm pretrain --factory "t5_3b(num_nodes=2, name='my_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="t5_3b_pretrain", num_nodes=2) + >>> print(recipe) + + Note: + For more details on pre-training LLMs with NeMo, see the pre-training + guide in the `examples/llm/pretrain/` directory. + """ + + opt_config = OptimizerConfig( + optimizer='adam', + lr=0.0001, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=None, + warmup_ratio=0.01, + max_steps=1000000, + min_lr=0.00001, + ) + + return run.Partial( + fn, + model=model(), + trainer=trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=1920, micro_batch_size=24 + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=default_resume(), + ) diff --git a/nemo/collections/llm/t5/data/mock.py b/nemo/collections/llm/t5/data/mock.py new file mode 100644 index 0000000000000..eaf41d290da42 --- /dev/null +++ b/nemo/collections/llm/t5/data/mock.py @@ -0,0 +1,189 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Dict, List, Optional + +import numpy as np +import pytorch_lightning as pl +import torch +from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS +from torch.utils import data +from torch.utils.data import DataLoader, Dataset + +from nemo.lightning.pytorch.plugins import MegatronDataSampler +from nemo.utils.import_utils import safe_import + +_, HAVE_TE = safe_import("transformer_engine") + +if TYPE_CHECKING: + from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec + + +class MockDataModule(pl.LightningDataModule): + def __init__( + self, + seq_length: int = 512, + seq_length_dec: int = 128, + tokenizer: Optional["TokenizerSpec"] = None, + micro_batch_size: int = 4, + global_batch_size: int = 8, + rampup_batch_size: Optional[List[int]] = None, + num_train_samples: int = 10_000, + num_val_samples: int = 10_000, + num_test_samples: int = 10_000, + num_workers: int = 8, + pin_memory: bool = True, + persistent_workers: bool = False, + create_attention_mask: bool = False, + ): + super().__init__() + self.seq_length = seq_length + self.seq_length_dec = seq_length_dec + self.num_train_samples = num_train_samples + self.num_val_samples = num_val_samples + self.num_test_samples = num_test_samples + self.num_workers = num_workers + self.pin_memory = pin_memory + self.persistent_workers = persistent_workers + self.create_attention_mask = create_attention_mask or not HAVE_TE + + from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer + + self.tokenizer = tokenizer or get_nmt_tokenizer("megatron", "BertWordPieceCase") + self.data_sampler = MegatronDataSampler( + seq_len=self.seq_length, + micro_batch_size=micro_batch_size, + global_batch_size=global_batch_size, + rampup_batch_size=rampup_batch_size, + ) + + def setup(self, stage: str = "") -> None: + self._train_ds = _MockT5Dataset( + self.tokenizer, "train", self.num_train_samples, self.seq_length, self.seq_length_dec + ) + self._validation_ds = _MockT5Dataset( + self.tokenizer, "valid", self.num_val_samples, self.seq_length, self.seq_length_dec + ) + self._test_ds = _MockT5Dataset( + self.tokenizer, "test", self.num_test_samples, self.seq_length, self.seq_length_dec + ) + + def train_dataloader(self) -> TRAIN_DATALOADERS: + if not hasattr(self, "_train_ds"): + self.setup() + return self._create_dataloader(self._train_ds) + + def val_dataloader(self) -> EVAL_DATALOADERS: + if not hasattr(self, "_validation_ds"): + self.setup() + return self._create_dataloader(self._validation_ds) + + def test_dataloader(self) -> EVAL_DATALOADERS: + if not hasattr(self, "_test_ds"): + self.setup() + return self._create_dataloader(self._test_ds) + + def _create_dataloader(self, dataset, **kwargs) -> DataLoader: + return DataLoader( + dataset, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + persistent_workers=self.persistent_workers, + collate_fn=dataset.collate_fn, + **kwargs, + ) + + +class _MockT5Dataset(Dataset): + def __init__( + self, + tokenizer: "TokenizerSpec", + name: str, + num_samples: int, + seq_length: int, + seq_length_dec: 
int, + seed: int = 42, + create_attention_mask: bool = False, + ) -> None: + super().__init__() + self.name = name + self.seq_length = seq_length + self.seq_length_dec = seq_length_dec + self.vocab_size = tokenizer.vocab_size + self.length = num_samples + self.seed = seed + self.create_attention_mask = create_attention_mask + + self.mask_encoder = torch.ones((self.seq_length, self.seq_length), device='cpu') + self.mask_decoder = torch.tril(torch.ones((self.seq_length_dec, self.seq_length_dec), device='cpu')) + self.mask_encoder_decoder = torch.ones((self.seq_length_dec, self.seq_length), device='cpu') + self.mask_encoder = self.mask_encoder < 0.5 + self.mask_decoder = self.mask_decoder < 0.5 + self.mask_encoder_decoder = self.mask_encoder_decoder < 0.5 + + self.loss_mask = torch.ones(self.seq_length_dec, dtype=torch.float) + + def __len__(self) -> int: + return self.length + + def _get_text(self, idx: int) -> np.ndarray: + np_gen = np.random.default_rng(seed=(self.seed + idx)) + return np_gen.integers(self.vocab_size, size=[self.seq_length], dtype=np.int64) + + def __getitem__(self, idx) -> Dict[str, torch.Tensor]: + # Generate data of the expected size and datatype (based on GPTDataset). + np_gen = np.random.default_rng(seed=(self.seed + idx)) + encoder_input = torch.from_numpy(np_gen.integers(self.vocab_size, size=[self.seq_length], dtype=np.int64)) + decoder_input = torch.from_numpy(np_gen.integers(self.vocab_size, size=[self.seq_length_dec], dtype=np.int64)) + labels = torch.from_numpy(np_gen.integers(self.vocab_size, size=[self.seq_length_dec], dtype=np.int64)) + + batch = { + "text_enc": encoder_input, + "text_dec": decoder_input, + "labels": labels, + "loss_mask": self.loss_mask, + "truncated": 0, + "enc_mask": self.mask_encoder, + "dec_mask": self.mask_decoder, + "enc_dec_mask": self.mask_encoder_decoder, + } + + return batch + + def _collate_fn(self, batch): + """ + A default implementation of a collation function. + Users should override this method to define custom data loaders. + """ + return data.dataloader.default_collate(batch) + + def collate_fn(self, batch): + """Method that user pass as functor to DataLoader. + + The method optionally performs neural type checking and add types to the outputs. + + Please note, subclasses of Dataset should not implement `input_types`. + + # Usage: + dataloader = torch.utils.data.DataLoader( + ...., + collate_fn=dataset.collate_fn, + .... + ) + + Returns + ------- + Collated batch, with or without types. 
+ """ + return self._collate_fn(batch) diff --git a/nemo/collections/llm/t5/model/t5.py b/nemo/collections/llm/t5/model/t5.py index e6970cba3dd88..058acaaec7b09 100644 --- a/nemo/collections/llm/t5/model/t5.py +++ b/nemo/collections/llm/t5/model/t5.py @@ -200,6 +200,38 @@ def configure_model(self, tokenizer) -> "MCoreT5Model": return model +@dataclass +class T5Config220M(T5Config): + """ + NeMo's T5 model variant + https://github.com/NVIDIA/NeMo-Framework-Launcher/blob/main/launcher_scripts/conf/training/t5/220m.yaml + """ + + num_layers: int = 12 + encoder_num_layers: int = 12 + hidden_size: int = 768 + ffn_hidden_size: int = 3072 + num_attention_heads: int = 12 + + +@dataclass +class T5Config3B(T5Config): + num_layers: int = 24 + encoder_num_layers: int = 24 + hidden_size: int = 2048 + ffn_hidden_size: int = 5120 + num_attention_heads: int = 32 + + +@dataclass +class T5Config11B(T5Config): + num_layers: int = 24 + encoder_num_layers: int = 24 + hidden_size: int = 4096 + ffn_hidden_size: int = 10240 + num_attention_heads: int = 64 + + class T5Model(L.LightningModule, io.IOMixin, io.ConnectorMixin, fn.FNMixin): def __init__( self, diff --git a/tests/collections/llm/recipes/test_t5_11b.py b/tests/collections/llm/recipes/test_t5_11b.py new file mode 100644 index 0000000000000..502faa882be81 --- /dev/null +++ b/tests/collections/llm/recipes/test_t5_11b.py @@ -0,0 +1,92 @@ +import nemo_run as run +import pytest + +from nemo.collections.llm.api import pretrain +from nemo.collections.llm.recipes import t5_11b +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config11B, T5Model +from nemo.lightning import Trainer + + +class TestT5_11B: + @pytest.fixture(scope="class") + def recipe_module(self): + return t5_11b + + def test_model(self, recipe_module): + model_config = recipe_module.model() + assert isinstance(model_config, run.Config) + assert model_config.__fn_or_cls__ == T5Model + assert isinstance(model_config.config, run.Config) + assert model_config.config.__fn_or_cls__ == T5Config11B + + def test_trainer(self, recipe_module): + trainer_config = recipe_module.trainer() + assert isinstance(trainer_config, run.Config) + assert trainer_config.__fn_or_cls__ == Trainer + assert trainer_config.accelerator == "gpu" + assert trainer_config.devices == 8 + assert trainer_config.num_nodes == 20 + assert trainer_config.max_steps == 1000000 + + # Check strategy configuration + assert isinstance(trainer_config.strategy, run.Config) + assert trainer_config.strategy.__fn_or_cls__.__name__ == "MegatronStrategy" + assert trainer_config.strategy.tensor_model_parallel_size == 4 + assert trainer_config.strategy.pipeline_model_parallel_size == 1 + assert trainer_config.strategy.pipeline_dtype is None + assert trainer_config.strategy.virtual_pipeline_model_parallel_size is None + assert trainer_config.strategy.context_parallel_size == 1 + assert trainer_config.strategy.sequence_parallel is False + assert trainer_config.strategy.gradient_as_bucket_view is True + assert trainer_config.strategy.ckpt_async_save is True + assert trainer_config.strategy.ckpt_parallel_load is True + + # Check other trainer configurations + assert trainer_config.accumulate_grad_batches == 1 + assert trainer_config.limit_test_batches == 50 + assert trainer_config.limit_val_batches == 32 + assert trainer_config.log_every_n_steps == 10 + assert trainer_config.use_distributed_sampler is False + assert trainer_config.val_check_interval == 2000 + + # Check plugins + assert 
isinstance(trainer_config.plugins, run.Config) + assert trainer_config.plugins.__fn_or_cls__.__name__ == "MegatronMixedPrecision" + + def test_pretrain_recipe(self, recipe_module): + recipe = recipe_module.pretrain_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == pretrain + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == MockDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 1920 + + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) + def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): + recipe = recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) + assert recipe.trainer.num_nodes == num_nodes + assert recipe.trainer.devices == num_gpus_per_node + + def test_trainer_parallelism_options(self, recipe_module): + trainer_config = recipe_module.trainer( + tensor_parallelism=2, + pipeline_parallelism=2, + ) + assert trainer_config.strategy.tensor_model_parallel_size == 2 + assert trainer_config.strategy.pipeline_model_parallel_size == 2 + + def test_model_config_parameters(self, recipe_module): + model_config = recipe_module.model() + llama_config = model_config.config + assert llama_config.num_layers == 24 + assert llama_config.encoder_num_layers == 24 + assert llama_config.hidden_size == 4096 + assert llama_config.ffn_hidden_size == 10240 + assert llama_config.num_attention_heads == 64 diff --git a/tests/collections/llm/recipes/test_t5_220m.py b/tests/collections/llm/recipes/test_t5_220m.py new file mode 100644 index 0000000000000..7839a97ae0ed8 --- /dev/null +++ b/tests/collections/llm/recipes/test_t5_220m.py @@ -0,0 +1,91 @@ +import nemo_run as run +import pytest + +from nemo.collections.llm.api import pretrain +from nemo.collections.llm.recipes import t5_220m +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config220M, T5Model +from nemo.lightning import Trainer + + +class TestT5_220M: + @pytest.fixture(scope="class") + def recipe_module(self): + return t5_220m + + def test_model(self, recipe_module): + model_config = recipe_module.model() + assert isinstance(model_config, run.Config) + assert model_config.__fn_or_cls__ == T5Model + assert isinstance(model_config.config, run.Config) + assert model_config.config.__fn_or_cls__ == T5Config220M + + def test_trainer(self, recipe_module): + trainer_config = recipe_module.trainer() + assert isinstance(trainer_config, run.Config) + assert trainer_config.__fn_or_cls__ == Trainer + assert trainer_config.accelerator == "gpu" + assert trainer_config.devices == 8 + assert trainer_config.num_nodes == 1 + assert trainer_config.max_steps == 1000000 + + # Check strategy configuration + assert isinstance(trainer_config.strategy, run.Config) + assert trainer_config.strategy.__fn_or_cls__.__name__ == "MegatronStrategy" + assert trainer_config.strategy.tensor_model_parallel_size == 1 + assert trainer_config.strategy.pipeline_model_parallel_size == 1 + assert trainer_config.strategy.pipeline_dtype is None + assert trainer_config.strategy.virtual_pipeline_model_parallel_size is None + assert trainer_config.strategy.context_parallel_size == 1 + 
assert trainer_config.strategy.sequence_parallel is False + assert trainer_config.strategy.gradient_as_bucket_view is True + assert trainer_config.strategy.ckpt_async_save is True + assert trainer_config.strategy.ckpt_parallel_load is True + + # Check other trainer configurations + assert trainer_config.accumulate_grad_batches == 1 + assert trainer_config.limit_test_batches == 50 + assert trainer_config.limit_val_batches == 32 + assert trainer_config.log_every_n_steps == 10 + assert trainer_config.use_distributed_sampler is False + assert trainer_config.val_check_interval == 2000 + + # Check plugins + assert isinstance(trainer_config.plugins, run.Config) + assert trainer_config.plugins.__fn_or_cls__.__name__ == "MegatronMixedPrecision" + + def test_pretrain_recipe(self, recipe_module): + recipe = recipe_module.pretrain_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == pretrain + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == MockDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 512 + + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) + def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): + recipe = recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) + assert recipe.trainer.num_nodes == num_nodes + assert recipe.trainer.devices == num_gpus_per_node + + def test_trainer_parallelism_options(self, recipe_module): + trainer_config = recipe_module.trainer( + tensor_parallelism=2, + pipeline_parallelism=2, + ) + assert trainer_config.strategy.tensor_model_parallel_size == 2 + assert trainer_config.strategy.pipeline_model_parallel_size == 2 + + def test_model_config_parameters(self, recipe_module): + model_config = recipe_module.model() + llama_config = model_config.config + assert llama_config.num_layers == 12 + assert llama_config.hidden_size == 768 + assert llama_config.ffn_hidden_size == 3072 + assert llama_config.num_attention_heads == 12 diff --git a/tests/collections/llm/recipes/test_t5_3b.py b/tests/collections/llm/recipes/test_t5_3b.py new file mode 100644 index 0000000000000..28341ba32c02a --- /dev/null +++ b/tests/collections/llm/recipes/test_t5_3b.py @@ -0,0 +1,92 @@ +import nemo_run as run +import pytest + +from nemo.collections.llm.api import pretrain +from nemo.collections.llm.recipes import t5_3b +from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.model.t5 import T5Config3B, T5Model +from nemo.lightning import Trainer + + +class TestT5_3B: + @pytest.fixture(scope="class") + def recipe_module(self): + return t5_3b + + def test_model(self, recipe_module): + model_config = recipe_module.model() + assert isinstance(model_config, run.Config) + assert model_config.__fn_or_cls__ == T5Model + assert isinstance(model_config.config, run.Config) + assert model_config.config.__fn_or_cls__ == T5Config3B + + def test_trainer(self, recipe_module): + trainer_config = recipe_module.trainer() + assert isinstance(trainer_config, run.Config) + assert trainer_config.__fn_or_cls__ == Trainer + assert trainer_config.accelerator == "gpu" + assert trainer_config.devices == 8 + assert trainer_config.num_nodes == 
20 + assert trainer_config.max_steps == 1000000 + + # Check strategy configuration + assert isinstance(trainer_config.strategy, run.Config) + assert trainer_config.strategy.__fn_or_cls__.__name__ == "MegatronStrategy" + assert trainer_config.strategy.tensor_model_parallel_size == 2 + assert trainer_config.strategy.pipeline_model_parallel_size == 1 + assert trainer_config.strategy.pipeline_dtype is None + assert trainer_config.strategy.virtual_pipeline_model_parallel_size is None + assert trainer_config.strategy.context_parallel_size == 1 + assert trainer_config.strategy.sequence_parallel is False + assert trainer_config.strategy.gradient_as_bucket_view is True + assert trainer_config.strategy.ckpt_async_save is True + assert trainer_config.strategy.ckpt_parallel_load is True + + # Check other trainer configurations + assert trainer_config.accumulate_grad_batches == 1 + assert trainer_config.limit_test_batches == 50 + assert trainer_config.limit_val_batches == 32 + assert trainer_config.log_every_n_steps == 10 + assert trainer_config.use_distributed_sampler is False + assert trainer_config.val_check_interval == 2000 + + # Check plugins + assert isinstance(trainer_config.plugins, run.Config) + assert trainer_config.plugins.__fn_or_cls__.__name__ == "MegatronMixedPrecision" + + def test_pretrain_recipe(self, recipe_module): + recipe = recipe_module.pretrain_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == pretrain + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == MockDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 1920 + + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) + def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): + recipe = recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) + assert recipe.trainer.num_nodes == num_nodes + assert recipe.trainer.devices == num_gpus_per_node + + def test_trainer_parallelism_options(self, recipe_module): + trainer_config = recipe_module.trainer( + tensor_parallelism=2, + pipeline_parallelism=2, + ) + assert trainer_config.strategy.tensor_model_parallel_size == 2 + assert trainer_config.strategy.pipeline_model_parallel_size == 2 + + def test_model_config_parameters(self, recipe_module): + model_config = recipe_module.model() + llama_config = model_config.config + assert llama_config.num_layers == 24 + assert llama_config.encoder_num_layers == 24 + assert llama_config.hidden_size == 2048 + assert llama_config.ffn_hidden_size == 5120 + assert llama_config.num_attention_heads == 32 From 610e919c9502be65028dd6c2c904dd069b555a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Kami=C5=84ski?= <67481570+Laplasjan107@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:06:37 +0100 Subject: [PATCH 023/125] Minor fixes for NeMo 2.0 PTQ (#11079) * initial commit Signed-off-by: Piotr Kaminski * cleanup Signed-off-by: Piotr Kaminski * fix fabric num nodes Signed-off-by: Piotr Kaminski * Apply isort and black reformatting Signed-off-by: Laplasjan107 * make style coherent Signed-off-by: Piotr Kaminski --------- Signed-off-by: Piotr Kaminski Signed-off-by: Laplasjan107 Co-authored-by: Piotr Kaminski 
Co-authored-by: Laplasjan107 --- nemo/collections/llm/quantization/utils.py | 7 ++++--- nemo/lightning/pytorch/trainer.py | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nemo/collections/llm/quantization/utils.py b/nemo/collections/llm/quantization/utils.py index 86c343ad54ec3..c4c533fe38d0c 100644 --- a/nemo/collections/llm/quantization/utils.py +++ b/nemo/collections/llm/quantization/utils.py @@ -44,11 +44,12 @@ def quantizable_model_config(model_cfg: llm.GPTConfig) -> llm.GPTConfig: def load_with_modelopt_layer_spec(nemo_checkpoint_path: str, calib_tp: int = 1, calib_pp: int = 1) -> llm.GPTModel: trainer = nl.Trainer( - devices=calib_tp * calib_pp, + devices=calib_tp, + num_nodes=calib_pp, strategy=nl.MegatronStrategy( - tensor_model_parallel_size=calib_tp, pipeline_model_parallel_size=calib_pp, pipeline_dtype=torch.float32 + tensor_model_parallel_size=calib_tp, pipeline_model_parallel_size=calib_pp, pipeline_dtype=torch.bfloat16 ), - plugins=nl.MegatronMixedPrecision(precision='32', pipeline_dtype=torch.float32), + plugins=nl.MegatronMixedPrecision(precision='bf16', pipeline_dtype=torch.bfloat16, autocast_enabled=True), ) fabric = trainer.to_fabric() fabric.launch() diff --git a/nemo/lightning/pytorch/trainer.py b/nemo/lightning/pytorch/trainer.py index 164c07fe5b808..0d71c49bf1984 100644 --- a/nemo/lightning/pytorch/trainer.py +++ b/nemo/lightning/pytorch/trainer.py @@ -45,7 +45,7 @@ def io_init(self, **kwargs) -> fdl.Config[Self]: return fdl.Config(type(self), **cfg_kwargs) def to_fabric(self, callbacks=None, loggers=None) -> Fabric: - accelerator, devices, strategy, plugins = None, None, None, None + accelerator, devices, strategy, plugins, num_nodes = None, None, None, None, None if hasattr(self.__io__, "devices"): devices = self.__io__.devices if hasattr(self.__io__, "accelerator"): @@ -62,6 +62,9 @@ def to_fabric(self, callbacks=None, loggers=None) -> Fabric: plugins = fdl.build(plugins) plugins = to_fabric(plugins) + if hasattr(self.__io__, "num_nodes"): + num_nodes = self.__io__.num_nodes + out = Fabric( devices=devices, accelerator=accelerator, @@ -69,6 +72,7 @@ def to_fabric(self, callbacks=None, loggers=None) -> Fabric: plugins=plugins, callbacks=callbacks, loggers=loggers, + num_nodes=num_nodes, ) return out From 5c76d90dea0ab7ad338d867e10695f55db7dd8ae Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Tue, 29 Oct 2024 08:50:21 -0700 Subject: [PATCH 024/125] Add copyright notice (#11083) --- tests/collections/llm/recipes/test_t5_11b.py | 14 ++++++++++++++ tests/collections/llm/recipes/test_t5_220m.py | 14 ++++++++++++++ tests/collections/llm/recipes/test_t5_3b.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/tests/collections/llm/recipes/test_t5_11b.py b/tests/collections/llm/recipes/test_t5_11b.py index 502faa882be81..8c4ab8332c182 100644 --- a/tests/collections/llm/recipes/test_t5_11b.py +++ b/tests/collections/llm/recipes/test_t5_11b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_t5_220m.py b/tests/collections/llm/recipes/test_t5_220m.py index 7839a97ae0ed8..744598e3b01b3 100644 --- a/tests/collections/llm/recipes/test_t5_220m.py +++ b/tests/collections/llm/recipes/test_t5_220m.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import nemo_run as run import pytest diff --git a/tests/collections/llm/recipes/test_t5_3b.py b/tests/collections/llm/recipes/test_t5_3b.py index 28341ba32c02a..7672b95426cb1 100644 --- a/tests/collections/llm/recipes/test_t5_3b.py +++ b/tests/collections/llm/recipes/test_t5_3b.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import nemo_run as run import pytest From 791b05415d531106d95392e77ad7163633fccb81 Mon Sep 17 00:00:00 2001 From: Maanu Grover <109391026+maanug-nv@users.noreply.github.com> Date: Tue, 29 Oct 2024 08:53:49 -0700 Subject: [PATCH 025/125] Add L2 llama test (#10824) * use llama recipe with some changes Signed-off-by: Maanu Grover * copy max_steps Signed-off-by: Maanu Grover * optional parallelisms Signed-off-by: Maanu Grover * precision options Signed-off-by: Maanu Grover * add ci job Signed-off-by: Maanu Grover * typehints Signed-off-by: Maanu Grover * fix Signed-off-by: Maanu Grover * sp arg as int Signed-off-by: Maanu Grover * change interval Signed-off-by: Maanu Grover * verify checkpointing Signed-off-by: Maanu Grover * address CI Signed-off-by: Maanu Grover * doc Signed-off-by: Maanu Grover * fix precision verification Signed-off-by: Maanu Grover * add callback to verify model attributes Signed-off-by: Maanu Grover * extract to common Signed-off-by: Maanu Grover * import fixes Signed-off-by: Maanu Grover * fix checkpointing check Signed-off-by: Maanu Grover * add callback for stopping before end Signed-off-by: Maanu Grover * unused Signed-off-by: Maanu Grover * disable for now Signed-off-by: Maanu Grover * add distcp dir check Signed-off-by: Maanu Grover * change early stop timing Signed-off-by: Maanu Grover * update ckpts verification Signed-off-by: Maanu Grover * update test Signed-off-by: Maanu Grover * fix import Signed-off-by: Maanu Grover * disable for now Signed-off-by: Maanu Grover * fix Signed-off-by: Maanu Grover * use tmp dir Signed-off-by: Maanu Grover * add misc check for max_steps Signed-off-by: Maanu Grover * check end batch progress state Signed-off-by: Maanu Grover * more ckpt dir assertions Signed-off-by: Maanu Grover * disable time-based ckpting Signed-off-by: Maanu Grover * val takes too long Signed-off-by: Maanu Grover --------- Signed-off-by: Maanu Grover --- .github/workflows/cicd-main.yml | 29 +++ .../pytorch/strategies/megatron_strategy.py | 8 + tests/collections/llm/common.py | 176 ++++++++++++++++++ tests/collections/llm/llama3_pretraining.py | 138 ++++++++++++++ 4 files changed, 351 insertions(+) create mode 100644 tests/collections/llm/common.py create mode 100644 tests/collections/llm/llama3_pretraining.py diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 5f8dc98e3948b..22bbb3c1a447b 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -3880,6 +3880,34 @@ jobs: rm -rf tests/collections/llm/gpt_pretrain_results rm -rf tests/collections/llm/gpt_index_mappings + L2_NeMo_2_llama3_pretraining_recipe: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_llama3_pretraining_recipe') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/llama3_pretraining.py \ + --seq-length 1024 \ + --devices=2 \ + --max-steps=6 \ + --early-stop=3 \ + --experiment-dir=/tmp/llm_tests/llama_pretrain_results \ + --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \ + --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \ + --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \ + + python tests/collections/llm/llama3_pretraining.py \ + --seq-length 1024 \ + --devices=2 \ + --max-steps=6 \ + --experiment-dir=/tmp/llm_tests/llama_pretrain_results \ + 
--data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \ + --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \ + --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \ + --cp 1 --tp 2 --sp 1 + L2_NeMo_2_GPT_DDP_Param_Parity_check: needs: [cicd-test-container-setup] uses: ./.github/workflows/_test_template.yml @@ -4432,6 +4460,7 @@ jobs: - L2_NeMo_2_GPT_Pretraining_no_transformer_engine - L2_NeMo_2_GPT_DDP_Param_Parity_check - L2_NeMo_2_HF_MODEL_IMPORT + - L2_NeMo_2_llama3_pretraining_recipe - L2_NeMo_2_SSM_Pretraining - L2_NeMo_2_SSM_Finetuning - L2_NeMo_2_T5_Pretraining diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py index c869fe8962798..c61c3371cc3cc 100644 --- a/nemo/lightning/pytorch/strategies/megatron_strategy.py +++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py @@ -297,6 +297,14 @@ def setup(self, trainer: pl.Trainer) -> None: self.accelerator.setup(trainer) self.trainer = trainer + try: + self.model.optim.lr_scheduler.max_steps = trainer.max_steps + logging.info(f"Copying Trainer's 'max_steps' ({trainer.max_steps}) to LR scheduler's 'max_steps'.") + except AttributeError: + logging.warning( + "Could not copy Trainer's 'max_steps' to LR scheduler's 'max_steps'. If you are not using an LR scheduler, this warning can safely be ignored." + ) + # move the model to the correct device # self.model_to_device() diff --git a/tests/collections/llm/common.py b/tests/collections/llm/common.py new file mode 100644 index 0000000000000..5a464dfb93fe5 --- /dev/null +++ b/tests/collections/llm/common.py @@ -0,0 +1,176 @@ +import os + +import pytorch_lightning as pl +import torch + +from nemo import lightning as nl +from nemo.collections import llm +from nemo.collections.common.tokenizers import SentencePieceTokenizer +from nemo.utils import logging + + +def train_data( + data_path: str, tokenizer_path: str, index_mapping_dir: str, seq_length: int +) -> llm.PreTrainingDataModule: + """Single shard dataset tokenized by SentencePiece""" + tokenizer = SentencePieceTokenizer(model_path=tokenizer_path) + return llm.PreTrainingDataModule( + paths=data_path, + tokenizer=tokenizer, + seq_length=seq_length, + micro_batch_size=4, + global_batch_size=32, + seed=1234, + index_mapping_dir=index_mapping_dir, + ) + + +def small_llama_cfg(seq_length: int) -> llm.GPTConfig: + """Small 145m model""" + return llm.Llama3Config8B( + rotary_base=500_000, + seq_length=seq_length, + num_layers=12, + hidden_size=768, + ffn_hidden_size=2688, + num_attention_heads=16, + init_method_std=0.023, + ) + + +class StopBeforeEnd(pl.Callback): + """Preemptively stop training at a given global step. Allows stopping training before reaching + the max steps. Useful for testing checkpoint save and resume. + + Args: + stop_on_step (int): Stop training when trainer.global_step reaches this value. + Checked at the start of every step. 
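+
+    Example (minimal usage sketch; mirrors how the llama3 pretraining test below wires it up,
+    where the CI job passes --max-steps=6 and --early-stop=3):
+
+        >>> callback = StopBeforeEnd(stop_on_step=3)
+        >>> # Appending this to trainer.callbacks stops training once trainer.global_step
+        >>> # reaches 3, even though trainer.max_steps may be larger (e.g. 6).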
+ """ + + def __init__(self, stop_on_step: int): + self.stop_on_step = stop_on_step + + def on_train_batch_end( + self, trainer: pl.Trainer, pl_module: pl.LightningModule, outputs, batch, batch_idx + ) -> None: + if trainer.global_step >= self.stop_on_step: + logging.info(f"Global step {trainer.global_step} >= {self.stop_on_step}, signaling Trainer to stop.") + trainer.should_stop = True + # skip EarlyStopping validation unless val_check_interval met + if trainer.global_step % trainer.val_check_interval != 0: + trainer.limit_val_batches = 0 + + +class MCoreModelAttributeValidator(pl.Callback): + """Walk through submodules and verify user-specified attributes like parallelisms.""" + + def __init__(self, attr_dict: dict): + super().__init__() + self.attr_dict = attr_dict + + def _check_attrs(self, target): + for k, v in self.attr_dict.items(): + if hasattr(target, k): + model_val = getattr(target, k) + assert ( + model_val == v + ), f"Key {k} for model ({model_val}) does not match {v} from provided attribute mapping." + + def on_train_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + def walk_fn(module: torch.nn.Module) -> torch.nn.Module: + # self._check_attrs(module) # TE DPA has 'sequence_parallel' attribute that is always False. Checking module config should be sufficient + if hasattr(module, "config"): + self._check_attrs(module.config) + + return module + + trainer.model.walk(walk_fn) + + +class MiscAttributeValidator(pl.Callback): + """Place for any miscellaneous attribute assertions. Extend as needed.""" + + def __init__(self, attr_dict: dict): + super().__init__() + self.attr_dict = attr_dict + + def on_train_start(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None: + if 'max_steps' in self.attr_dict: + sched_max = trainer.model.optim.lr_scheduler._scheduler['lr_scheduler']['scheduler'].max_steps + assert ( + trainer.max_steps == self.attr_dict['max_steps'] + ), f"Trainer max_steps {trainer.max_steps} did not match provided {self.attr_dict['max_steps']}" + assert ( + sched_max == self.attr_dict['max_steps'] + ), f"Scheduler max_steps {sched_max} did not match provided {self.attr_dict['max_steps']}" + + def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None: + if 'stop_on_step' in self.attr_dict: + total_steps = trainer.fit_loop.epoch_loop.batch_progress.total.completed + assert total_steps == self.attr_dict['stop_on_step'] + + +def verify_distcp_dir(ckpt_path: str) -> None: + ckpt_name = os.path.basename(ckpt_path) + + weights_dir = os.path.join(ckpt_path, 'weights') + assert os.path.isdir(weights_dir), f"Weights not found in checkpoint {ckpt_name}" + assert os.path.isfile(os.path.join(weights_dir, 'common.pt')), f"No 'common.pt' file in checkpoint {ckpt_name}" + assert os.path.isfile( + os.path.join(weights_dir, 'metadata.json') + ), f"No 'metadata.json' file in checkpoint {ckpt_name}" + + shards = [shard for shard in os.listdir(weights_dir) if shard.endswith('.distcp')] + world_size = torch.distributed.get_world_size() + assert ( + len(shards) == 2 * world_size + ), f"Wrong number of .distcp files, Expected: {2*world_size} Found: {len(shards)}" + + +def verify_ckpt_dir( + model_ckpt: nl.ModelCheckpoint, max_steps: int, val_check_interval: int, exp_dir: str, dist_ckpts: bool = True +) -> None: + """Ensures that the provided checkpoint directory has + - correct number of checkpoints + - no more than top-k checkpoints + - no unfinished checkpoints + - a checkpoint for the last step + - all checkpoints in the 
correct format + """ + + ckpt_dir = os.path.join(exp_dir, 'checkpoints') + ckpts = os.listdir(ckpt_dir) + + if model_ckpt.save_last: + assert any([c.endswith('-last') for c in ckpts]), "No -last checkpoint found after training" + + expected_count = (max_steps // val_check_interval) + model_ckpt.save_last + if model_ckpt.save_top_k > 0: + assert ( + len(ckpts) == expected_count or len(ckpts) == model_ckpt.save_top_k + model_ckpt.save_last + ), f"Expected {expected_count} checkpoints or at most top {model_ckpt.save_top_k} checkpoints besides '-last'" + else: + assert len(ckpts) == expected_count, f"Expected {expected_count} checkpoints" + + for ckpt_name in ckpts: + ckpt_path = os.path.join(ckpt_dir, ckpt_name) + + assert ( + '-unfinished' not in ckpt_name + ), f"Unfinished checkpoint found. Something went wrong with saving checkpoint {ckpt_name}" + + if ckpt_name.endswith('-last') and 'step' in model_ckpt.filename: + assert f'step={max_steps-1}' in ckpt_name, f"Last checkpoint {ckpt_name} not for final step {max_steps}" + + if dist_ckpts: + assert os.path.isdir(ckpt_path), "Checkpoint is not correct type" + verify_distcp_dir(ckpt_path) + else: + assert os.path.isfile(ckpt_path), "Checkpoint is not correct type" + + +def create_verify_precision(precision: torch.dtype): + def verify_precision(tensor: torch.Tensor) -> None: + assert tensor.dtype == precision + + return verify_precision diff --git a/tests/collections/llm/llama3_pretraining.py b/tests/collections/llm/llama3_pretraining.py new file mode 100644 index 0000000000000..dd0c1e7ce5fae --- /dev/null +++ b/tests/collections/llm/llama3_pretraining.py @@ -0,0 +1,138 @@ +""" +Test the LLaMA3 recipe with a smaller model. +""" + +import argparse +import os + +import nemo_run as run +import torch + +from nemo.collections import llm +from nemo.lightning.pytorch.callbacks.debugging import ParameterDebugger +from tests.collections.llm.common import ( + MCoreModelAttributeValidator, + MiscAttributeValidator, + StopBeforeEnd, + create_verify_precision, + small_llama_cfg, + train_data, + verify_ckpt_dir, +) + + +def get_args(): + parser = argparse.ArgumentParser(prog="", description="") + parser.add_argument('--devices', type=int, required=True, help="Number of devices to use for training") + parser.add_argument('--max-steps', type=int, required=True, help="Number of steps to train for") + parser.add_argument( + '--early-stop', + type=int, + default=None, + help="Stop training early at this global step (for testing resume training)", + ) + parser.add_argument( + '--experiment-dir', type=str, required=True, help="directory to write results and checkpoints to" + ) + parser.add_argument( + '--data-path', type=str, default=None, help="Path to data file. If not specified, uses mock data." + ) + parser.add_argument( + '--tokenizer-path', + type=str, + default=None, + help="Path to a sentencepiece tokenizer model file. If not specified, uses mock data.", + ) + parser.add_argument('--index-mapping-dir', type=str, help="directory to write index mappings to") + parser.add_argument('--seq-length', type=int, default=8192, help="Sequence length. 
default is 8k") + parser.add_argument('--tp', type=int, default=None, help="Override tensor parallelism") + parser.add_argument('--pp', type=int, default=None, help="Override pipeline parallelism") + parser.add_argument('--vp', type=int, default=None, help="Override virtual pipeline parallelism") + parser.add_argument('--cp', type=int, default=None, help="Override context parallelism") + parser.add_argument('--sp', type=int, choices=[0, 1], default=None, help="Override sequence parallel") + parser.add_argument( + '--precision', type=str, choices=['bf16', 'fp16', 'fp32'], default='bf16', help="Override recipe precision" + ) + parser.add_argument('--fp8', action='store_true', help="Enable FP8") + + return parser.parse_args() + + +def main(): + args = get_args() + + exp_name = "L2_llama3_small_pretrain_test" + pretrain_recipe = llm.llama3_8b.pretrain_recipe( + dir=args.experiment_dir, name=exp_name, num_gpus_per_node=args.devices + ) + + pretrain_recipe.model = llm.LlamaModel(small_llama_cfg(args.seq_length)) + + if args.data_path and args.tokenizer_path: + pretrain_recipe.data = train_data( + data_path=args.data_path, + tokenizer_path=args.tokenizer_path, + index_mapping_dir=args.index_mapping_dir, + seq_length=args.seq_length, + ) + + # Recipe Overrides + pretrain_recipe.trainer.max_steps = args.max_steps + pretrain_recipe.trainer.log_every_n_steps = 1 + pretrain_recipe.log.ckpt.every_n_train_steps = None + pretrain_recipe.log.ckpt.train_time_interval = None + pretrain_recipe.trainer.val_check_interval = 2 + pretrain_recipe.trainer.limit_val_batches = 2 + + if args.early_stop: + pretrain_recipe.trainer.callbacks.append(StopBeforeEnd(stop_on_step=args.early_stop)) + + if not args.precision == 'bf16' or args.fp8: # default case is bf16 without fp8 + import llm.recipes.precision.mixed_precision as mp_recipes + + key = (args.precision, args.fp8) + precision_recipe = { + ("fp16", False): mp_recipes.fp16_mixed, + ("bf16", True): mp_recipes.bf16_with_fp8_mixed, + ("fp16", True): mp_recipes.fp16_with_fp8_mixed, + # Need fp32 + }[key] + pretrain_recipe.trainer.plugins = precision_recipe() + dtype_map = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32} + debugger_callback = ParameterDebugger( + param_fn=create_verify_precision(dtype_map[args.precision]), + grad_fn=create_verify_precision(torch.float32), + log_on_hooks=["on_train_start", "on_train_end"], + ) + pretrain_recipe.trainer.callbacks.append(debugger_callback) + + parallelisms = { + "tensor_model_parallel_size": args.tp, + "pipeline_model_parallel_size": args.pp, + "virtual_pipeline_model_parallel_size": args.vp, + "context_parallel_size": args.cp, + "sequence_parallel": bool(args.sp) if args.sp is not None else None, + } + for k, v in parallelisms.items(): + if v is not None: # use recipe default if not specified + setattr(pretrain_recipe.trainer.strategy, k, v) + parallelisms[k] = getattr(pretrain_recipe.trainer.strategy, k) + pretrain_recipe.trainer.callbacks.append(MCoreModelAttributeValidator(parallelisms)) + + misc_checker = MiscAttributeValidator( + {"max_steps": args.max_steps, "stop_on_step": args.early_stop or args.max_steps} + ) + pretrain_recipe.trainer.callbacks.append(misc_checker) + + run.run(pretrain_recipe, direct=True) + + verify_ckpt_dir( + pretrain_recipe.log.ckpt, + args.early_stop or args.max_steps, + pretrain_recipe.trainer.val_check_interval, + os.path.join(args.experiment_dir, exp_name), + ) + + +if __name__ == '__main__': + main() From aeee5ae589709658f4073d4078f16bf87170de83 Mon Sep 17 00:00:00 
2001 From: Pablo Garay Date: Tue, 29 Oct 2024 08:54:46 -0700 Subject: [PATCH 026/125] Add copyright check (#11048) * check for copyright * Copyright check * Copyright check * Copyright check * Exclude path * Exclude path * account for other license types * account for other license types * account for other license types * account for other license types * Ignore __init__.py files * Ignore __init__.py files * Ignore __init__.py files * Ignore __init__.py files * Change job name --- .github/workflows/copyright-check.yml | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .github/workflows/copyright-check.yml diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml new file mode 100644 index 0000000000000..724f3afb61770 --- /dev/null +++ b/.github/workflows/copyright-check.yml @@ -0,0 +1,59 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: Copyright check + +on: + pull_request: + +jobs: + main: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + path: ${{ github.run_id }} + fetch-depth: 0 + + - name: Check files have copyright notice + run: | + cd ${{ github.run_id }} + + # Files ending with .py should have Copyright notice in the first 10 lines + find_files_with_missing_copyright() { + find ./ -type f -name '*.py' -not -path "./.git/*" -not -path "./*__init__.py" | while read path; do + echo -en $path"\t" + head -n 10 $path | tr '\n' '\t' | sed 's/\t$/\n/' + done \ + | egrep -iv 'Copyright.*NVIDIA CORPORATION.*' \ + | egrep -iv '*MIT.*Licen.e.*' \ + | egrep -iv '*Copyright.*Apache.*' \ + | egrep -iv '*Apache.*License.*' \ + | while read line; do + echo $line | cut -d' ' -f1 + done + } + + + declare RESULT=($(find_files_with_missing_copyright)) # (..) 
= array + + if [ "${#RESULT[@]}" -gt 0 ]; then + echo "Error: Found files with missing copyright:" + for (( i=0; i<"${#RESULT[@]}"; i++ )); do + echo "path= ${RESULT[$i]}" + done + exit 1; + else + echo "Ok: All (Python) files start with copyright notice" + fi From 5296716d8d28d098574f22ed82c8e966c9d99100 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Tue, 29 Oct 2024 11:56:16 -0400 Subject: [PATCH 027/125] Fix finalize model grad for PEFT (#11065) * fix finalize model grad for PEFT Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * change to warning Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx --- nemo/lightning/pytorch/callbacks/peft.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index 28f16882305c9..aad2a1696d619 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -29,6 +29,7 @@ from nemo.lightning.io.mixin import IOMixin from nemo.lightning.io.pl import ckpt_to_dir from nemo.lightning.pytorch.callbacks.model_transform import ModelTransform +from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule from nemo.utils import logging from nemo.utils.callbacks.dist_ckpt_io import AsyncCompatibleCheckpointIO @@ -172,6 +173,16 @@ def apply_transform(self, trainer): if trainer.state.fn == TrainerFn.FITTING: trainer.strategy.load_optimizer_state_dict(adapter_state, selective_restore=True) + for cb in trainer.callbacks[::-1]: + if isinstance(cb, MegatronOptimizerModule): + cb.on_fit_start(trainer, trainer.lightning_module) + break + else: + logging.warning( + "MegatronOptimizerModule not found in trainer callbacks. finalize_model_grads is not " + "properly set up for PEFT." + ) + def adapter_key_filter(self, key: str) -> bool: return key in self.trainable_params or ".adapter." 
in key or key.endswith(".adapters") From dc5357861edcdc4624b0f65ca0e99730d94b956b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 29 Oct 2024 16:57:57 +0100 Subject: [PATCH 028/125] ci: Less verbose infra alerts (#11080) Signed-off-by: Oliver Koenig --- .github/workflows/_test_template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_test_template.yml b/.github/workflows/_test_template.yml index 54579ab2d850a..e2401cab7f642 100644 --- a/.github/workflows/_test_template.yml +++ b/.github/workflows/_test_template.yml @@ -79,7 +79,7 @@ jobs: echo "log=$(tail -c 2000 err.log | base64 -w 0)" >> "$GITHUB_OUTPUT" - potential_infra_failure=$(cat err.log | grep -Eqi "gpu|cuda|device" && echo true || echo false) + potential_infra_failure=$(cat err.log | grep -Eqiw "device" && echo true || echo false) echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT" exit $EXIT_CODE From d5e360ff004d18c54b57225c68d4e58d0d4cd964 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Tue, 29 Oct 2024 09:14:26 -0700 Subject: [PATCH 029/125] Add copyright notice (#11085) --- tests/collections/llm/common.py | 14 ++++++++++++++ tests/collections/llm/llama3_pretraining.py | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tests/collections/llm/common.py b/tests/collections/llm/common.py index 5a464dfb93fe5..95b8bc0de5849 100644 --- a/tests/collections/llm/common.py +++ b/tests/collections/llm/common.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import pytorch_lightning as pl diff --git a/tests/collections/llm/llama3_pretraining.py b/tests/collections/llm/llama3_pretraining.py index dd0c1e7ce5fae..24eeca8f01c8e 100644 --- a/tests/collections/llm/llama3_pretraining.py +++ b/tests/collections/llm/llama3_pretraining.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Test the LLaMA3 recipe with a smaller model. 
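
Example invocation (a sketch based on the L2_NeMo_2_llama3_pretraining_recipe CI job; the
data, tokenizer and output paths are placeholders to substitute for your own environment):

    python tests/collections/llm/llama3_pretraining.py \
        --devices=2 --max-steps=6 --early-stop=3 --seq-length=1024 \
        --experiment-dir=/path/to/llama_pretrain_results \
        --data-path=/path/to/preprocessed_text_document \
        --tokenizer-path=/path/to/tokenizer.model \
        --index-mapping-dir=/path/to/llama_index_mappings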
""" From 217b528587faf3684aaa9bf70ae75ae59ab99947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 29 Oct 2024 17:42:28 +0100 Subject: [PATCH 030/125] ci: Fix cron schedule (#11076) * ci: Fix cron Signed-off-by: Oliver Koenig * fix Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig --- .github/workflows/monitor-vms.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/monitor-vms.yml b/.github/workflows/monitor-vms.yml index 03f37d48e8ea9..6795f87abf68e 100644 --- a/.github/workflows/monitor-vms.yml +++ b/.github/workflows/monitor-vms.yml @@ -2,9 +2,8 @@ name: Reboots VMs in a controlled way on: schedule: - - cron: /15 * * * * + - cron: 0/15 * * * * workflow_dispatch: - pull_request: jobs: pre-flight: @@ -28,7 +27,7 @@ jobs: | jq -c '[ .runners[] | select(.status == "online") - | select(.name | contains("gpu") + | select(.name | contains("gpu")) | { "vm": .name, "n_gpus": [ @@ -47,8 +46,8 @@ jobs: fail-fast: false matrix: include: ${{ fromJSON(needs.pre-flight.outputs.list-of-vms )}} - uses: .github/workflows/monitor-single-vm.yml + uses: ./.github/workflows/monitor-single-vm.yml with: vm: ${{ matrix.vm }} n_gpus: ${{ matrix.n_gpus }} - secrets: inherit + secrets: inherit # pragma: allowlist secret From a8832b83d7a9113794d71cf2c0044bd74f07a099 Mon Sep 17 00:00:00 2001 From: Jan Lasek Date: Tue, 29 Oct 2024 18:01:47 +0100 Subject: [PATCH 031/125] Export & deploy updates (part I) (#10941) * Update vLLMExporter docstring Signed-off-by: Jan Lasek * No need to create empty kwargs here Signed-off-by: Jan Lasek * Use debug from command line Signed-off-by: Jan Lasek * Param save_engine for both both vLLM and TRT-LLM Signed-off-by: Jan Lasek * Unused backend param in run_trt_llm_inference Signed-off-by: Jan Lasek * Reindent files for non-existent checkpoint check Signed-off-by: Jan Lasek * Docs for lora_checkpoints Signed-off-by: Jan Lasek * Improve config readability Signed-off-by: Jan Lasek * Raise error directly in get_vllm_deployable Signed-off-by: Jan Lasek * Apply isort and black reformatting Signed-off-by: janekl * Revert "Reindent files for non-existent checkpoint check" This reverts commit 8499d501fc8da8781bc7651b928946efad3ef46d. Signed-off-by: Jan Lasek * Cut off prompt for real Signed-off-by: Jan Lasek * Apply isort and black reformatting Signed-off-by: janekl --------- Signed-off-by: Jan Lasek Signed-off-by: janekl Co-authored-by: janekl --- nemo/export/vllm_exporter.py | 25 ++++++++++-------- scripts/deploy/nlp/deploy_vllm_triton.py | 32 +++++++++++------------- tests/deploy/nemo_deploy.py | 23 ++++++++--------- tests/export/nemo_export.py | 23 ++++++++--------- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/nemo/export/vllm_exporter.py b/nemo/export/vllm_exporter.py index b56a6414929f5..0ce7d49126d3f 100644 --- a/nemo/export/vllm_exporter.py +++ b/nemo/export/vllm_exporter.py @@ -52,26 +52,28 @@ def wrapper(*args, **kwargs): class vLLMExporter(ITritonDeployable): """ - The Exporter class implements conversion from a Nemo checkpoint format to something compatible with vLLM, + The vLLMExporter class implements conversion from a Nemo checkpoint format to something compatible with vLLM, loading the model in vLLM, and binding that model to a Triton server. 
Example: - from nemo.export.vllm import Exporter + from nemo.export.vllm_exporter import vLLMExporter from nemo.deploy import DeployPyTriton - exporter = Exporter() + exporter = vLLMExporter() + exporter.export( nemo_checkpoint='/path/to/checkpoint.nemo', model_dir='/path/to/temp_dir', - model_type='llama') + model_type='llama', + ) server = DeployPyTriton( model=exporter, - triton_model_name='LLAMA') + triton_model_name='LLAMA', + ) server.deploy() server.serve() - server.stop() """ def __init__(self): @@ -86,7 +88,7 @@ def export( tensor_parallel_size: int = 1, pipeline_parallel_size: int = 1, max_model_len: int = None, - lora_checkpoints: List[str] = [], + lora_checkpoints: Optional[List[str]] = None, dtype: str = 'auto', seed: int = 0, log_stats: bool = True, @@ -110,6 +112,7 @@ def export( pipeline_parallel_size (int): pipeline parallelism. Values over 1 are not currently supported by vLLM. max_model_len (int): model context length. + lora_checkpoints List[str]: paths to LoRA checkpoints. dtype (str): data type for model weights and activations. Possible choices: auto, half, float16, bfloat16, float, float32 "auto" will use FP16 precision for FP32 and FP16 models, @@ -161,7 +164,7 @@ def export( # vllm/huggingface doesn't like the absense of config file. Place config in load dir. if model_config.model and not os.path.exists(os.path.join(model_config.model, 'config.json')): with open(os.path.join(model_config.model, 'config.json'), "w") as f: - json.dump(model_config.hf_text_config.to_dict(), f) + json.dump(model_config.hf_text_config.to_dict(), f, indent=2) # Dynamic online FP8 quantization currently does not support in-memory conversion [TODO] if quantization is not None and weight_storage in {'auto', 'memory'}: @@ -277,10 +280,12 @@ def export( log_stats=log_stats, ) - def _prepare_lora_checkpoints(self, model_dir: str, lora_checkpoints: List[str], dtype) -> LoRAConfig: + def _prepare_lora_checkpoints( + self, model_dir: str, lora_checkpoints: Optional[List[str]], dtype: str + ) -> LoRAConfig: self.lora_checkpoints = [] - if lora_checkpoints is None or len(lora_checkpoints) == 0: + if not lora_checkpoints: return None index = 0 diff --git a/scripts/deploy/nlp/deploy_vllm_triton.py b/scripts/deploy/nlp/deploy_vllm_triton.py index bc48bf9edd380..ab9f13a1b8da6 100755 --- a/scripts/deploy/nlp/deploy_vllm_triton.py +++ b/scripts/deploy/nlp/deploy_vllm_triton.py @@ -104,24 +104,20 @@ def get_args(argv): def get_vllm_deployable(args, model_dir): - - try: - exporter = vLLMExporter() - exporter.export( - nemo_checkpoint=args.nemo_checkpoint, - model_dir=model_dir, - model_type=args.model_type, - tensor_parallel_size=args.tensor_parallelism_size, - max_model_len=args.max_model_len, - lora_checkpoints=args.lora_ckpt, - dtype=args.dtype, - weight_storage=args.weight_storage, - gpu_memory_utilization=args.gpu_memory_utilization, - quantization=args.quantization, - ) - return exporter - except Exception as error: - raise RuntimeError("An error has occurred during the model export. 
Error message: " + str(error)) + exporter = vLLMExporter() + exporter.export( + nemo_checkpoint=args.nemo_checkpoint, + model_dir=model_dir, + model_type=args.model_type, + tensor_parallel_size=args.tensor_parallelism_size, + max_model_len=args.max_model_len, + lora_checkpoints=args.lora_ckpt, + dtype=args.dtype, + weight_storage=args.weight_storage, + gpu_memory_utilization=args.gpu_memory_utilization, + quantization=args.quantization, + ) + return exporter def nemo_deploy(argv): diff --git a/tests/deploy/nemo_deploy.py b/tests/deploy/nemo_deploy.py index e970a9b6a0711..23db7c4f01f3d 100644 --- a/tests/deploy/nemo_deploy.py +++ b/tests/deploy/nemo_deploy.py @@ -180,8 +180,7 @@ def run_trt_llm_inference( stop_words_list=None, test_deployment=False, test_data_path=None, - backend="TensorRT-LLM", - save_trt_engine=False, + save_engine=False, ): if Path(checkpoint_path).exists(): if n_gpu > torch.cuda.device_count(): @@ -319,14 +318,14 @@ def run_trt_llm_inference( if test_deployment: nm.stop() - if not save_trt_engine: + if not save_engine: shutil.rmtree(trt_llm_model_dir) return result if test_deployment: nm.stop() - if not save_trt_engine: + if not save_engine: shutil.rmtree(trt_llm_model_dir) return None, None, None, None, None @@ -368,7 +367,7 @@ def run_existing_checkpoints( stop_words_list=None, test_data_path=None, backend="tensorrt-llm", - save_trt_engine=False, + save_engine=False, ): if n_gpus > torch.cuda.device_count(): print("Skipping the test due to not enough number of GPUs") @@ -433,7 +432,7 @@ def run_existing_checkpoints( stop_words_list=stop_words_list, test_deployment=test_deployment, test_data_path=test_data_path, - save_trt_engine=save_trt_engine, + save_engine=save_engine, ) @@ -573,7 +572,7 @@ def get_args(): help="Different options to deploy nemo model.", ) parser.add_argument( - "--save_trt_engine", + "--save_engine", type=str, default="False", ) @@ -587,10 +586,10 @@ def run_inference_tests(args): else: args.test_deployment = False - if args.save_trt_engine == "True": - args.save_trt_engine = True + if args.save_engine == "True": + args.save_engine = True else: - args.save_trt_engine = False + args.save_engine = False if args.run_accuracy == "True": args.run_accuracy = True @@ -621,7 +620,7 @@ def run_inference_tests(args): run_accuracy=args.run_accuracy, test_data_path=args.test_data_path, backend=args.backend.lower(), - save_trt_engine=args.save_trt_engine, + save_engine=args.save_engine, ) n_gpus = n_gpus * 2 @@ -658,7 +657,7 @@ def run_inference_tests(args): streaming=args.streaming, test_deployment=args.test_deployment, test_data_path=args.test_data_path, - save_trt_engine=args.save_trt_engine, + save_engine=args.save_engine, ) else: result_dic[n_gpus] = run_in_framework_inference( diff --git a/tests/export/nemo_export.py b/tests/export/nemo_export.py index a97ac9242f9b1..e929f2601022e 100644 --- a/tests/export/nemo_export.py +++ b/tests/export/nemo_export.py @@ -241,7 +241,7 @@ def run_inference( test_cpp_runtime=False, test_deployment=False, test_data_path=None, - save_trt_engine=False, + save_engine=False, fp8_quantized=False, fp8_kvcache=False, trt_llm_export_kwargs=None, @@ -442,7 +442,7 @@ def run_inference( if test_deployment: nm.stop() - if not save_trt_engine and model_dir: + if not save_engine and model_dir: shutil.rmtree(model_dir) return (functional_result, accuracy_result) @@ -464,7 +464,7 @@ def run_existing_checkpoints( test_deployment=False, stop_words_list=None, test_data_path=None, - save_trt_engine=False, + save_engine=False, 
in_framework=False, fp8_quantized=False, fp8_kvcache=False, @@ -497,9 +497,6 @@ def run_existing_checkpoints( else: use_embedding_sharing = False - if trt_llm_export_kwargs is None: - trt_llm_export_kwargs = {} - if in_framework: return run_in_framework_inference( model_name=model_name, @@ -542,7 +539,7 @@ def run_existing_checkpoints( test_cpp_runtime=test_cpp_runtime, test_deployment=test_deployment, test_data_path=test_data_path, - save_trt_engine=save_trt_engine, + save_engine=save_engine, fp8_quantized=fp8_quantized, fp8_kvcache=fp8_kvcache, trt_llm_export_kwargs=trt_llm_export_kwargs, @@ -591,7 +588,7 @@ def run_in_framework_inference( output_deployed = output_deployed["sentences"] # MegatronLLMDeployable will return the prompt + generated output, so cut off the prompt for i, output in enumerate(output_deployed): - output = output[len(prompts[i]) :] + output_deployed[i, :] = output[0][len(prompts[i]) :] # Unwrap the generator if needed output_deployed = list(output_deployed) @@ -744,7 +741,7 @@ def get_args(): default=None, ) parser.add_argument( - "--save_trt_engine", + "--save_engine", type=str, default="False", ) @@ -811,7 +808,7 @@ def str_to_bool(name: str, s: str, optional: bool = False) -> Optional[bool]: args.test_cpp_runtime = str_to_bool("test_cpp_runtime", args.test_cpp_runtime) args.test_deployment = str_to_bool("test_deployment", args.test_deployment) args.functional_test = str_to_bool("functional_test", args.functional_test) - args.save_trt_engine = str_to_bool("save_trt_engin", args.save_trt_engine) + args.save_engine = str_to_bool("save_engine", args.save_engine) args.run_accuracy = str_to_bool("run_accuracy", args.run_accuracy) args.use_vllm = str_to_bool("use_vllm", args.use_vllm) args.lora = str_to_bool("lora", args.lora) @@ -871,7 +868,7 @@ def run_inference_tests(args): test_cpp_runtime=args.test_cpp_runtime, run_accuracy=args.run_accuracy, test_data_path=args.test_data_path, - save_trt_engine=args.save_trt_engine, + save_engine=args.save_engine, in_framework=args.in_framework, fp8_quantized=args.export_fp8_quantized, fp8_kvcache=args.use_fp8_kv_cache, @@ -900,7 +897,7 @@ def run_inference_tests(args): top_p=args.top_p, temperature=args.temperature, run_accuracy=args.run_accuracy, - debug=True, + debug=args.debug, test_data_path=args.test_data_path, ) else: @@ -932,7 +929,7 @@ def run_inference_tests(args): test_deployment=args.test_deployment, test_cpp_runtime=args.test_cpp_runtime, test_data_path=args.test_data_path, - save_trt_engine=args.save_trt_engine, + save_engine=args.save_engine, fp8_quantized=args.export_fp8_quantized, fp8_kvcache=args.use_fp8_kv_cache, trt_llm_export_kwargs=args.trt_llm_export_kwargs, From 5b41198e83c4bd1ca99bb2e86733704fcd08697d Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Tue, 29 Oct 2024 20:29:13 +0100 Subject: [PATCH 032/125] Add doc-strings to import & export + improve logging (#11078) * Add doc-strings to import & export + improve logging Signed-off-by: Marc Romeijn * Apply isort and black reformatting Signed-off-by: marcromeyn * Apply isort and black reformatting Signed-off-by: artbataev --------- Signed-off-by: Marc Romeijn Signed-off-by: marcromeyn Signed-off-by: artbataev Co-authored-by: marcromeyn Co-authored-by: artbataev --- nemo/collections/llm/api.py | 122 +++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py index c4913e07da9b6..4f47f5c4bc734 100644 --- a/nemo/collections/llm/api.py +++ 
b/nemo/collections/llm/api.py @@ -21,10 +21,12 @@ import nemo_run as run import pytorch_lightning as pl import torch +from rich.console import Console from typing_extensions import Annotated import nemo.lightning as nl from nemo.lightning import AutoResume, NeMoLogger, OptimizerModule, Trainer, io +from nemo.lightning.base import NEMO_MODELS_CACHE from nemo.lightning.pytorch.callbacks import PEFT, ModelTransform from nemo.utils import logging @@ -414,7 +416,71 @@ def import_ckpt( output_path: Optional[Path] = None, overwrite: bool = False, ) -> Path: - return io.import_ckpt(model=model, source=source, output_path=output_path, overwrite=overwrite) + """ + Imports a checkpoint into a model using the model's associated importer, typically for + the purpose of fine-tuning a community model trained in an external framework, such as + Hugging Face. + + This function can be used both programmatically and through the NeMo CLI: + + CLI Usage: + ```bash + # Import Llama 3 8B from HuggingFace (saves to $NEMO_MODELS_CACHE) + nemo llm import llama3_8b source="hf://meta-llama/Llama-3.1-8B" + + # Import with custom output path + nemo llm import llama3_8b source="hf://meta-llama/Llama-3.1-8B" output_path="/path/to/save" + + # Force overwrite existing checkpoint + nemo llm import llama3_8b source="hf://meta-llama/Llama-3.1-8B" overwrite=true + ``` + + Python Usage: + ```python + model = Mistral7BModel() + imported_path = import_ckpt(model, "hf://mistralai/Mistral-7B-v0.1") + ``` + + The importer component of the model reads the checkpoint data from the specified source + and transforms it into the right format. This is particularly useful for adapting + models that have been pre-trained in different environments or frameworks to be fine-tuned + or further developed within the current system. + + For instance, using `import_ckpt(Mistral7BModel(), "hf")` initiates the import process + by searching for a registered model importer tagged with "hf". In NeMo, `HFMistral7BImporter` + is registered under this tag via: + `@io.model_importer(Mistral7BModel, "hf", default_path="mistralai/Mistral-7B-v0.1")`. + This links `Mistral7BModel` to `HFMistral7BImporter`, designed for HuggingFace checkpoints. + + Args: + model (pl.LightningModule): The model into which the checkpoint will be imported. + This model must implement the ConnectorMixin. + source (str): The source from which the checkpoint will be imported. This can be + a file path, URL, or any other string identifier that the model's importer + can recognize. + output_path (Optional[Path]): The path where the imported checkpoint will be stored. + If not specified, the checkpoint will be saved to $NEMO_MODELS_CACHE + (defaults to ~/.cache/nemo/models/ if the environment variable is not set). + overwrite (bool): If set to True, existing files at the output path will be overwritten. + This is useful for model updates where retaining old checkpoint files is not required. + + Returns: + Path: The path where the checkpoint has been saved after import. + + Raises: + ValueError: If the model does not implement ConnectorMixin, indicating a lack of + necessary importer functionality. 
+ """ + output = io.import_ckpt(model=model, source=source, output_path=output_path, overwrite=overwrite) + + console = Console() + if output_path: + console.print(f"[green]✓ Checkpoint imported to {output}[/green]") + else: + console.print(f"[green] $NEMO_MODELS_CACHE={NEMO_MODELS_CACHE} [/green]") + console.print(f"[green]✓ Checkpoint imported to {output}[/green]") + + return output def load_connector_from_trainer_ckpt(path: Path, target: str) -> io.ModelConnector: @@ -429,7 +495,59 @@ def export_ckpt( overwrite: bool = False, load_connector: Callable[[Path, str], io.ModelConnector] = load_connector_from_trainer_ckpt, ) -> Path: - return io.export_ckpt(path, target, output_path, overwrite, load_connector) + """ + Exports a checkpoint from a model using the model's associated exporter, typically for + the purpose of sharing a model that has been fine-tuned or customized within NeMo. + + This function can be used both programmatically and through the NeMo CLI: + + CLI Usage: + ```bash + # Export model to HuggingFace format (saves to {checkpoint_path}/hf/) + nemo llm export /path/to/model.nemo target="hf" + + # Export with custom output path + nemo llm export /path/to/model.nemo target="hf" output_path="/path/to/save" + + # Force overwrite existing export + nemo llm export /path/to/model.nemo target="hf" overwrite=true + ``` + + Python Usage: + ```python + nemo_ckpt_path = Path("/path/to/model.nemo") + export_path = export_ckpt(nemo_ckpt_path, "hf") + ``` + + The exporter component of the model reads the model's state from the specified path and + exports it into the format specified by the 'target' identifier. This is particularly + useful for adapting models that have been developed or fine-tuned within NeMo to be + compatible with other environments or frameworks. + + Args: + path (Path): The path to the model's checkpoint file from which data will be exported. + target (str): The identifier for the exporter that defines the format of the export + (e.g., "hf" for HuggingFace format). + output_path (Optional[Path]): The path where the exported checkpoint will be saved. + If not specified, defaults to {checkpoint_path}/{target}/. + overwrite (bool): If set to True, existing files at the output path will be overwritten. + This is useful for model updates where retaining old checkpoint files is not required. + load_connector (Callable[[Path, str], ModelConnector]): A function to load the appropriate + exporter based on the model and target format. Defaults to `load_connector_from_trainer_ckpt`. + + Returns: + Path: The path where the checkpoint has been saved after export. + + Raises: + ValueError: If the model does not implement ConnectorMixin, indicating a lack of + necessary exporter functionality. + """ + output = io.export_ckpt(path, target, output_path, overwrite, load_connector) + + console = Console() + console.print(f"[green]✓ Checkpoint exported to {output}[/green]") + + return output @run.cli.entrypoint(name="generate", namespace="llm") From 0512ceabed8f0c6f74c7be2dde8697e8e327e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 29 Oct 2024 22:34:13 +0100 Subject: [PATCH 033/125] ci: Use code-freeze via Nemo-FW-Templates (#11073) * ci: Use FW-templates Signed-off-by: Oliver Koenig * rename Signed-off-by: Oliver Koenig * test Signed-off-by: Oliver Koenig * use slack_endpoint Signed-off-by: Oliver Koenig * will this work? 
Signed-off-by: Oliver Koenig * SLACK_RELEASE_ENDPOINT Signed-off-by: Oliver Koenig * secret Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig --- .github/workflows/release-freeze.yml | 142 ++++----------------------- 1 file changed, 19 insertions(+), 123 deletions(-) diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index 2f4799cfc5e28..0097f0aa2f9f3 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -16,13 +16,18 @@ on: type: string jobs: - create-release-branch: + code-freeze: + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml + with: + name_of_library: NeMo-Toolkit + type_of_release: ${{ inputs.type_of_release }} + python_package: nemo + secrets: + SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }} + + freeze-tags: runs-on: ubuntu-latest - if: contains(fromJSON('["ko3n1g"]'), github.actor) - environment: - name: main - outputs: - version: ${{ steps.release-branch.outputs.version }} + needs: [code-freeze] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -30,139 +35,30 @@ jobs: path: ${{ github.run_id }} fetch-depth: 0 fetch-tags: true - ref: main + ref: ${{ needs.code-freeze.outputs.release-branch }} token: ${{ secrets.PAT }} - - name: Get release branch ref - id: release-branch - run: | - cd ${{ github.run_id }} - - if [[ "${{ inputs.type_of_release }}" != "pre_release" ]]; then - sed -i "/^PRE_RELEASE/c\PRE_RELEASE = ''" nemo/package_info.py - fi - - VERSION=$(python -c 'import nemo; print(nemo.__version__)') - - echo "Release version r$VERSION" > version - echo "version=$VERSION" >> "$GITHUB_OUTPUT" - - git switch --force-create r$VERSION origin/main - git push -u origin r$VERSION --force - - name: Pin branch name in Notebooks run: | cd ${{ github.run_id }} - find tutorials -type f -name "*.ipynb" -exec sed -i "s/BRANCH = 'main'/BRANCH = 'r${{ steps.release-branch.outputs.version }}'/g" {} + + find tutorials -type f -name "*.ipynb" -exec sed -i "s/BRANCH = 'main'/BRANCH = '${{ needs.code-freeze.outputs.release-branch }}'/g" {} + - name: Pin MCore in Dockerfile run: | cd ${{ github.run_id }} sed -i 's/^ARG MCORE_TAG=.*$/ARG MCORE_TAG=${{ inputs.mcore_version }}/' Dockerfile.ci - - name: Create Release PR + - name: Create PR uses: peter-evans/create-pull-request@v6 id: create-pull-request with: path: ${{ github.run_id }} - base: r${{ steps.release-branch.outputs.version }} - branch: ci/release-r${{ steps.release-branch.outputs.version }} - title: 'Release `${{ steps.release-branch.outputs.version }}`' + base: ${{ needs.code-freeze.outputs.release-branch }} + branch: ci/freeze-tags-${{ needs.code-freeze.outputs.release-branch }} + title: 'Freeze tags in in `${{ needs.code-freeze.outputs.release-branch }}`' body: | - 🚀 PR to release NeMo `${{ steps.release-branch.outputs.version }}`. - - 📝 Please remember the following to-do's before merge: - - [ ] Fill-in the comment `Highlights` - - [ ] Review the comment `Detailed Changelogs` - - 🚨 Please also keep in mind to _not_ delete the headings of the task commits. They are required by the post-merge automation. + 🚀 PR to freeze tags in `${{ needs.code-freeze.outputs.release-branch }}`. - 🙏 Please merge this PR only if the CI workflow completed successfully. - - commit-message: "[🤠]: Howdy folks, let's release NeMo `${{ steps.release-branch.outputs.version }}` !" + commit-message: "[🤠]: Howdy folks, let's release NeMo `${{ needs.code-freeze.outputs.release-branch }}` !" 
signoff: true assignees: okoenig - labels: 'Run CICD' - - bump-next-version: - runs-on: ubuntu-latest - needs: [create-release-branch] - environment: - name: main - env: - VERSION_FILE: nemo/package_info.py - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - path: ${{ github.run_id }} - fetch-depth: 0 - fetch-tags: true - ref: main - token: ${{ secrets.PAT }} - - - name: Bump version - id: bump-version - run: | - cd ${{ github.run_id }} - PRE_RELEASE=$(cat nemo/package_info.py | awk '/^PRE_RELEASE = /' | awk -F"= " '{print $2}' | tr -d '"' | tr -d "'") - MAJOR=$(cat nemo/package_info.py | awk '/^MAJOR = /' | awk -F"= " '{print $2}') - MINOR=$(cat nemo/package_info.py | awk '/^MINOR = /' | awk -F"= " '{print $2}') - PATCH=$(cat nemo/package_info.py | awk '/^PATCH = /' | awk -F"= " '{print $2}') - - if [[ "${{ inputs.type_of_release }}" == "pre_release" ]]; then - NEXT_MAJOR=$MAJOR - NEXT_MINOR=$MINOR - NEXT_PRE_RELEASE=rc$(( $(echo $PRE_RELEASE | awk -F"rc" '{print $2}') + 1)) - elif [[ "${{ inputs.type_of_release }}" == "major" ]]; then - NEXT_MAJOR=$(( MAJOR + 1)) - NEXT_MINOR=0 - NEXT_PRE_RELEASE=rc0 - else - NEXT_MAJOR=$MAJOR - NEXT_MINOR=$(( MINOR + 1)) - NEXT_PRE_RELEASE=rc0 - fi - - sed -i "/^MAJOR/c\MAJOR = $NEXT_MAJOR" nemo/package_info.py - sed -i "/^MINOR/c\MINOR = $NEXT_MINOR" nemo/package_info.py - sed -i "/^PRE_RELEASE/c\PRE_RELEASE = '$NEXT_PRE_RELEASE'" nemo/package_info.py - - echo "version=$NEXT_MAJOR.$NEXT_MINOR.$PATCH$NEXT_PRE_RELEASE" >> "$GITHUB_OUTPUT" - - - name: Create Version Bump PR - uses: peter-evans/create-pull-request@v6 - id: create-pull-request - with: - path: ${{ github.run_id }} - branch: bot/chore/version-bump-${{ steps.bump-version.outputs.version }} - title: 'Version bump to `${{ steps.bump-version.outputs.version }}`' - body: | - 🚀 Version bump NeMo-Toolkit to `${{ steps.bump-version.outputs.version }}` - - commit-message: "[🤠]: Howdy folks, let's bump NeMo-Toolkit `${{ steps.bump-version.outputs.version }}` !" 
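The `bump-next-version` job deleted in this hunk computed the next version string from `nemo/package_info.py`; presumably that logic now lives in the shared FW-CI templates. A hedged Python restatement of the deleted bash arithmetic, with function name and sample values chosen here for illustration:

```python
def next_version(major: int, minor: int, patch: int, pre_release: str, type_of_release: str) -> str:
    """Mirror of the deleted bash rule for picking the next version."""
    if type_of_release == "pre_release":
        rc = int(pre_release.removeprefix("rc")) + 1   # rc1 -> rc2
        return f"{major}.{minor}.{patch}rc{rc}"
    if type_of_release == "major":
        return f"{major + 1}.0.{patch}rc0"             # bump major, reset minor
    return f"{major}.{minor + 1}.{patch}rc0"           # default: bump minor


assert next_version(2, 0, 0, "rc1", "pre_release") == "2.0.0rc2"
assert next_version(2, 0, 0, "rc1", "major") == "3.0.0rc0"
assert next_version(2, 0, 0, "rc1", "minor") == "2.1.0rc0"
```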
- signoff: true - assignees: okoenig - labels: 'Run CICD' - - notify: - runs-on: ubuntu-latest - needs: [create-release-branch, bump-next-version] - environment: - name: main - steps: - - name: Main - run: | - MESSAGE='{ - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "Releasebot 🤖: NeMo-Toolkit has been frozen 🎉 to branch `r${{ needs.create-release-branch.outputs.version }}`" - } - } - ] - }' - - curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${{ secrets.SLACK_RELEASE_ENDPOINT }} \ No newline at end of file From 4246ae609058b598ff20ed74beb05e32368e3e28 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Tue, 29 Oct 2024 18:40:01 -0700 Subject: [PATCH 034/125] Akoumparouli/hf lit module peft ckpt bugfix (#11022) * Add sharded_state_dict Signed-off-by: Alexandros Koumparoulis * remove sharded_state_dict Signed-off-by: Alexandros Koumparoulis * fallback to state_dict if ckpt does not have sharded_state_dict Signed-off-by: Alexandros Koumparoulis * remove Optional since it was optional Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis --- nemo/lightning/pytorch/callbacks/peft.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index aad2a1696d619..e90d53ad2ac9e 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -313,8 +313,11 @@ def __init__(self, checkpoint_io: Optional["CheckpointIO"] = None, peft: Optiona def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None: assert self.checkpoint_io is not None - checkpoint['sharded_state_dict'] = dict( - filter(lambda item: self.peft.adapter_key_filter(item[0]), checkpoint['sharded_state_dict'].items()) + state_key = 'sharded_state_dict' + if not state_key in checkpoint: + state_key = 'state_dict' + checkpoint[state_key] = dict( + filter(lambda item: self.peft.adapter_key_filter(item[0]), checkpoint[state_key].items()) ) request = self.checkpoint_io.save_checkpoint(checkpoint, path, storage_options=storage_options) @@ -322,7 +325,9 @@ def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_optio if is_global_rank_zero(): metadata = {"model_ckpt_path": str(self.model_ckpt_path)} - adapter_meta_path = ckpt_to_dir(path) / ADAPTER_META_FILENAME + base_dir = ckpt_to_dir(path) + base_dir.mkdir(parents=True, exist_ok=True) + adapter_meta_path = base_dir / ADAPTER_META_FILENAME with open(adapter_meta_path, "w") as f: json.dump(metadata, f) return request From a8fd3d6f25d363f09e259ad1fd58be2ee3f683f5 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Tue, 29 Oct 2024 18:40:15 -0700 Subject: [PATCH 035/125] NeMo 1.0: upcycle dense to moe (#11002) * upcycle dense to moe Signed-off-by: Alexandros Koumparoulis * fix(?) 
path when saving Signed-off-by: Alexandros Koumparoulis * bot happy Signed-off-by: Alexandros Koumparoulis * bot happy #2 Signed-off-by: Alexandros Koumparoulis * add unwrap method Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa * move file Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa --- .../language_modeling/upcycle_dense_to_moe.py | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 examples/nlp/language_modeling/upcycle_dense_to_moe.py diff --git a/examples/nlp/language_modeling/upcycle_dense_to_moe.py b/examples/nlp/language_modeling/upcycle_dense_to_moe.py new file mode 100644 index 0000000000000..a1f4b6000b6fd --- /dev/null +++ b/examples/nlp/language_modeling/upcycle_dense_to_moe.py @@ -0,0 +1,115 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Conversion script to convert NeMo Mistral-7B checkpoints into HuggingFace checkpoint. + Example to run this conversion script: + python3 upcycle_dense_to_moe.py \ + --model \ + --num-experts 8 \ + --output_path +""" + +from argparse import ArgumentParser +from pathlib import Path + +import torch +import torch.nn +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector +from nemo.utils import logging + + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--model", type=str, default=None, required=True, help="Path to NeMo checkpoint") + parser.add_argument( + "--output-path", type=str, default='', required=False, help="Path to NeMo save upcycled checkpoint" + ) + parser.add_argument( + "--num-experts", type=int, default=8, required=True, help="Number of experts to use in upcycled model." + ) + args = parser.parse_args() + assert isinstance(args.num_experts, int) + assert args.num_experts > 1, "Expected --num-experts to be greater-than 1." + if args.output_path == '': + args.output_path = args.model + f'_upcycled_num_exp{args.num_experts}.nemo' + return args + + +def make_moe_config_from_dense(config, num_experts=8): + from copy import deepcopy + + moe_config = deepcopy(config) + moe_config['num_moe_experts'] = num_experts + return moe_config + + +def unwrap(model): + tmp = model + while hasattr(tmp, 'module'): + tmp = tmp.module + return tmp + + +def upcycle(in_file, num_experts, cpu_only=True) -> None: + """ + Upcycle dense checkpoint to MoE. 
+ """ + + logging.info(f'Loading NeMo checkpoint from: {in_file}') + + dummy_trainer = Trainer(devices=1, accelerator='cpu', strategy=NLPDDPStrategy()) + + # Load dense model + model_config = MegatronGPTModel.restore_from(in_file, trainer=dummy_trainer, return_config=True) + model_config.tensor_model_parallel_size = 1 + model_config.pipeline_model_parallel_size = 1 + model_config.sequence_parallel = False + if cpu_only: + map_location = torch.device('cpu') + model_config.use_cpu_initialization = True + else: + map_location = None + model_config.perform_initialization = False + dense_model = MegatronGPTModel.restore_from( + in_file, trainer=dummy_trainer, override_config_path=model_config, map_location=map_location + ) + + # Make upcycled config + moe_config = make_moe_config_from_dense(model_config, num_experts) + # print(moe_config) + # quit() + dummy_trainer2 = Trainer(devices=1, accelerator='cpu', strategy=NLPDDPStrategy()) + moe_model = MegatronGPTModel(moe_config, trainer=dummy_trainer2) + + # convert state dict dense -> MoE + from megatron.core.transformer.moe.upcycling_utils import upcycle_state_dict + + moe_state_dict = upcycle_state_dict([unwrap(moe_model.model)], [unwrap(dense_model.model)]) + moe_model.model.module.load_state_dict(moe_state_dict['model']) + moe_model._save_restore_connector = NLPSaveRestoreConnector() + # hack + if Path(args.model).is_dir(): + moe_model._save_restore_connector._model_extracted_dir = args.model + + moe_model.save_to(args.output_path) + + +if __name__ == '__main__': + args = get_args() + upcycle(args.model, args.num_experts) + logging.info(f'Upcycled checkpoint saved to: {args.output_path}') From 85e14ca6c4d357a85d611ce5322f26ae206c3a46 Mon Sep 17 00:00:00 2001 From: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Date: Tue, 29 Oct 2024 20:30:35 -0700 Subject: [PATCH 036/125] Update mcore parallelism initialization in nemo2 (#10643) * update mcore parallelism initialization Signed-off-by: yaoyu-33 * Apply isort and black reformatting Signed-off-by: yaoyu-33 * Update megatron_init.py Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> * add encoder parallel default config Signed-off-by: yaoyu-33 * Fix _strategy_lib.py Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> * update megatron_init.py inside lightning Signed-off-by: yaoyu-33 * fix test Signed-off-by: yaoyu-33 * try fix test Signed-off-by: yaoyu-33 * try fix test Signed-off-by: yaoyu-33 * Fix megatron megatron_init.py dp Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> * Update lightning megatron_init.py dp Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> --------- Signed-off-by: yaoyu-33 Signed-off-by: yaoyu-33 Signed-off-by: Yu Yao <54727607+yaoyu-33@users.noreply.github.com> Co-authored-by: yaoyu-33 Co-authored-by: Pablo Garay --- .../modules/common/megatron/megatron_init.py | 116 +++++++++++++++--- nemo/lightning/_strategy_lib.py | 4 + nemo/lightning/megatron_init.py | 116 +++++++++++++++--- .../pytorch/strategies/megatron_strategy.py | 8 ++ nemo/utils/app_state.py | 66 ++++++++++ tests/lightning/test_strategy_lib.py | 6 + 6 files changed, 282 insertions(+), 34 deletions(-) diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_init.py b/nemo/collections/nlp/modules/common/megatron/megatron_init.py index c060d140cb8cd..10b939d4aecb6 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_init.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_init.py @@ -95,6 +95,8 @@ def 
initialize_model_parallel_for_nemo( virtual_pipeline_model_parallel_size=None, pipeline_model_parallel_split_rank=None, context_parallel_size=1, + encoder_tensor_model_parallel_size=0, + encoder_pipeline_model_parallel_size=0, micro_batch_size=None, global_batch_size=None, rampup_batch_size=None, @@ -120,6 +122,8 @@ def initialize_model_parallel_for_nemo( app_state.pipeline_model_parallel_size = pipeline_model_parallel_size app_state.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size app_state.context_parallel_size = context_parallel_size + app_state.encoder_tensor_model_parallel_size = encoder_tensor_model_parallel_size + app_state.encoder_pipeline_model_parallel_size = encoder_pipeline_model_parallel_size app_state.use_fp8 = use_fp8 app_state.init_mpi_proc_group = init_mpi_proc_group ( @@ -139,6 +143,8 @@ def initialize_model_parallel_for_nemo( pipeline_model_parallel_split_rank_=pipeline_model_parallel_split_rank, context_parallel_size_=context_parallel_size, expert_model_parallel_size_=expert_model_parallel_size, + encoder_tensor_model_parallel_size_=encoder_tensor_model_parallel_size, + encoder_pipeline_model_parallel_size_=encoder_pipeline_model_parallel_size, use_tp_pp_dp_mapping=use_tp_pp_dp_mapping, ) @@ -149,12 +155,14 @@ def initialize_model_parallel_for_nemo( set_expert_model_parallel_world_size(app_state.expert_model_parallel_size) set_expert_model_parallel_rank(app_state.expert_model_parallel_rank) + set_pipeline_model_parallel_world_size( + app_state.pipeline_model_parallel_size + app_state.encoder_pipeline_model_parallel_size + ) + set_pipeline_model_parallel_split_rank(app_state.pipeline_model_parallel_split_rank) set_pipeline_model_parallel_rank(app_state.pipeline_model_parallel_rank) if HAVE_INTERLEAVED: set_virtual_pipeline_model_parallel_world_size(app_state.virtual_pipeline_model_parallel_size) set_virtual_pipeline_model_parallel_rank(app_state.virtual_pipeline_model_parallel_rank) - set_pipeline_model_parallel_world_size(app_state.pipeline_model_parallel_size) - set_pipeline_model_parallel_split_rank(app_state.pipeline_model_parallel_split_rank) tensor_parallel.random.initialize_rng_tracker(use_te_rng_tracker=use_te_rng_tracker) if seed is not None: @@ -247,6 +255,8 @@ def fake_initialize_model_parallel( virtual_pipeline_model_parallel_size_=None, expert_model_parallel_size_=1, context_parallel_size_=1, + encoder_tensor_model_parallel_size_=0, + encoder_pipeline_model_parallel_size_=0, use_tp_pp_dp_mapping=False, ): """ @@ -283,37 +293,109 @@ def fake_initialize_model_parallel( model_parallel_size = tensor_model_parallel_size * pipeline_model_parallel_size context_parallel_size = min(context_parallel_size_, world_size) - assert ( - world_size % (tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size) == 0 - ), f'world_size: {world_size} must be divisible by tensor_model_parallel_size: {tensor_model_parallel_size} times pipeline_model_parallel_size {pipeline_model_parallel_size} times context_parallel_size {context_parallel_size}' - data_parallel_size = world_size // ( - tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + if encoder_pipeline_model_parallel_size_ is None: + encoder_pipeline_model_parallel_size = 0 + else: + encoder_pipeline_model_parallel_size = encoder_pipeline_model_parallel_size_ + + if encoder_tensor_model_parallel_size_ == 0 and encoder_pipeline_model_parallel_size_ > 0: + encoder_tensor_model_parallel_size = tensor_model_parallel_size + else: + 
encoder_tensor_model_parallel_size = encoder_tensor_model_parallel_size_ + + if encoder_tensor_model_parallel_size > 0: + assert encoder_pipeline_model_parallel_size > 0 + assert ( + encoder_tensor_model_parallel_size <= tensor_model_parallel_size + ), "We do not support encoders with more TP than the decoder." + + encoder_model_size = ( + encoder_tensor_model_parallel_size * encoder_pipeline_model_parallel_size * context_parallel_size + ) + decoder_model_size = tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + total_model_size = encoder_model_size + decoder_model_size + + assert world_size % total_model_size == 0, ( + f'world_size: {world_size} must be divisible by total world_size: ' + f'(decoder_)tensor_model_parallel_size {tensor_model_parallel_size} ' + f'* (decoder_)pipeline_model_parallel_size {pipeline_model_parallel_size} ' + f'* (decoder_)context_parallel_size {context_parallel_size} + ' + f'encoder_tensor_model_parallel_size {encoder_tensor_model_parallel_size} ' + f'* encoder_pipeline_model_parallel_size {encoder_pipeline_model_parallel_size} ' + f'* context_parallel_size {context_parallel_size}' ) + data_parallel_size = world_size // total_model_size - num_tensor_model_parallel_groups = world_size // tensor_model_parallel_size - num_pipeline_model_parallel_groups = world_size // pipeline_model_parallel_size + encoder_world_size = encoder_model_size * data_parallel_size + decoder_world_size = decoder_model_size * data_parallel_size + assert encoder_world_size + decoder_world_size == world_size virtual_pipeline_model_parallel_rank = None if virtual_pipeline_model_parallel_size_ is not None: virtual_pipeline_model_parallel_rank = 0 - rank_generator = RankGenerator( + if encoder_world_size > 0: + encoder_rank_generator = RankGenerator( + tp=encoder_tensor_model_parallel_size, + ep=1, + dp=data_parallel_size, + pp=encoder_pipeline_model_parallel_size, + cp=context_parallel_size, + order='tp-pp-dp' if use_tp_pp_dp_mapping else 'tp-cp-ep-dp-pp', + rank_offset=0, + ) + else: + encoder_rank_generator = None + + decoder_rank_generator = RankGenerator( tp=tensor_model_parallel_size, ep=expert_model_parallel_size_, dp=data_parallel_size, pp=pipeline_model_parallel_size, cp=context_parallel_size, order='tp-pp-dp' if use_tp_pp_dp_mapping else 'tp-cp-ep-dp-pp', + rank_offset=encoder_world_size, ) + def generator_wrapper(group_type, **kwargs): + from itertools import cycle + + """The `RankGenerator` class produces a hyper-rectangle for a given set of + tensor, pipeline, data, expert, and context parallelism. If we have an encoder, + in addition to the default decoder, we essentially instantiate two `RankGenerator` + classes to construct the parallelism for each module separately, and we then have + to stitch them together for the right groups. For now, this means pp and tp-pp.""" + d_ranks = decoder_rank_generator.get_ranks(group_type, **kwargs) + if encoder_rank_generator is None: + for x in d_ranks: + yield x + return + e_ranks = encoder_rank_generator.get_ranks(group_type, **kwargs) + if group_type == 'pp': + # Map 1 encoder tp rank to several decoder tp ranks, because + # these won't be the same size. + for x, y in zip(cycle(e_ranks), d_ranks): + yield x + y + elif group_type == 'tp-pp': + # For this group, we can just return the concatenated + # groups together, because their sizes are the same. 
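To make the stitching in `generator_wrapper` concrete, a standalone toy example with made-up rank groups and no Megatron imports; it only mimics the `pp` and `tp-pp` branches described above.

```python
from itertools import cycle

# Hypothetical layout: 2 encoder PP groups, 4 decoder PP groups.
encoder_pp_groups = [[0], [1]]
decoder_pp_groups = [[2, 4], [3, 5], [6, 8], [7, 9]]

# 'pp': the encoder groups are cycled over the (more numerous) decoder
# groups, so every stitched PP group gets one encoder entry.
pp_groups = [e + d for e, d in zip(cycle(encoder_pp_groups), decoder_pp_groups)]
print(pp_groups)  # [[0, 2, 4], [1, 3, 5], [0, 6, 8], [1, 7, 9]]

# 'tp-pp': both sides yield the same number of groups, so they are simply
# concatenated pairwise.
encoder_mp_groups = [[0], [1]]
decoder_mp_groups = [[2, 4, 6, 8], [3, 5, 7, 9]]
print([e + d for e, d in zip(encoder_mp_groups, decoder_mp_groups)])
# [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]]
```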
+ assert len(e_ranks) == len(d_ranks) + for x, y in zip(e_ranks, d_ranks): + yield x + y + else: + for x in e_ranks: + yield x + for x in d_ranks: + yield x + # Build the data-parallel groups. all_data_parallel_group_ranks_with_cp = [] - for ranks in rank_generator.get_ranks('dp'): + for ranks in generator_wrapper('dp'): if rank in ranks: data_parallel_group = list(ranks) logging.info(f'Rank {rank} has data parallel group : {data_parallel_group}') - for ranks_with_cp in rank_generator.get_ranks('dp-cp'): + for ranks_with_cp in generator_wrapper('dp-cp'): all_data_parallel_group_ranks_with_cp.append(ranks_with_cp) if rank in ranks_with_cp: data_parallel_group_with_cp = ranks_with_cp @@ -329,7 +411,7 @@ def fake_initialize_model_parallel( # Build the context-parallel groups. all_context_parallel_group_ranks = [] - for ranks in rank_generator.get_ranks('cp'): + for ranks in generator_wrapper('cp'): all_context_parallel_group_ranks.append(ranks) if rank in ranks: context_parallel_group = ranks @@ -341,7 +423,7 @@ def fake_initialize_model_parallel( # Build the model-parallel groups. all_model_parallel_group_ranks = [] - for ranks in rank_generator.get_ranks('tp-pp'): + for ranks in generator_wrapper('tp-pp'): all_model_parallel_group_ranks.append(ranks) if rank in ranks: logging.info(f'Rank {rank} has model parallel group: {list(ranks)}') @@ -350,7 +432,7 @@ def fake_initialize_model_parallel( # Build the tensor model-parallel groups. all_tensor_model_parallel_group_ranks = [] tensor_model_parallel_group = None - for ranks in rank_generator.get_ranks('tp'): + for ranks in generator_wrapper('tp'): all_tensor_model_parallel_group_ranks.append(ranks) if rank in ranks: tensor_model_parallel_group = ranks @@ -364,7 +446,7 @@ def fake_initialize_model_parallel( # EP rank expert_model_parallel_rank = 0 if expert_model_parallel_size_ is not None and expert_model_parallel_size_ > 1: - for ranks in rank_generator.get_ranks('ep', independent_ep=True): + for ranks in generator_wrapper('ep', independent_ep=True): if rank in ranks: expert_model_parallel_rank = list(ranks).index(rank) @@ -375,7 +457,7 @@ def fake_initialize_model_parallel( pipeline_model_parallel_group = None embedding_group = None embedding_rank = None - for ranks in rank_generator.get_ranks('pp'): + for ranks in generator_wrapper('pp'): all_pipeline_model_parallel_group_ranks.append(ranks) if rank in ranks: pipeline_model_parallel_group = ranks diff --git a/nemo/lightning/_strategy_lib.py b/nemo/lightning/_strategy_lib.py index ba4847219ed38..40a79c94c59fa 100644 --- a/nemo/lightning/_strategy_lib.py +++ b/nemo/lightning/_strategy_lib.py @@ -84,6 +84,8 @@ def init_parallel_ranks( pipeline_model_parallel_size=parallel_config.pipeline_model_parallel_size, virtual_pipeline_model_parallel_size=parallel_config.virtual_pipeline_model_parallel_size, context_parallel_size=parallel_config.context_parallel_size, + encoder_tensor_model_parallel_size=getattr(parallel_config, "encoder_tensor_model_parallel_size", 0), + encoder_pipeline_model_parallel_size=getattr(parallel_config, "encoder_pipeline_model_parallel_size", 0), seed=seed, pipeline_model_parallel_split_rank=getattr(parallel_config, "pipeline_model_parallel_split_rank", None), use_fp8=fp8, @@ -113,6 +115,8 @@ def init_model_parallel(model: Optional[nn.Module] = None) -> None: pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size, 
pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + encoder_pipeline_model_parallel_size=app_state.encoder_pipeline_model_parallel_size, + encoder_tensor_model_parallel_size=app_state.encoder_tensor_model_parallel_size, context_parallel_size=app_state.context_parallel_size, expert_model_parallel_size=app_state.expert_model_parallel_size, ) diff --git a/nemo/lightning/megatron_init.py b/nemo/lightning/megatron_init.py index c060d140cb8cd..10b939d4aecb6 100644 --- a/nemo/lightning/megatron_init.py +++ b/nemo/lightning/megatron_init.py @@ -95,6 +95,8 @@ def initialize_model_parallel_for_nemo( virtual_pipeline_model_parallel_size=None, pipeline_model_parallel_split_rank=None, context_parallel_size=1, + encoder_tensor_model_parallel_size=0, + encoder_pipeline_model_parallel_size=0, micro_batch_size=None, global_batch_size=None, rampup_batch_size=None, @@ -120,6 +122,8 @@ def initialize_model_parallel_for_nemo( app_state.pipeline_model_parallel_size = pipeline_model_parallel_size app_state.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size app_state.context_parallel_size = context_parallel_size + app_state.encoder_tensor_model_parallel_size = encoder_tensor_model_parallel_size + app_state.encoder_pipeline_model_parallel_size = encoder_pipeline_model_parallel_size app_state.use_fp8 = use_fp8 app_state.init_mpi_proc_group = init_mpi_proc_group ( @@ -139,6 +143,8 @@ def initialize_model_parallel_for_nemo( pipeline_model_parallel_split_rank_=pipeline_model_parallel_split_rank, context_parallel_size_=context_parallel_size, expert_model_parallel_size_=expert_model_parallel_size, + encoder_tensor_model_parallel_size_=encoder_tensor_model_parallel_size, + encoder_pipeline_model_parallel_size_=encoder_pipeline_model_parallel_size, use_tp_pp_dp_mapping=use_tp_pp_dp_mapping, ) @@ -149,12 +155,14 @@ def initialize_model_parallel_for_nemo( set_expert_model_parallel_world_size(app_state.expert_model_parallel_size) set_expert_model_parallel_rank(app_state.expert_model_parallel_rank) + set_pipeline_model_parallel_world_size( + app_state.pipeline_model_parallel_size + app_state.encoder_pipeline_model_parallel_size + ) + set_pipeline_model_parallel_split_rank(app_state.pipeline_model_parallel_split_rank) set_pipeline_model_parallel_rank(app_state.pipeline_model_parallel_rank) if HAVE_INTERLEAVED: set_virtual_pipeline_model_parallel_world_size(app_state.virtual_pipeline_model_parallel_size) set_virtual_pipeline_model_parallel_rank(app_state.virtual_pipeline_model_parallel_rank) - set_pipeline_model_parallel_world_size(app_state.pipeline_model_parallel_size) - set_pipeline_model_parallel_split_rank(app_state.pipeline_model_parallel_split_rank) tensor_parallel.random.initialize_rng_tracker(use_te_rng_tracker=use_te_rng_tracker) if seed is not None: @@ -247,6 +255,8 @@ def fake_initialize_model_parallel( virtual_pipeline_model_parallel_size_=None, expert_model_parallel_size_=1, context_parallel_size_=1, + encoder_tensor_model_parallel_size_=0, + encoder_pipeline_model_parallel_size_=0, use_tp_pp_dp_mapping=False, ): """ @@ -283,37 +293,109 @@ def fake_initialize_model_parallel( model_parallel_size = tensor_model_parallel_size * pipeline_model_parallel_size context_parallel_size = min(context_parallel_size_, world_size) - assert ( - world_size % (tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size) == 0 - ), f'world_size: {world_size} must be divisible by tensor_model_parallel_size: {tensor_model_parallel_size} times 
pipeline_model_parallel_size {pipeline_model_parallel_size} times context_parallel_size {context_parallel_size}' - data_parallel_size = world_size // ( - tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + if encoder_pipeline_model_parallel_size_ is None: + encoder_pipeline_model_parallel_size = 0 + else: + encoder_pipeline_model_parallel_size = encoder_pipeline_model_parallel_size_ + + if encoder_tensor_model_parallel_size_ == 0 and encoder_pipeline_model_parallel_size_ > 0: + encoder_tensor_model_parallel_size = tensor_model_parallel_size + else: + encoder_tensor_model_parallel_size = encoder_tensor_model_parallel_size_ + + if encoder_tensor_model_parallel_size > 0: + assert encoder_pipeline_model_parallel_size > 0 + assert ( + encoder_tensor_model_parallel_size <= tensor_model_parallel_size + ), "We do not support encoders with more TP than the decoder." + + encoder_model_size = ( + encoder_tensor_model_parallel_size * encoder_pipeline_model_parallel_size * context_parallel_size + ) + decoder_model_size = tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size + total_model_size = encoder_model_size + decoder_model_size + + assert world_size % total_model_size == 0, ( + f'world_size: {world_size} must be divisible by total world_size: ' + f'(decoder_)tensor_model_parallel_size {tensor_model_parallel_size} ' + f'* (decoder_)pipeline_model_parallel_size {pipeline_model_parallel_size} ' + f'* (decoder_)context_parallel_size {context_parallel_size} + ' + f'encoder_tensor_model_parallel_size {encoder_tensor_model_parallel_size} ' + f'* encoder_pipeline_model_parallel_size {encoder_pipeline_model_parallel_size} ' + f'* context_parallel_size {context_parallel_size}' ) + data_parallel_size = world_size // total_model_size - num_tensor_model_parallel_groups = world_size // tensor_model_parallel_size - num_pipeline_model_parallel_groups = world_size // pipeline_model_parallel_size + encoder_world_size = encoder_model_size * data_parallel_size + decoder_world_size = decoder_model_size * data_parallel_size + assert encoder_world_size + decoder_world_size == world_size virtual_pipeline_model_parallel_rank = None if virtual_pipeline_model_parallel_size_ is not None: virtual_pipeline_model_parallel_rank = 0 - rank_generator = RankGenerator( + if encoder_world_size > 0: + encoder_rank_generator = RankGenerator( + tp=encoder_tensor_model_parallel_size, + ep=1, + dp=data_parallel_size, + pp=encoder_pipeline_model_parallel_size, + cp=context_parallel_size, + order='tp-pp-dp' if use_tp_pp_dp_mapping else 'tp-cp-ep-dp-pp', + rank_offset=0, + ) + else: + encoder_rank_generator = None + + decoder_rank_generator = RankGenerator( tp=tensor_model_parallel_size, ep=expert_model_parallel_size_, dp=data_parallel_size, pp=pipeline_model_parallel_size, cp=context_parallel_size, order='tp-pp-dp' if use_tp_pp_dp_mapping else 'tp-cp-ep-dp-pp', + rank_offset=encoder_world_size, ) + def generator_wrapper(group_type, **kwargs): + from itertools import cycle + + """The `RankGenerator` class produces a hyper-rectangle for a given set of + tensor, pipeline, data, expert, and context parallelism. If we have an encoder, + in addition to the default decoder, we essentially instantiate two `RankGenerator` + classes to construct the parallelism for each module separately, and we then have + to stitch them together for the right groups. 
For now, this means pp and tp-pp.""" + d_ranks = decoder_rank_generator.get_ranks(group_type, **kwargs) + if encoder_rank_generator is None: + for x in d_ranks: + yield x + return + e_ranks = encoder_rank_generator.get_ranks(group_type, **kwargs) + if group_type == 'pp': + # Map 1 encoder tp rank to several decoder tp ranks, because + # these won't be the same size. + for x, y in zip(cycle(e_ranks), d_ranks): + yield x + y + elif group_type == 'tp-pp': + # For this group, we can just return the concatenated + # groups together, because their sizes are the same. + assert len(e_ranks) == len(d_ranks) + for x, y in zip(e_ranks, d_ranks): + yield x + y + else: + for x in e_ranks: + yield x + for x in d_ranks: + yield x + # Build the data-parallel groups. all_data_parallel_group_ranks_with_cp = [] - for ranks in rank_generator.get_ranks('dp'): + for ranks in generator_wrapper('dp'): if rank in ranks: data_parallel_group = list(ranks) logging.info(f'Rank {rank} has data parallel group : {data_parallel_group}') - for ranks_with_cp in rank_generator.get_ranks('dp-cp'): + for ranks_with_cp in generator_wrapper('dp-cp'): all_data_parallel_group_ranks_with_cp.append(ranks_with_cp) if rank in ranks_with_cp: data_parallel_group_with_cp = ranks_with_cp @@ -329,7 +411,7 @@ def fake_initialize_model_parallel( # Build the context-parallel groups. all_context_parallel_group_ranks = [] - for ranks in rank_generator.get_ranks('cp'): + for ranks in generator_wrapper('cp'): all_context_parallel_group_ranks.append(ranks) if rank in ranks: context_parallel_group = ranks @@ -341,7 +423,7 @@ def fake_initialize_model_parallel( # Build the model-parallel groups. all_model_parallel_group_ranks = [] - for ranks in rank_generator.get_ranks('tp-pp'): + for ranks in generator_wrapper('tp-pp'): all_model_parallel_group_ranks.append(ranks) if rank in ranks: logging.info(f'Rank {rank} has model parallel group: {list(ranks)}') @@ -350,7 +432,7 @@ def fake_initialize_model_parallel( # Build the tensor model-parallel groups. 
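A small worked example of the world-size bookkeeping used here, with toy parallel sizes that are not taken from the patch:

```python
# Decoder: TP=4, PP=1, CP=1; encoder: TP=2, PP=1, CP=1; 12 ranks total.
world_size = 12
decoder_model_size = 4 * 1 * 1
encoder_model_size = 2 * 1 * 1
total_model_size = decoder_model_size + encoder_model_size  # 6

assert world_size % total_model_size == 0
data_parallel_size = world_size // total_model_size          # 2

encoder_world_size = encoder_model_size * data_parallel_size  # 4 ranks for the encoder
decoder_world_size = decoder_model_size * data_parallel_size  # 8 ranks for the decoder
assert encoder_world_size + decoder_world_size == world_size
```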
all_tensor_model_parallel_group_ranks = [] tensor_model_parallel_group = None - for ranks in rank_generator.get_ranks('tp'): + for ranks in generator_wrapper('tp'): all_tensor_model_parallel_group_ranks.append(ranks) if rank in ranks: tensor_model_parallel_group = ranks @@ -364,7 +446,7 @@ def fake_initialize_model_parallel( # EP rank expert_model_parallel_rank = 0 if expert_model_parallel_size_ is not None and expert_model_parallel_size_ > 1: - for ranks in rank_generator.get_ranks('ep', independent_ep=True): + for ranks in generator_wrapper('ep', independent_ep=True): if rank in ranks: expert_model_parallel_rank = list(ranks).index(rank) @@ -375,7 +457,7 @@ def fake_initialize_model_parallel( pipeline_model_parallel_group = None embedding_group = None embedding_rank = None - for ranks in rank_generator.get_ranks('pp'): + for ranks in generator_wrapper('pp'): all_pipeline_model_parallel_group_ranks.append(ranks) if rank in ranks: pipeline_model_parallel_group = ranks diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py index c61c3371cc3cc..c22df7cc9dfe3 100644 --- a/nemo/lightning/pytorch/strategies/megatron_strategy.py +++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py @@ -97,6 +97,8 @@ class ParallelismConfig: expert_model_parallel_size: int moe_extended_tp: bool pipeline_dtype: torch.dtype + encoder_tensor_model_parallel_size: int = 0 + encoder_pipeline_model_parallel_size: int = 0 class MegatronStrategy(DDPStrategy, io.IOMixin): @@ -177,6 +179,8 @@ def __init__( sequence_parallel: bool = False, expert_model_parallel_size: int = 1, moe_extended_tp: bool = False, + encoder_tensor_model_parallel_size: Optional[int] = 0, + encoder_pipeline_model_parallel_size: Optional[int] = 0, data_sampler: Optional["DataSampler"] = None, parallel_devices: Optional[List[torch.device]] = None, cluster_environment=None, # TODO: Add type-hint @@ -220,6 +224,8 @@ def __init__( self.moe_extended_tp = moe_extended_tp self.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size self.sequence_parallel = sequence_parallel + self.encoder_tensor_model_parallel_size = encoder_tensor_model_parallel_size + self.encoder_pipeline_model_parallel_size = encoder_pipeline_model_parallel_size self.lazy_init = lazy_init self.ckpt_load_optimizer = ckpt_load_optimizer self.ckpt_save_optimizer = ckpt_save_optimizer @@ -821,6 +827,8 @@ def parallelism(self) -> ParallelismConfig: sequence_parallel=self.sequence_parallel, expert_model_parallel_size=self.expert_model_parallel_size, moe_extended_tp=self.moe_extended_tp, + encoder_tensor_model_parallel_size=self.encoder_tensor_model_parallel_size, + encoder_pipeline_model_parallel_size=self.encoder_pipeline_model_parallel_size, pipeline_dtype=self.pipeline_dtype, ) diff --git a/nemo/utils/app_state.py b/nemo/utils/app_state.py index 7a60c3969df38..37193cfdd8c5d 100644 --- a/nemo/utils/app_state.py +++ b/nemo/utils/app_state.py @@ -50,6 +50,8 @@ def __init__(self): self._expert_model_parallel_size = None self._pipeline_model_parallel_size = None self._virtual_pipeline_model_parallel_size = None + self._encoder_tensor_model_parallel_size = None + self._encoder_pipeline_model_parallel_size = None self._pipeline_model_parallel_group = None self._pipeline_model_parallel_split_rank = None self._is_megatron_initialized = False @@ -200,6 +202,38 @@ def pipeline_model_parallel_size(self, size): """ self._pipeline_model_parallel_size = size + @property + def 
encoder_tensor_model_parallel_size(self): + """Property returns the number of GPUs in each model parallel group. + Returns: + Number of GPUs in each model parallel group. + """ + return self._encoder_tensor_model_parallel_size + + @encoder_tensor_model_parallel_size.setter + def encoder_tensor_model_parallel_size(self, size): + """Property sets the number of GPUs in each model parallel group. + Args: + size (int): Number of GPUs in each model parallel group. + """ + self._encoder_tensor_model_parallel_size = size + + @property + def encoder_pipeline_model_parallel_size(self): + """Property returns the number of GPUs in each model parallel group. + Returns: + Number of GPUs in each model parallel group. + """ + return self._encoder_pipeline_model_parallel_size + + @encoder_pipeline_model_parallel_size.setter + def encoder_pipeline_model_parallel_size(self, size): + """Property sets the number of GPUs in each model parallel group. + Args: + size (int): Number of GPUs in each model parallel group. + """ + self._encoder_pipeline_model_parallel_size = size + @property def use_tp_pp_dp_mapping(self): return self._use_tp_pp_dp_mapping @@ -336,6 +370,38 @@ def virtual_pipeline_model_parallel_rank(self, rank): """ self._virtual_pipeline_model_parallel_rank = rank + @property + def encoder_tensor_model_parallel_rank(self): + """Property returns the encoder tensor model parallel rank. + Returns: + Tensor model parallel rank. + """ + return self._encoder_tensor_model_parallel_rank + + @encoder_tensor_model_parallel_rank.setter + def encoder_tensor_model_parallel_rank(self, rank): + """Property sets the encoder tensor model parallel rank. + Args: + rank (int): Tensor model parallel rank. + """ + self._encoder_tensor_model_parallel_rank = rank + + @property + def encoder_pipeline_model_parallel_rank(self): + """Property returns the encoder pipeline model parallel rank. + Returns: + Tensor model parallel rank. + """ + return self._encoder_pipeline_model_parallel_rank + + @encoder_pipeline_model_parallel_rank.setter + def encoder_pipeline_model_parallel_rank(self, rank): + """Property sets the encoder pipeline model parallel rank. + Args: + rank (int): Tensor model parallel rank. + """ + self._encoder_pipeline_model_parallel_rank = rank + @property def pipeline_model_parallel_split_rank(self): """Property returns the rank at which Encoder and Decoder are split into different pipelines for Megatrron Encoder-Decoder models. 
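A hedged sketch of how the new encoder-specific sizes are supplied from user code; the numeric values are placeholders, and only the keyword names come from the `MegatronStrategy` changes above.

```python
from nemo import lightning as nl

# Decoder uses TP=4; the encoder runs with a smaller TP=2 on its own
# pipeline stage. These numbers are illustrative only.
strategy = nl.MegatronStrategy(
    tensor_model_parallel_size=4,
    pipeline_model_parallel_size=1,
    encoder_tensor_model_parallel_size=2,
    encoder_pipeline_model_parallel_size=1,
)

# The ParallelismConfig built by the strategy now carries the encoder
# fields, which init_parallel_ranks() forwards into AppState and
# fake_initialize_model_parallel().
print(strategy.parallelism)
```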
diff --git a/tests/lightning/test_strategy_lib.py b/tests/lightning/test_strategy_lib.py index 4410d0b1b9106..241debd16316d 100644 --- a/tests/lightning/test_strategy_lib.py +++ b/tests/lightning/test_strategy_lib.py @@ -78,6 +78,8 @@ def test_init_parallel_ranks() -> None: mock_parallel_config.virtual_pipeline_model_parallel_size = 4 mock_parallel_config.context_parallel_size = 2 mock_parallel_config.expert_model_parallel_size = 2 + mock_parallel_config.encoder_tensor_model_parallel_size = 0 + mock_parallel_config.encoder_pipeline_model_parallel_size = 0 mock_parallel_config.tp_comm_overlap = False mock_parallel_config.pipeline_model_parallel_split_rank = None @@ -99,6 +101,8 @@ def test_init_parallel_ranks() -> None: "context_parallel_size": 2, "expert_model_parallel_size": 2, "pipeline_model_parallel_split_rank": None, + "encoder_pipeline_model_parallel_size": 0, + "encoder_tensor_model_parallel_size": 0, "use_fp8": False, "init_mpi_proc_group": False, } @@ -135,6 +139,8 @@ def test_init_model_parallel(mock_mpu, *args): pipeline_model_parallel_size=1, virtual_pipeline_model_parallel_size=None, pipeline_model_parallel_split_rank=None, + encoder_pipeline_model_parallel_size=None, + encoder_tensor_model_parallel_size=None, context_parallel_size=2, expert_model_parallel_size=2, ) From f2cb252ffff90d789d9908676e02369b4c497e7e Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Wed, 30 Oct 2024 01:42:39 -0400 Subject: [PATCH 037/125] Gemma2 in Nemo2 with Recipes (#11037) * add gemma2 in nemo2.0 and 2b recipe * Apply isort and black reformatting Signed-off-by: suiyoubi * Fix gemma1 inference bug * add more recipe * minor fix * recipe fix * Apply isort and black reformatting Signed-off-by: suiyoubi * merge fix --------- Signed-off-by: suiyoubi Co-authored-by: suiyoubi --- nemo/collections/llm/__init__.py | 10 + nemo/collections/llm/gpt/model/__init__.py | 12 + nemo/collections/llm/gpt/model/gemma.py | 6 +- nemo/collections/llm/gpt/model/gemma2.py | 378 ++++++++++++++++++ nemo/collections/llm/recipes/__init__.py | 8 + nemo/collections/llm/recipes/gemma2.py | 140 +++++++ nemo/collections/llm/recipes/gemma2_27b.py | 222 ++++++++++ nemo/collections/llm/recipes/gemma2_2b.py | 219 ++++++++++ nemo/collections/llm/recipes/gemma2_9b.py | 220 ++++++++++ .../megatron/gemma2/gemma2_modules.py | 5 +- .../convert_gemma2_hf_to_nemo.py | 2 +- 11 files changed, 1217 insertions(+), 5 deletions(-) create mode 100644 nemo/collections/llm/gpt/model/gemma2.py create mode 100644 nemo/collections/llm/recipes/gemma2.py create mode 100644 nemo/collections/llm/recipes/gemma2_27b.py create mode 100644 nemo/collections/llm/recipes/gemma2_2b.py create mode 100644 nemo/collections/llm/recipes/gemma2_9b.py diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py index 6f8070015c074..3fe20173cba26 100644 --- a/nemo/collections/llm/__init__.py +++ b/nemo/collections/llm/__init__.py @@ -46,6 +46,11 @@ CodeLlamaConfig13B, CodeLlamaConfig34B, CodeLlamaConfig70B, + Gemma2Config, + Gemma2Config2B, + Gemma2Config9B, + Gemma2Config27B, + Gemma2Model, GemmaConfig, GemmaConfig2B, GemmaConfig7B, @@ -165,6 +170,11 @@ "CodeGemmaConfig2B", "CodeGemmaConfig7B", "GemmaModel", + "Gemma2Model", + "Gemma2Config9B", + "Gemma2Config", + "Gemma2Config27B", + "Gemma2Config2B", "Baichuan2Config", "Baichuan2Config7B", "Baichuan2Model", diff --git a/nemo/collections/llm/gpt/model/__init__.py b/nemo/collections/llm/gpt/model/__init__.py index a0d5d5a926630..9699a9376d9ac 100644 --- a/nemo/collections/llm/gpt/model/__init__.py +++ 
b/nemo/collections/llm/gpt/model/__init__.py @@ -37,6 +37,13 @@ GemmaConfig7B, GemmaModel, ) +from nemo.collections.llm.gpt.model.gemma2 import ( + Gemma2Config, + Gemma2Config2B, + Gemma2Config9B, + Gemma2Config27B, + Gemma2Model, +) from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HfAutoModelForCausalLM from nemo.collections.llm.gpt.model.llama import ( CodeLlamaConfig7B, @@ -142,6 +149,11 @@ "CodeGemmaConfig2B", "CodeGemmaConfig7B", "GemmaModel", + "Gemma2Config", + "Gemma2Config27B", + "Gemma2Config2B", + "Gemma2Config9B", + "Gemma2Model", "LlamaModel", "Baichuan2Config", "Baichuan2Config7B", diff --git a/nemo/collections/llm/gpt/model/gemma.py b/nemo/collections/llm/gpt/model/gemma.py index 0bd0bf05ea099..bf828bb66277c 100644 --- a/nemo/collections/llm/gpt/model/gemma.py +++ b/nemo/collections/llm/gpt/model/gemma.py @@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Annotated, Callable, Optional import torch +from megatron.core import parallel_state from torch import nn from nemo.collections.llm.fn.activation import openai_gelu @@ -95,7 +96,8 @@ def configure_model(self): from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import EmbeddingScalingMixin super().configure_model() - extend_instance(self.module.embedding, EmbeddingScalingMixin) + if parallel_state.is_pipeline_first_stage(): + extend_instance(self.module.embedding, EmbeddingScalingMixin) @io.model_importer(GemmaModel, "hf") @@ -160,7 +162,7 @@ def make_vocab_size_divisible_by(vocab_size): rotary_base=source.rope_theta, gated_linear_unit=True, make_vocab_size_divisible_by=make_vocab_size_divisible_by(source.vocab_size), - share_embeddings_and_output_weights=False, + share_embeddings_and_output_weights=True, fp16=(dtype_from_hf(source) == torch.float16), bf16=(dtype_from_hf(source) == torch.bfloat16), params_dtype=dtype_from_hf(source), diff --git a/nemo/collections/llm/gpt/model/gemma2.py b/nemo/collections/llm/gpt/model/gemma2.py new file mode 100644 index 0000000000000..6ed73ea3273fd --- /dev/null +++ b/nemo/collections/llm/gpt/model/gemma2.py @@ -0,0 +1,378 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
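The Gemma fix above now applies `EmbeddingScalingMixin` only on the first pipeline stage. For readers unfamiliar with that mixin, a standalone PyTorch sketch of the sqrt(hidden_size) scaling it adds; this is an illustration, not the NeMo mixin itself.

```python
import math

import torch
import torch.nn as nn


class ScaledEmbedding(nn.Embedding):
    """Embedding whose outputs are multiplied by sqrt(hidden_size), Gemma-style."""

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        hidden = super().forward(input_ids)
        return hidden * math.sqrt(self.embedding_dim)


emb = ScaledEmbedding(num_embeddings=256, embedding_dim=2304)  # 2304 = Gemma2 2B hidden size
tokens = torch.randint(0, 256, (1, 8))
print(emb(tokens).shape)  # torch.Size([1, 8, 2304])
```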
+ +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Callable, Optional, Union + +import torch +from megatron.core import parallel_state +from megatron.core.transformer.spec_utils import ModuleSpec +from torch import nn + +from nemo.collections.llm.fn.activation import openai_gelu +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.utils import Config +from nemo.lightning import OptimizerModule, io, teardown +from nemo.lightning.pytorch.utils import dtype_from_hf + +if TYPE_CHECKING: + from transformers import GemmaForCausalLM + + from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer + from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec + + +def gemma2_layer_spec(config: "GPTConfig") -> ModuleSpec: + from nemo.collections.nlp.models.language_modeling.megatron.gemma2.gemma2_spec import get_gemma2_layer_spec + + return get_gemma2_layer_spec() + + +# Note: Gemma requires huggingface transformers >= 4.38 +# Note: these Gemma configs are copied from the corresponding HF model. You may need to modify the parameter for +# your own needs, in particular: seq_length and rotary_base. +@dataclass +class Gemma2Config(GPTConfig): + # configs that are common across model sizes + normalization: str = "RMSNorm" + activation_func: Callable = openai_gelu + gated_linear_unit: bool = True + position_embedding_type: str = "rope" + add_bias_linear: bool = False + seq_length: int = 8192 + kv_channels: int = 256 + attention_dropout: float = 0.0 + hidden_dropout: float = 0.0 + share_embeddings_and_output_weights: bool = True + layernorm_zero_centered_gamma: bool = True + layernorm_epsilon: float = 1e-6 + rotary_base: float = 10000 + window_size: tuple = (4096, 0) + vocab_size: int = 256000 + gradient_accumulation_fusion: bool = False + + transformer_layer_spec: Union[ModuleSpec, Callable[["GPTConfig"], ModuleSpec]] = gemma2_layer_spec + # mcore customization + query_pre_attn_scalar: int = 224 + attn_logit_softcapping: float = 50.0 + final_logit_softcapping: float = 30.0 + + +@dataclass +class Gemma2Config2B(Gemma2Config): + num_layers: int = 26 + hidden_size: int = 2304 + num_attention_heads: int = 8 + num_query_groups: int = 4 + ffn_hidden_size: int = 9216 + query_pre_attn_scalar: int = 256 + + +@dataclass +class Gemma2Config9B(Gemma2Config): + num_layers: int = 42 + hidden_size: int = 3584 + num_attention_heads: int = 16 + num_query_groups: int = 8 + ffn_hidden_size: int = 14336 + query_pre_attn_scalar: int = 256 + + +@dataclass +class Gemma2Config27B(Gemma2Config): + num_layers: int = 46 + hidden_size: int = 4608 + num_attention_heads: int = 32 + num_query_groups: int = 16 + ffn_hidden_size: int = 36864 + query_pre_attn_scalar: int = 144 + + +class Gemma2Model(GPTModel): + def __init__( + self, + config: Annotated[Optional[Gemma2Config], Config[Gemma2Config]] = None, + optim: Optional[OptimizerModule] = None, + tokenizer: Optional["TokenizerSpec"] = None, + model_transform: Optional[Callable[[nn.Module], nn.Module]] = None, + ): + super().__init__(config or Gemma2Config(), optim=optim, tokenizer=tokenizer, model_transform=model_transform) + + def configure_model(self): + from nemo.collections.common.parts.utils import extend_instance + from nemo.collections.nlp.models.language_modeling.megatron.gemma2.gemma2_modules import Gemma2OutputLayer + from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import EmbeddingScalingMixin + + 
super().configure_model() + if parallel_state.is_pipeline_first_stage(): + # Apply Embedding Scaling: sqrt(hidden_size) + extend_instance(self.module.embedding, EmbeddingScalingMixin) + if parallel_state.is_pipeline_last_stage(): + # Prevents final logits from growing excessively by scaling them to a fixed range + extend_instance(self.module.output_layer, Gemma2OutputLayer) + + +@io.model_importer(Gemma2Model, "hf") +class HFGemmaImporter(io.ModelConnector["GemmaForCausalLM", Gemma2Model]): + def init(self) -> Gemma2Model: + return Gemma2Model(self.config, tokenizer=self.tokenizer) + + def apply(self, output_path: Path) -> Path: + from transformers import Gemma2ForCausalLM + + source = Gemma2ForCausalLM.from_pretrained(str(self), torch_dtype='auto') + target = self.init() + + trainer = self.nemo_setup(target) + self.convert_state(source, target) + self.nemo_save(output_path, trainer) + + print(f"Converted Gemma2 model to Nemo, model saved to {output_path}") + + teardown(trainer, target) + del trainer, target + + return output_path + + def convert_state(self, source, target): + mapping = { + "model.embed_tokens.weight": "embedding.word_embeddings.weight", + "model.layers.*.self_attn.o_proj.weight": "decoder.layers.*.self_attention.linear_proj.weight", + "model.layers.*.mlp.down_proj.weight": "decoder.layers.*.mlp.linear_fc2.weight", + "model.layers.*.input_layernorm.weight": "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight", + "model.layers.*.pre_feedforward_layernorm.weight": "decoder.layers.*.mlp.linear_fc1.layer_norm_weight", + "model.layers.*.post_feedforward_layernorm.weight": "decoder.layers.*.mlp.linear_fc2.post_layernorm.weight", + "model.layers.*.post_attention_layernorm.weight": "decoder.layers.*.self_attention.linear_proj.post_layernorm.weight", + "model.norm.weight": "decoder.final_layernorm.weight", + } + + return io.apply_transforms(source, target, mapping=mapping, transforms=[_import_qkv, _import_linear_fc1]) + + @property + def tokenizer(self) -> "AutoTokenizer": + from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer + + return AutoTokenizer(self.save_hf_tokenizer_assets(str(self))) + + @property + def config(self) -> Gemma2Config: + from transformers import GemmaConfig as HFGemmaConfig + + source = HFGemmaConfig.from_pretrained(str(self)) + + def make_vocab_size_divisible_by(vocab_size): + base = 128 + while vocab_size % base != 0: + base //= 2 + return base + + output = Gemma2Config( + num_layers=source.num_hidden_layers, + hidden_size=source.hidden_size, + ffn_hidden_size=source.intermediate_size, + num_attention_heads=source.num_attention_heads, + init_method_std=source.initializer_range, + layernorm_epsilon=source.rms_norm_eps, + num_query_groups=source.num_key_value_heads, + rotary_base=source.rope_theta, + query_pre_attn_scalar=source.query_pre_attn_scalar, + attn_logit_softcapping=source.attn_logit_softcapping, + final_logit_softcapping=source.final_logit_softcapping, + window_size=(source.sliding_window, 0), + gated_linear_unit=True, + make_vocab_size_divisible_by=make_vocab_size_divisible_by(source.vocab_size), + vocab_size=source.vocab_size, + share_embeddings_and_output_weights=True, + fp16=(dtype_from_hf(source) == torch.float16), + bf16=(dtype_from_hf(source) == torch.bfloat16), + params_dtype=dtype_from_hf(source), + ) + + return output + + +@io.model_exporter(Gemma2Model, "hf") +class HFGemmaExporter(io.ModelConnector[Gemma2Model, "GemmaForCausalLM"]): + def init(self) -> "GemmaForCausalLM": + from transformers 
import AutoModelForCausalLM + from transformers.modeling_utils import no_init_weights + + with no_init_weights(True): + return AutoModelForCausalLM.from_config(self.config) + + def apply(self, output_path: Path) -> Path: + target = self.init() + source, _ = self.nemo_load(str(self)) + target = self.convert_state(source, target) + + target = target.cpu() + target.save_pretrained(output_path) + self.tokenizer.save_pretrained(output_path) + + return output_path + + def convert_state(self, source, target): + mapping = { + "embedding.word_embeddings.weight": "model.embed_tokens.weight", + "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", + "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", + "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", + "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", + "decoder.layers.*.mlp.linear_fc2.post_layernorm.weight": "model.layers.*.post_feedforward_layernorm.weight", + "decoder.layers.*.self_attention.linear_proj.post_layernorm.weight": "model.layers.*.post_attention_layernorm.weight", + "decoder.final_layernorm.weight": "model.norm.weight", + } + + return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1]) + + @property + def tokenizer(self): + return io.load_context(str(self)).model.tokenizer.tokenizer + + @property + def config(self) -> "Gemma2Config": + source: Gemma2Config = io.load_context(str(self)).model.config + + from transformers import Gemma2Config as HFGemmaConfig + + return HFGemmaConfig( + num_hidden_layers=source.num_layers, + hidden_size=source.hidden_size, + intermediate_size=source.ffn_hidden_size, + num_attention_heads=source.num_attention_heads, + max_position_embeddings=source.seq_length, + initializer_range=source.init_method_std, + rms_norm_eps=source.layernorm_epsilon, + num_key_value_heads=source.num_query_groups, + vocab_size=self.tokenizer.vocab_size, + rope_theta=source.rotary_base, + query_pre_attn_scalar=source.query_pre_attn_scalar, + attn_logit_softcapping=source.attn_logit_softcapping, + final_logit_softcapping=source.final_logit_softcapping, + ) + + +@io.state_transform( + source_key=( + "model.layers.*.self_attn.q_proj.weight", + "model.layers.*.self_attn.k_proj.weight", + "model.layers.*.self_attn.v_proj.weight", + ), + target_key="decoder.layers.*.self_attention.linear_qkv.weight", +) +def _import_qkv(ctx: io.TransformCTX, q, k, v): + megatron_config = ctx.target.config + + head_num = megatron_config.num_attention_heads + num_query_groups = megatron_config.num_query_groups + heads_per_group = head_num // num_query_groups + hidden_size = megatron_config.hidden_size + head_size = megatron_config.kv_channels + + old_tensor_shape = q.size() + new_q_tensor_shape = (head_num, head_size) + old_tensor_shape[1:] + new_kv_tensor_shape = (num_query_groups, head_size) + old_tensor_shape[1:] + + q = q.view(*new_q_tensor_shape) + k = k.view(*new_kv_tensor_shape) + v = v.view(*new_kv_tensor_shape) + + qkv_weights_l = [] + for i in range(num_query_groups): + qkv_weights_l.append(q[i * heads_per_group : (i + 1) * heads_per_group, :, :]) + qkv_weights_l.append(k[i : i + 1, :, :]) + qkv_weights_l.append(v[i : i + 1, :, :]) + qkv_weights = torch.cat(qkv_weights_l) + assert qkv_weights.ndim == 3, qkv_weights.shape + assert qkv_weights.shape[0] == (heads_per_group + 2) * num_query_groups, qkv_weights.shape + assert 
qkv_weights.shape[1] == head_size, qkv_weights.shape + assert qkv_weights.shape[2] == old_tensor_shape[1], qkv_weights.shape + + qkv_weights = qkv_weights.reshape([head_size * (head_num + 2 * num_query_groups), hidden_size]) + + return qkv_weights + + +@io.state_transform( + source_key="model.layers.*.post_feedforward_layernorm.weight", + target_key=( + "decoder.layers.*.self_attention.linear_proj.post_layernorm.weight", + "decoder.layers.*.mlp.linear_fc2.post_layernorm.weight", + ), +) +def _import_post_ffn_ln(ctx: io.TransformCTX, ln): + return ln, ln + + +@io.state_transform( + source_key="decoder.layers.*.self_attention.linear_qkv.weight", + target_key=( + "model.layers.*.self_attn.q_proj.weight", + "model.layers.*.self_attn.k_proj.weight", + "model.layers.*.self_attn.v_proj.weight", + ), +) +def _export_qkv(ctx: io.TransformCTX, linear_qkv): + megatron_config = ctx.source.config + + head_num = megatron_config.num_attention_heads + num_query_groups = megatron_config.num_query_groups + heads_per_group = head_num // num_query_groups + hidden_size = megatron_config.hidden_size + head_size = megatron_config.kv_channels + qkv_total_dim = head_num + 2 * num_query_groups + + linear_qkv = linear_qkv.reshape([qkv_total_dim, head_size, hidden_size]) + q_slice = torch.cat( + [ + torch.arange((heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group) + for i in range(num_query_groups) + ] + ) + k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) + v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) + + q_proj = linear_qkv[q_slice].reshape(-1, hidden_size).cpu() + k_proj = linear_qkv[k_slice].reshape(-1, hidden_size).cpu() + v_proj = linear_qkv[v_slice].reshape(-1, hidden_size).cpu() + + return q_proj, k_proj, v_proj + + +@io.state_transform( + source_key=("model.layers.*.mlp.gate_proj.weight", "model.layers.*.mlp.up_proj.weight"), + target_key="decoder.layers.*.mlp.linear_fc1.weight", +) +def _import_linear_fc1(down, gate): + return torch.cat((down, gate), axis=0) + + +@io.state_transform( + source_key="decoder.layers.*.mlp.linear_fc1.weight", + target_key=("model.layers.*.mlp.gate_proj.weight", "model.layers.*.mlp.up_proj.weight"), +) +def _export_linear_fc1(linear_fc1): + gate_proj, up_proj = torch.chunk(linear_fc1, 2, dim=0) + + return gate_proj, up_proj + + +__all__ = [ + "Gemma2Config", + "Gemma2Config2B", + "Gemma2Config9B", + "Gemma2Config27B", + "Gemma2Model", +] diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index b02547acfffe5..551de93cce431 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -16,6 +16,10 @@ from nemo.collections.llm.recipes import ( baichuan2_7b, chatglm3_6b, + gemma2, + gemma2_2b, + gemma2_9b, + gemma2_27b, gemma_2b, gemma_7b, gpt3_175b, @@ -117,6 +121,10 @@ "qwen2_7b", "qwen2_72b", "gpt3_175b", + "gemma2", + "gemma2_2b", + "gemma2_9b", + "gemma2_27b", "adam", "default_log", "default_resume", diff --git a/nemo/collections/llm/recipes/gemma2.py b/nemo/collections/llm/recipes/gemma2.py new file mode 100644 index 0000000000000..6fd1be83c1836 --- /dev/null +++ b/nemo/collections/llm/recipes/gemma2.py @@ -0,0 +1,140 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch +from megatron.core.distributed import DistributedDataParallelConfig +from pytorch_lightning.callbacks.callback import Callback + +from nemo import lightning as nl +from nemo.collections.llm.gpt.model.gemma2 import Gemma2Config2B, Gemma2Config9B, Gemma2Config27B, Gemma2Model +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed, fp16_mixed + + +def gemma2_model(version: str) -> run.Config[pl.LightningModule]: + """ + A function to create a Gemma2 models. + + Args + version (str): The version of the Gemma2 model to create. one of [ + "gemma2_2b", "gemma2_9b", "gemma2_27b"]. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Gemma2 model. + """ + config = None + if version == "gemma2_2b": + config = run.Config(Gemma2Config2B) + elif version == "gemma2_9b": + config = run.Config(Gemma2Config9B) + elif version == "gemma2_27b": + config = run.Config(Gemma2Config27B) + + assert config is not None, f"Invalid version: {version}" + return run.Config(Gemma2Model, config=config) + + +def gemma2_trainer( + tensor_parallelism: int = 1, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 2000, + callbacks: Optional[list[run.Config[Callback]]] = None, +) -> run.Config[nl.Trainer]: + """ + Configure the NeMo Lightning Trainer for Gemma2 models. + + This function sets up the distributed training strategy and other training parameters. + + Args: + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + callbacks (Optional[list[run.Config[Callback]]]): List of callback configurations. + + Returns: + run.Config[nl.Trainer]: Configuration for the NeMo Lightning Trainer. 
+ """ + strategy = run.Config( + nl.MegatronStrategy, + tensor_model_parallel_size=tensor_parallelism, + pipeline_model_parallel_size=pipeline_parallelism, + pipeline_dtype=pipeline_parallelism_type, + virtual_pipeline_model_parallel_size=virtual_pipeline_parallelism, + context_parallel_size=context_parallelism, + sequence_parallel=sequence_parallelism, + gradient_as_bucket_view=True, + ckpt_include_optimizer=True, + ckpt_async_save=True, + ckpt_parallel_load=True, + ddp=run.Config( + DistributedDataParallelConfig, + check_for_nan_in_grad=True, + grad_reduce_in_fp32=True, + overlap_grad_reduce=True, + overlap_param_gather=True, + average_in_collective=True, + ), + ) + + precision_plugin = None + if precision == "16-mixed": + precision_plugin = fp16_mixed() + elif precision == "bf16-mixed": + precision_plugin = bf16_mixed() + + trainer = run.Config( + nl.Trainer, + accelerator="gpu", + callbacks=callbacks, + devices=num_gpus_per_node, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + max_steps=max_steps, + num_nodes=num_nodes, + plugins=precision_plugin, + strategy=strategy, + use_distributed_sampler=False, + val_check_interval=val_check_interval, + ) + + return trainer diff --git a/nemo/collections/llm/recipes/gemma2_27b.py b/nemo/collections/llm/recipes/gemma2_27b.py new file mode 100644 index 0000000000000..4b7c09e30bfcf --- /dev/null +++ b/nemo/collections/llm/recipes/gemma2_27b.py @@ -0,0 +1,222 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.utils.exp_manager import TimingCallback + +NAME = "gemma2_27b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Gemma2 27B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Gemma2 27B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=gemma2 ... 
+ + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return gemma2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 8, + pipeline_parallelism: int = 2, + pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3.0e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for gemma2 27B model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. 
+ + Examples: + CLI usage: + $ nemo llm pretrain --factory gemma2_27b + $ nemo llm pretrain --factory "gemma2_27b(num_nodes=1, name='my_gemma2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="gemma2_pretrain", num_nodes=1) + >>> print(recipe) + """ + return run.Partial( + fn, + model=model(), + trainer=gemma2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Gemma2 27B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory gemma2_27b + + Python API usage: + >>> recipe = finetune_recipe(name="gemma2_27b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
+ """ + recipe = default_finetune_recipe(model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.trainer.strategy.pipeline_model_parallel_size = 2 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/gemma2_2b.py b/nemo/collections/llm/recipes/gemma2_2b.py new file mode 100644 index 0000000000000..952d081841688 --- /dev/null +++ b/nemo/collections/llm/recipes/gemma2_2b.py @@ -0,0 +1,219 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.utils.exp_manager import TimingCallback + +NAME = "gemma2_2b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Gemma2 2B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Gemma2 2B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=gemma2 ... + + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return gemma2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 2, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = None, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3.0e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for gemma2 2B model. 
+ + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. + + Examples: + CLI usage: + $ nemo llm pretrain --factory gemma2_2b + $ nemo llm pretrain --factory "gemma2_2b(num_nodes=1, name='my_gemma2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="gemma2_pretrain", num_nodes=1) + >>> print(recipe) + """ + return run.Partial( + fn, + model=model(), + trainer=gemma2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Gemma2 2B model. 
+ + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory gemma2_2b + + Python API usage: + >>> recipe = finetune_recipe(name="gemma2_2b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + recipe = default_finetune_recipe(model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/gemma2_9b.py b/nemo/collections/llm/recipes/gemma2_9b.py new file mode 100644 index 0000000000000..8f004c5a2a8d7 --- /dev/null +++ b/nemo/collections/llm/recipes/gemma2_9b.py @@ -0,0 +1,220 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import nemo_run as run +import pytorch_lightning as pl +import torch + +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe +from nemo.collections.llm.recipes.gemma2 import gemma2_model, gemma2_trainer +from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger +from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing +from nemo.utils.exp_manager import TimingCallback + +NAME = "gemma2_9b" + + +@run.cli.factory(name=NAME) +def model() -> run.Config[pl.LightningModule]: + """ + Factory function to create a Gemma2 9B model configuration. + + Returns: + run.Config[pl.LightningModule]: Configuration for the Gemma2 9B model. + + Examples: + CLI usage: + $ nemo llm pretrain model=gemma2 ... 
+ + Python API usage: + >>> model_config = model() + >>> print(model_config) + """ + + return gemma2_model(version=NAME) + + +@run.cli.factory(target=pretrain, name=NAME) +def pretrain_recipe( + # General + dir: Optional[str] = None, + name: str = "default", + # Trainer + tensor_parallelism: int = 8, + pipeline_parallelism: int = 1, + pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, + virtual_pipeline_parallelism: Optional[int] = None, + context_parallelism: int = 1, + sequence_parallelism: bool = False, + num_nodes: int = 1, + num_gpus_per_node: int = 8, + max_steps: int = 1168251, + precision: str = "bf16-mixed", + accumulate_grad_batches: int = 1, + gradient_clip_val: float = 1.0, + limit_test_batches: int = 32, + limit_val_batches: int = 32, + log_every_n_steps: int = 10, + val_check_interval: int = 500, + # Data + global_batch_size=32, + micro_batch_size=2, + seq_length=4096, + # Optimizer + warmup_steps=500, + constant_steps=0, + min_lr=3.0e-5, + max_lr=3e-4, + # Training function + fn=pretrain, +) -> run.Partial: + """ + Create a pre-training recipe for gemma2 9B model. + + This function sets up a complete configuration for pre-training, including + model, trainer, data, logging, optimization, and resumption settings. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the pre-training run. + tensor_parallelism (int): Degree of tensor model parallelism. + pipeline_parallelism (int): Degree of pipeline model parallelism. + pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. + virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. + context_parallelism (int): Degree of context parallelism. + sequence_parallelism (bool): Whether to use sequence parallelism. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + max_steps (int): Maximum number of training steps. + precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. + accumulate_grad_batches (int): Number of steps per gradient accumulation. + gradient_clip_val (float): Value for gradient clipping. + limit_test_batches (int): Limit the number of test batches. + limit_val_batches (int): Limit the number of validation batches. + log_every_n_steps (int): Log every n steps. + val_check_interval (int): Run validation every N steps. + global_batch_size (int): Global batch size. + micro_batch_size (int): Micro batch size. + seq_length (int): Sequence length. + warmup_steps (int): Number of warmup steps. + constant_steps (int): Number of constant steps. + min_lr (float): Minimum learning rate. + max_lr (float): Maximum learning rate. + fn (Callable): The pre-training function to use. + + Returns: + run.Partial: Partial configuration for pre-training. 
+ + Examples: + CLI usage: + $ nemo llm pretrain --factory gemma2_9b + $ nemo llm pretrain --factory "gemma2_9b(num_nodes=1, name='my_gemma2_pretrain')" + + Python API usage: + >>> recipe = pretrain_recipe(name="gemma2_pretrain", num_nodes=1) + >>> print(recipe) + """ + return run.Partial( + fn, + model=model(), + trainer=gemma2_trainer( + tensor_parallelism=tensor_parallelism, + pipeline_parallelism=pipeline_parallelism, + pipeline_parallelism_type=pipeline_parallelism_type, + virtual_pipeline_parallelism=virtual_pipeline_parallelism, + context_parallelism=context_parallelism, + sequence_parallelism=sequence_parallelism, + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + max_steps=max_steps, + precision=precision, + accumulate_grad_batches=accumulate_grad_batches, + limit_test_batches=limit_test_batches, + limit_val_batches=limit_val_batches, + log_every_n_steps=log_every_n_steps, + val_check_interval=val_check_interval, + callbacks=[run.Config(TimingCallback)], + ), + data=run.Config( + MockDataModule, + seq_length=seq_length, + global_batch_size=global_batch_size, + micro_batch_size=micro_batch_size, + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=distributed_fused_adam_with_cosine_annealing( + precision=precision, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr, + max_lr=max_lr, + clip_grad=gradient_clip_val, + ), + resume=default_resume(), + ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Gemma2 9B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory gemma2_9b + + Python API usage: + >>> recipe = finetune_recipe(name="gemma2_9b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
+ """ + recipe = default_finetune_recipe(model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + recipe.trainer.strategy.tensor_model_parallel_size = 4 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gemma2/gemma2_modules.py b/nemo/collections/nlp/models/language_modeling/megatron/gemma2/gemma2_modules.py index 11123d03b4e82..5113ee7458959 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/gemma2/gemma2_modules.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gemma2/gemma2_modules.py @@ -66,6 +66,7 @@ def __init__( attn_mask_type: AttnMaskType, attention_type: str, attention_dropout: float = None, + cp_comm_type: str = None, ): super().__init__(config=config) @@ -274,7 +275,7 @@ def forward(self, x): class Gemma2OutputLayer(ColumnParallelLinear): - def forward(self, input_: torch.Tensor, weight: Optional[torch.Tensor] = None): - output, bias = super().forward(input_, weight) + def forward(self, *args, **kwargs): + output, bias = super().forward(*args, **kwargs) output = logit_softcapping(output, self.config.final_logit_softcapping) return output, bias diff --git a/scripts/checkpoint_converters/convert_gemma2_hf_to_nemo.py b/scripts/checkpoint_converters/convert_gemma2_hf_to_nemo.py index fb296cf25c685..70e45879ca8a1 100644 --- a/scripts/checkpoint_converters/convert_gemma2_hf_to_nemo.py +++ b/scripts/checkpoint_converters/convert_gemma2_hf_to_nemo.py @@ -194,7 +194,7 @@ def adjust_nemo_config(model_config, ref_config): model_config["num_query_groups"] = ref_config["num_key_value_heads"] model_config["kv_channels"] = ref_config["head_dim"] model_config["layernorm_epsilon"] = ref_config["rms_norm_eps"] - model_config["window_size"] = (ref_config["sliding_window_size"], 0) + model_config["window_size"] = (ref_config["sliding_window"], 0) model_config["layernorm_zero_centered_gamma"] = True model_config["name"] = 'megatron_gemma2' model_config['mcore_customization_config'] = { From 1ec1714d7c08b763ac6328fb75e94cbe3e0a0bfc Mon Sep 17 00:00:00 2001 From: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Date: Wed, 30 Oct 2024 02:17:18 -0400 Subject: [PATCH 038/125] PEFT perf and TE spec fixes (#11070) * lora perf fixes Signed-off-by: Jimmy Zhang * fix vpp Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * gpt dropout Signed-off-by: Jimmy Zhang * support full te layer spec Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * import fix Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * Update llama3_70b.py Signed-off-by: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> --------- Signed-off-by: Jimmy Zhang Signed-off-by: JimmyZhang12 Signed-off-by: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Co-authored-by: Jimmy Zhang Co-authored-by: JimmyZhang12 --- .../llm/gpt/data/packed_sequence.py | 3 +- nemo/collections/llm/gpt/model/__init__.py | 2 + nemo/collections/llm/gpt/model/base.py | 78 ++++++++++++++++--- nemo/collections/llm/recipes/llama3_70b.py | 2 + nemo/collections/llm/recipes/llama3_8b.py | 1 + nemo/lightning/_strategy_lib.py | 10 --- nemo/lightning/pytorch/callbacks/peft.py | 13 
+++- nemo/lightning/run/plugins.py | 4 - 8 files changed, 84 insertions(+), 29 deletions(-) diff --git a/nemo/collections/llm/gpt/data/packed_sequence.py b/nemo/collections/llm/gpt/data/packed_sequence.py index 372e851da7cd7..153e79f943914 100644 --- a/nemo/collections/llm/gpt/data/packed_sequence.py +++ b/nemo/collections/llm/gpt/data/packed_sequence.py @@ -101,13 +101,14 @@ class PackedSequenceSpecs: This field is set by llm.finetune api. """ - packed_data_path: Path = None + packed_data_path: str = None """ If specified, use the packed dataset from this file instead of the default path. """ def __post_init__(self): if self.packed_data_path is not None: + self.packed_data_path = Path(self.packed_data_path) assert ( self.packed_data_path.suffix == ".npy" ), f"packed data file must be a .npy file: {self.packed_data_path}" diff --git a/nemo/collections/llm/gpt/model/__init__.py b/nemo/collections/llm/gpt/model/__init__.py index 9699a9376d9ac..6c7d159dd5cfe 100644 --- a/nemo/collections/llm/gpt/model/__init__.py +++ b/nemo/collections/llm/gpt/model/__init__.py @@ -26,6 +26,7 @@ gpt_data_step, gpt_forward_step, local_layer_spec, + transformer_engine_full_layer_spec, transformer_engine_layer_spec, ) from nemo.collections.llm.gpt.model.chatglm import ChatGLM2Config6B, ChatGLM3Config6B, ChatGLMConfig, ChatGLMModel @@ -180,6 +181,7 @@ "gpt_data_step", "gpt_forward_step", "transformer_engine_layer_spec", + "transformer_engine_full_layer_spec", "local_layer_spec", "HfAutoModelForCausalLM", ] diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py index a7823c9bee80e..5b25e0ca9b626 100644 --- a/nemo/collections/llm/gpt/model/base.py +++ b/nemo/collections/llm/gpt/model/base.py @@ -63,21 +63,31 @@ def gpt_data_step(dataloader_iter) -> Dict[str, torch.Tensor]: else: _batch = batch - required_keys = set() - required_keys.add("attention_mask") + required_device_keys = set() + required_host_keys = set() + + required_device_keys.add("attention_mask") if 'cu_seqlens' in _batch: - required_keys.add('cu_seqlens') - required_keys.add('cu_seqlens_argmin') - required_keys.add('max_seqlen') + required_device_keys.add('cu_seqlens') + required_host_keys.add('cu_seqlens_argmin') + required_host_keys.add('max_seqlen') if parallel_state.is_pipeline_first_stage(): - required_keys.update(("tokens", "position_ids")) + required_device_keys.update(("tokens", "position_ids")) if parallel_state.is_pipeline_last_stage(): - required_keys.update(("labels", "loss_mask")) + required_device_keys.update(("labels", "loss_mask")) + + _batch_required_keys = {} + for key, val in _batch.items(): + if key in required_device_keys: + _batch_required_keys[key] = val.cuda(non_blocking=True) + elif key in required_host_keys: + _batch_required_keys[key] = val.cpu() + else: + _batch_required_keys[key] = None - _batch = {key: val.cuda(non_blocking=True) if key in required_keys else None for key, val in _batch.items()} # slice batch along sequence dimension for context parallelism - output = get_batch_on_this_context_parallel_rank(_batch) + output = get_batch_on_this_context_parallel_rank(_batch_required_keys) return output @@ -111,6 +121,14 @@ def transformer_engine_layer_spec(config: "GPTConfig") -> ModuleSpec: ) +def transformer_engine_full_layer_spec(config: "GPTConfig") -> ModuleSpec: + from nemo.collections.nlp.models.language_modeling.megatron.gpt_full_te_layer_autocast_spec import ( + get_gpt_full_te_layer_autocast_spec, + ) + + return get_gpt_full_te_layer_autocast_spec(transformer_config=config) + + 
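For reference, the new `use_transformer_engine_full_layer_spec` flag added to `GPTConfig` later in this hunk is what routes `default_layer_spec` to the full-TE spec above. A minimal sketch of how a caller might opt in, assuming Transformer Engine is installed; the layer sizes below are illustrative and not taken from this patch:

from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel

# Sketch only: with the flag set, default_layer_spec dispatches to
# transformer_engine_full_layer_spec when TE is available.
config = GPTConfig(
    num_layers=12,                                # illustrative size
    hidden_size=768,                              # illustrative size
    num_attention_heads=12,                       # illustrative size
    use_transformer_engine_full_layer_spec=True,  # new flag introduced in this patch
)
model = GPTModel(config)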
def local_layer_spec(config: "GPTConfig") -> ModuleSpec: from megatron.core.models.gpt import gpt_layer_specs @@ -121,7 +139,10 @@ def local_layer_spec(config: "GPTConfig") -> ModuleSpec: def default_layer_spec(config: "GPTConfig") -> ModuleSpec: if HAVE_TE: - return transformer_engine_layer_spec(config) + if config.use_transformer_engine_full_layer_spec: + return transformer_engine_full_layer_spec(config) + else: + return transformer_engine_layer_spec(config) else: return local_layer_spec(config) @@ -144,7 +165,9 @@ class GPTConfig(TransformerConfig, io.IOMixin): gradient_accumulation_fusion: bool = _grad_accum_fusion_available deallocate_pipeline_outputs = True + use_transformer_engine_full_layer_spec: bool = False transformer_layer_spec: Union[ModuleSpec, Callable[["GPTConfig"], ModuleSpec]] = default_layer_spec + forward_step_fn: Callable = gpt_forward_step data_step_fn: Callable = gpt_data_step @@ -172,7 +195,7 @@ def configure_model(self, tokenizer) -> "MCoreGPTModel": else: vocab_size = get_vocab_size(self, tokenizer.vocab_size, self.make_vocab_size_divisible_by) - return MCoreGPTModel( + model = MCoreGPTModel( self, transformer_layer_spec=transformer_layer_spec, vocab_size=vocab_size, @@ -188,6 +211,35 @@ def configure_model(self, tokenizer) -> "MCoreGPTModel": post_process=parallel_state.is_pipeline_last_stage(), ) + # If using full TE layer, need to set TP, CP group since the module call + # is not routed through megatron core, which normally handles passing the + # TP, CP group to the TE modules. + # Deep iterate but skip self to avoid infinite recursion. + if HAVE_TE and self.use_transformer_engine_full_layer_spec: + # Copied from: https://github.com/NVIDIA/TransformerEngine/blob/main/transformer_engine/pytorch/transformer.py + if parallel_state.get_tensor_model_parallel_world_size() > 1: + for index, child in enumerate(model.modules()): + if index == 0: + continue + if hasattr(child, "set_tensor_parallel_group"): + tp_group = parallel_state.get_tensor_model_parallel_group() + child.set_tensor_parallel_group(tp_group) + + if parallel_state.get_context_parallel_world_size() > 1: + cp_stream = torch.cuda.Stream() + for module in self.get_model_module_list(): + for index, child in enumerate(module.modules()): + if index == 0: + continue + if hasattr(child, "set_context_parallel_group"): + child.set_context_parallel_group( + parallel_state.get_context_parallel_group(), + parallel_state.get_context_parallel_global_ranks(), + cp_stream, + ) + + return model + @dataclass class GPTConfig126M(GPTConfig): @@ -247,7 +299,9 @@ class GPTConfig175B(GPTConfig): hidden_size: int = 12288 ffn_hidden_size: int = 49152 num_attention_heads: int = 96 - + hidden_dropout: float = 0.0 + attention_dropout: float = 0.0 + ffn_dropout: float = 0.0 bias_activation_fusion: bool = True bias_dropout_add_fusion: bool = True diff --git a/nemo/collections/llm/recipes/llama3_70b.py b/nemo/collections/llm/recipes/llama3_70b.py index 5b721c7d531ed..cb862bf50ee4b 100644 --- a/nemo/collections/llm/recipes/llama3_70b.py +++ b/nemo/collections/llm/recipes/llama3_70b.py @@ -306,6 +306,7 @@ def finetune_recipe( recipe.peft.dim = 16 recipe.peft.alpha = 32 recipe.peft.target_modules = ['linear_qkv'] + recipe.optim.config.use_distributed_optimizer = False # some settings currently do not function correctly with LoRA recipe.model.config.cross_entropy_loss_fusion = False @@ -377,6 +378,7 @@ def finetune_performance_optimizations( else: recipe.trainer.strategy.tensor_model_parallel_size = 2 
recipe.trainer.strategy.pipeline_model_parallel_size = 4 + recipe.trainer.strategy.virtual_pipeline_model_parallel_size = 5 recipe.trainer.strategy.sequence_parallel = True diff --git a/nemo/collections/llm/recipes/llama3_8b.py b/nemo/collections/llm/recipes/llama3_8b.py index 29c5c25f94fe0..1030ad8799a10 100644 --- a/nemo/collections/llm/recipes/llama3_8b.py +++ b/nemo/collections/llm/recipes/llama3_8b.py @@ -292,6 +292,7 @@ def finetune_recipe( recipe.peft.dim = 8 recipe.peft.alpha = 16 recipe.peft.target_modules = ['linear_qkv'] + recipe.optim.config.use_distributed_optimizer = False # some settings currently do not function correctly with LoRA recipe.model.config.cross_entropy_loss_fusion = False diff --git a/nemo/lightning/_strategy_lib.py b/nemo/lightning/_strategy_lib.py index 40a79c94c59fa..1bee71e26e170 100644 --- a/nemo/lightning/_strategy_lib.py +++ b/nemo/lightning/_strategy_lib.py @@ -135,16 +135,6 @@ def init_model_parallel(model: Optional[nn.Module] = None) -> None: if app_state.init_mpi_proc_group: torch.distributed.new_group(backend="mpi") - if model: - # Set TP group - # Deep iterate but skip self to avoid infinite recursion. - for index, child in enumerate(model.modules()): - if index == 0: - continue - if hasattr(child, "set_tensor_parallel_group"): - tp_group = parallel_state.get_tensor_model_parallel_group() - child.set_tensor_parallel_group(tp_group) - def set_model_parallel_attributes(model, parallelism): # Right now mcore sub-classes ModelParellelConfig, we should remove that diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index e90d53ad2ac9e..2e32b1f0b73e0 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -28,6 +28,7 @@ from nemo.lightning.ckpt_utils import ADAPTER_META_FILENAME from nemo.lightning.io.mixin import IOMixin from nemo.lightning.io.pl import ckpt_to_dir +from nemo.lightning.megatron_parallel import MegatronParallel from nemo.lightning.pytorch.callbacks.model_transform import ModelTransform from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule from nemo.utils import logging @@ -92,8 +93,16 @@ def __call__(self, model: nn.Module) -> nn.Module: Returns: nn.Module: The transformed model with PEFT applied. 
""" - self.freeze_model(model) - model.walk(self.transform) + + # If using megatron virtual pipeline parallelism, model is a list of + # model chunks so iterate over model + if isinstance(model, MegatronParallel) and len(model) > 1: + for model_chunk in model: + model_chunk.freeze() + model_chunk.walk(self.transform) + else: + model.freeze() + model.walk(self.transform) return model diff --git a/nemo/lightning/run/plugins.py b/nemo/lightning/run/plugins.py index c9a38c5979ca3..9d2936e567ec4 100644 --- a/nemo/lightning/run/plugins.py +++ b/nemo/lightning/run/plugins.py @@ -294,10 +294,6 @@ def setup(self, task: run.Partial | run.Script, executor: run.Executor): executor.env_vars["NVTE_FWD_LAYERNORM_SM_MARGIN"] = str(self.layernorm_sm_margin) executor.env_vars["NVTE_BWD_LAYERNORM_SM_MARGIN"] = str(self.layernorm_sm_margin) - # Force Transformer Engine to use cuDNN attention over HazyResearch's Flash Attention - executor.env_vars["NVTE_FLASH_ATTN"] = "0" - executor.env_vars["NVTE_FUSED_ATTN"] = "1" - # Improve perf by steering power to tensor cores, may not work on all systems if self.enable_vboost and isinstance(executor, run.SlurmExecutor): vboost_cmd = self.get_vboost_srun_cmd(executor.nodes, executor.job_dir) From 86e56c178063b6919857b81faa5dd2b71c8f27f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 30 Oct 2024 11:06:14 +0100 Subject: [PATCH 039/125] =?UTF-8?q?[=F0=9F=A4=A0]:=20Howdy=20folks,=20let'?= =?UTF-8?q?s=20bump=20`Dockerfile.ci`=20to=20213c8a2=20!=20(#11092)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: pablo-garay <7166088+pablo-garay@users.noreply.github.com> --- Dockerfile.ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 103e47dc753a1..c881a4c830ebe 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea ARG MODELOPT_VERSION=0.17.0 -ARG MCORE_TAG=d357c188323b6928cbcbd6f7e06af04c1694382f +ARG MCORE_TAG=213c8a23fa9fe95d19eff0932a1e6e71767f0962 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c RUN \ From b5432258da3d061ec9057805c13edd374b29f7ce Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Wed, 30 Oct 2024 05:55:12 -0700 Subject: [PATCH 040/125] NeMo-UX: fix nemo-ux export path (#11081) * only make optim config if model has optim and setup_optimizers is True Signed-off-by: Alexandros Koumparoulis * pass setup_optimizers=False in nemo_load Signed-off-by: Alexandros Koumparoulis * fix backwards compatibility in load_context Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * add torch_dtype_from_mcore_config Signed-off-by: Alexandros Koumparoulis * fix hf model dtype & prune embedding size Signed-off-by: Alexandros Koumparoulis * propagate changes: mistral Signed-off-by: Alexandros Koumparoulis * propagate changes: mixtral Signed-off-by: Alexandros Koumparoulis * propagate changes: nemotron Signed-off-by: Alexandros Koumparoulis * propagate changes: qwen2 Signed-off-by: Alexandros Koumparoulis * propagate changes: startcoder Signed-off-by: Alexandros Koumparoulis * propagate changes: startcoder2 Signed-off-by: Alexandros Koumparoulis * propagate chatglm Signed-off-by: Alexandros 
Koumparoulis * remove commented code Signed-off-by: Alexandros Koumparoulis * rm rename Signed-off-by: Alexandros Koumparoulis * rm rename Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa --- nemo/collections/llm/gpt/model/base.py | 9 +++++ nemo/collections/llm/gpt/model/chatglm.py | 32 +++++++++++++--- nemo/collections/llm/gpt/model/llama.py | 37 +++++++++++++++---- nemo/collections/llm/gpt/model/mistral.py | 18 +++++---- nemo/collections/llm/gpt/model/mixtral.py | 10 +++-- nemo/collections/llm/gpt/model/nemotron.py | 15 ++++---- nemo/collections/llm/gpt/model/qwen2.py | 16 ++++---- nemo/collections/llm/gpt/model/starcoder.py | 33 +++++++++++++---- nemo/collections/llm/gpt/model/starcoder2.py | 2 - nemo/lightning/io/api.py | 12 +++++- nemo/lightning/io/connector.py | 4 +- .../pytorch/strategies/megatron_strategy.py | 2 +- 12 files changed, 141 insertions(+), 49 deletions(-) diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py index 5b25e0ca9b626..1e34ffcea0c14 100644 --- a/nemo/collections/llm/gpt/model/base.py +++ b/nemo/collections/llm/gpt/model/base.py @@ -147,6 +147,15 @@ def default_layer_spec(config: "GPTConfig") -> ModuleSpec: return local_layer_spec(config) +def torch_dtype_from_mcore_config(config: TransformerConfig): + if config.fp16: + return torch.float16 + elif config.bf16: + return torch.bfloat16 + else: + return torch.float + + @dataclass class GPTConfig(TransformerConfig, io.IOMixin): # From megatron.core.models.gpt.gpt_model.GPTModel diff --git a/nemo/collections/llm/gpt/model/chatglm.py b/nemo/collections/llm/gpt/model/chatglm.py index e7450a8db28de..f9ad8fc6010c0 100644 --- a/nemo/collections/llm/gpt/model/chatglm.py +++ b/nemo/collections/llm/gpt/model/chatglm.py @@ -20,7 +20,7 @@ import torch.nn.functional as F from torch import nn -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config from nemo.collections.llm.utils import Config from nemo.lightning import OptimizerModule, io, teardown from nemo.lightning.pytorch.utils import dtype_from_hf @@ -139,16 +139,16 @@ def config(self) -> ChatGLMConfig: @io.model_exporter(ChatGLMModel, "hf") class HFChatGLMExporter(io.ModelConnector[ChatGLMModel, "AutoModelForCausalLM"]): - def init(self) -> "AutoModelForCausalLM": + def init(self, dtype=torch.bfloat16) -> "AutoModelForCausalLM": from transformers import AutoModelForCausalLM from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True) + return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) target = target.cpu() @@ -159,14 +159,12 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "transformer.embedding.word_embeddings.weight", "decoder.layers.*.self_attention.linear_proj.weight": "transformer.encoder.layers.*.self_attention.dense.weight", 
"decoder.layers.*.mlp.linear_fc1.weight": "transformer.encoder.layers.*.mlp.dense_h_to_4h.weight", "decoder.layers.*.mlp.linear_fc2.weight": "transformer.encoder.layers.*.mlp.dense_4h_to_h.weight", "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "transformer.encoder.layers.*.input_layernorm.weight", "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "transformer.encoder.layers.*.post_attention_layernorm.weight", "decoder.final_layernorm.weight": "transformer.encoder.final_layernorm.weight", - "output_layer.weight": "transformer.output_layer.weight", } return io.apply_transforms( @@ -176,6 +174,8 @@ def convert_state(self, source, target): transforms=[ _export_qkv_weight, _export_qkv_bias, + _export_embedding, + _export_head, ], ) @@ -198,6 +198,26 @@ def config(self) -> "AutoConfig": ) +@io.state_transform( + source_key="embedding.word_embeddings.weight", + target_key="transformer.embedding.word_embeddings.weight", +) +def _export_embedding(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + +@io.state_transform( + source_key="output_layer.weight", + target_key="transformer.output_layer.weight", +) +def _export_head(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + @io.state_transform( source_key="transformer.encoder.layers.*.self_attention.query_key_value.weight", target_key="decoder.layers.*.self_attention.linear_qkv.weight", diff --git a/nemo/collections/llm/gpt/model/llama.py b/nemo/collections/llm/gpt/model/llama.py index 5bc45b1049f32..7b235d59ee89c 100644 --- a/nemo/collections/llm/gpt/model/llama.py +++ b/nemo/collections/llm/gpt/model/llama.py @@ -21,7 +21,7 @@ import torch.nn.functional as F from torch import nn -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config from nemo.collections.llm.utils import Config from nemo.lightning import OptimizerModule, io, teardown from nemo.lightning.pytorch.utils import dtype_from_hf @@ -295,16 +295,16 @@ def make_vocab_size_divisible_by(vocab_size): @io.model_exporter(LlamaModel, "hf") class HFLlamaExporter(io.ModelConnector[LlamaModel, "LlamaForCausalLM"]): - def init(self) -> "LlamaForCausalLM": + def init(self, dtype=torch.bfloat16) -> "LlamaForCausalLM": from transformers import AutoModelForCausalLM from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return AutoModelForCausalLM.from_config(self.config) + return AutoModelForCausalLM.from_config(self.config, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) target = target.cpu() @@ -315,16 +315,19 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", 
"decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", } - return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1]) + return io.apply_transforms( + source, + target, + mapping=mapping, + transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head], + ) @property def tokenizer(self): @@ -426,6 +429,26 @@ def _export_qkv(ctx: io.TransformCTX, linear_qkv): return q_proj, k_proj, v_proj +@io.state_transform( + source_key="embedding.word_embeddings.weight", + target_key="model.embed_tokens.weight", +) +def _export_embedding(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + +@io.state_transform( + source_key="output_layer.weight", + target_key="lm_head.weight", +) +def _export_head(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + @io.state_transform( source_key=("model.layers.*.mlp.gate_proj.weight", "model.layers.*.mlp.up_proj.weight"), target_key="decoder.layers.*.mlp.linear_fc1.weight", diff --git a/nemo/collections/llm/gpt/model/mistral.py b/nemo/collections/llm/gpt/model/mistral.py index b9f4b6fb8f656..a71042e2ba6f9 100644 --- a/nemo/collections/llm/gpt/model/mistral.py +++ b/nemo/collections/llm/gpt/model/mistral.py @@ -22,7 +22,8 @@ from torch import nn from typing_extensions import Annotated -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config +from nemo.collections.llm.gpt.model.llama import _export_embedding, _export_head from nemo.collections.llm.utils import Config from nemo.lightning import io, teardown from nemo.lightning.pytorch.optim import OptimizerModule @@ -186,19 +187,19 @@ def make_vocab_size_divisible_by(mistral_vocab_size): @io.model_exporter(MistralModel, "hf") class HFMistralExporter(io.ModelConnector[MistralModel, "MistralForCausalLM"]): - def init(self) -> "MistralForCausalLM": + def init(self, dtype=torch.bfloat16) -> "MistralForCausalLM": from transformers import AutoModelForCausalLM from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return AutoModelForCausalLM.from_config(self.config) + return AutoModelForCausalLM.from_config(self.config, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: # TODO: Make it work with lazy init # with torch.device("meta"): # target = self.init() - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) # TODO: Make sure we don't need to do this @@ -210,16 +211,19 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", "decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", } - return io.apply_transforms(source, target, mapping=mapping, 
transforms=[_export_qkv, _export_linear_fc1]) + return io.apply_transforms( + source, + target, + mapping=mapping, + transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head], + ) @property def tokenizer(self): diff --git a/nemo/collections/llm/gpt/model/mixtral.py b/nemo/collections/llm/gpt/model/mixtral.py index 23b83960a9ecb..29361c38fda5f 100644 --- a/nemo/collections/llm/gpt/model/mixtral.py +++ b/nemo/collections/llm/gpt/model/mixtral.py @@ -21,6 +21,7 @@ from torch import nn from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.llama import _export_embedding, _export_head from nemo.lightning import io, teardown from nemo.lightning.pytorch.optim import OptimizerModule @@ -290,7 +291,6 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", "decoder.layers.*.pre_mlp_layernorm.weight": "model.layers.*.post_attention_layernorm.weight", @@ -299,10 +299,14 @@ def convert_state(self, source, target): "decoder.layers.*.mlp.router.weight": "model.layers.*.block_sparse_moe.gate.weight", # lm-head "decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", } - return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_moe_w1_w3]) + return io.apply_transforms( + source, + target, + mapping=mapping, + transforms=[_export_qkv, _export_moe_w1_w3, _export_embedding, _export_head], + ) @property def tokenizer(self): diff --git a/nemo/collections/llm/gpt/model/nemotron.py b/nemo/collections/llm/gpt/model/nemotron.py index c5ffbf9b52d23..8fdc5f8f0f00a 100644 --- a/nemo/collections/llm/gpt/model/nemotron.py +++ b/nemo/collections/llm/gpt/model/nemotron.py @@ -20,7 +20,8 @@ from torch import nn from nemo.collections.llm.fn.activation import squared_relu -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config +from nemo.collections.llm.gpt.model.llama import _export_embedding, _export_head from nemo.collections.llm.utils import Config from nemo.lightning import OptimizerModule, io, teardown from nemo.lightning.pytorch.utils import dtype_from_hf @@ -211,15 +212,15 @@ def make_vocab_size_divisible_by(vocab_size): @io.model_exporter(NemotronModel, "hf") class HFNemotronExporter(io.ModelConnector[NemotronModel, "NemotronForCausalLM"]): - def init(self) -> "NemotronForCausalLM": + def init(self, dtype=torch.bfloat16) -> "NemotronForCausalLM": from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return NemotronForCausalLM.from_config(self.config) + return NemotronForCausalLM.from_config(self.config, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) target = target.cpu() @@ -230,7 +231,6 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": 
"model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.mlp.linear_fc1.weight": "model.layers.*.mlp.up_proj.weight", "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", @@ -240,10 +240,11 @@ def convert_state(self, source, target): "decoder.layers.*.mlp.linear_fc1.layer_norm_bias": "model.layers.*.post_attention_layernorm.bias", "decoder.final_layernorm.weight": "model.norm.weight", "decoder.final_layernorm.bias": "model.norm.bias", - "output_layer.weight": "lm_head.weight", } - return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv]) + return io.apply_transforms( + source, target, mapping=mapping, transforms=[_export_qkv, _export_embedding, _export_head] + ) @property def tokenizer(self): diff --git a/nemo/collections/llm/gpt/model/qwen2.py b/nemo/collections/llm/gpt/model/qwen2.py index 75f436aa95369..4cf0292d1a6ad 100644 --- a/nemo/collections/llm/gpt/model/qwen2.py +++ b/nemo/collections/llm/gpt/model/qwen2.py @@ -20,7 +20,8 @@ import torch.nn.functional as F from torch import nn -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config +from nemo.collections.llm.gpt.model.llama import _export_embedding, _export_head from nemo.collections.llm.utils import Config from nemo.lightning import OptimizerModule, io, teardown from nemo.lightning.pytorch.utils import dtype_from_hf @@ -172,16 +173,16 @@ def config(self) -> Qwen2Config: @io.model_exporter(Qwen2Model, "hf") class HFQwen2Exporter(io.ModelConnector[Qwen2Model, "AutoModelForCausalLM"]): - def init(self) -> "AutoModelForCausalLM": + def init(self, dtype=torch.bfloat16) -> "AutoModelForCausalLM": from transformers import AutoModelForCausalLM from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True) + return AutoModelForCausalLM.from_config(self.config, trust_remote_code=True, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) target = target.cpu() @@ -192,17 +193,18 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", "decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", } return io.apply_transforms( - source, target, mapping=mapping, transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1] + source, + target, + mapping=mapping, + transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1, _export_embedding, _export_head], ) @property diff --git a/nemo/collections/llm/gpt/model/starcoder.py b/nemo/collections/llm/gpt/model/starcoder.py index 34bff1aa613df..b3e7b25f705ba 100644 --- a/nemo/collections/llm/gpt/model/starcoder.py +++ b/nemo/collections/llm/gpt/model/starcoder.py @@ -16,10 +16,11 @@ from pathlib import Path from typing import 
TYPE_CHECKING, Annotated, Callable, Optional +import torch import torch.nn.functional as F from torch import nn -from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel +from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config from nemo.collections.llm.utils import Config from nemo.lightning import OptimizerModule, io, teardown from nemo.lightning.pytorch.utils import dtype_from_hf @@ -157,16 +158,16 @@ def make_vocab_size_divisible_by(vocab_size): @io.model_exporter(StarcoderModel, "hf") class HFStarcoderExporter(io.ModelConnector[StarcoderModel, "GPTBigCodeForCausalLM"]): - def init(self) -> "GPTBigCodeForCausalLM": + def init(self, dtype=torch.bfloat16) -> "GPTBigCodeForCausalLM": from transformers import GPTBigCodeForCausalLM from transformers.modeling_utils import no_init_weights with no_init_weights(True): - return GPTBigCodeForCausalLM._from_config(self.config) + return GPTBigCodeForCausalLM._from_config(self.config, torch_dtype=dtype) def apply(self, output_path: Path) -> Path: - target = self.init() source, _ = self.nemo_load(str(self)) + target = self.init(torch_dtype_from_mcore_config(source.config)) target = self.convert_state(source, target) target = target.cpu() @@ -177,7 +178,6 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "transformer.wte.weight", "embedding.position_embeddings.weight": "transformer.wpe.weight", "decoder.layers.*.self_attention.linear_proj.weight": "transformer.h.*.attn.c_proj.weight", "decoder.layers.*.self_attention.linear_proj.bias": "transformer.h.*.attn.c_proj.bias", @@ -193,10 +193,9 @@ def convert_state(self, source, target): "decoder.layers.*.mlp.linear_fc1.layer_norm_bias": "transformer.h.*.ln_2.bias", "decoder.final_layernorm.weight": "transformer.ln_f.weight", "decoder.final_layernorm.bias": "transformer.ln_f.bias", - "output_layer.weight": "lm_head.weight", } - return io.apply_transforms(source, target, mapping=mapping) + return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_embedding, _export_head]) @property def tokenizer(self): @@ -225,3 +224,23 @@ def config(self) -> "HFStarcoderConfig": num_key_value_heads=source.num_query_groups, vocab_size=self.tokenizer.vocab_size, ) + + +@io.state_transform( + source_key="embedding.word_embeddings.weight", + target_key="transformer.wte.weight", +) +def _export_embedding(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. + return embedding[: megatron_config.vocab_size, :] + + +@io.state_transform( + source_key="output_layer.weight", + target_key="lm_head.weight", +) +def _export_head(ctx: io.TransformCTX, embedding): + megatron_config = ctx.target.config + # prune padding. 
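# An illustrative note on the slice below: Megatron-style checkpoints usually pad the
# embedding and output tables so the row count divides evenly under tensor parallelism,
# which means the stored tensor can have more rows than the tokenizer vocabulary.
# Trimming back to `vocab_size` gives the shape the Hugging Face model expects.
# A standalone sketch with made-up sizes:
#
#     import torch
#     vocab_size, padded_rows, hidden = 49152, 49280, 6144   # illustrative numbers only
#     padded = torch.randn(padded_rows, hidden)               # rows as stored in the checkpoint
#     exported = padded[:vocab_size, :]                        # same slice as _export_embedding / _export_head
#     assert exported.shape == (vocab_size, hidden)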
+ return embedding[: megatron_config.vocab_size, :] diff --git a/nemo/collections/llm/gpt/model/starcoder2.py b/nemo/collections/llm/gpt/model/starcoder2.py index 5df3fd2246139..544721fa4e867 100644 --- a/nemo/collections/llm/gpt/model/starcoder2.py +++ b/nemo/collections/llm/gpt/model/starcoder2.py @@ -202,7 +202,6 @@ def apply(self, output_path: Path) -> Path: def convert_state(self, source, target): mapping = { - "embedding.word_embeddings.weight": "model.embed_tokens.weight", "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", "decoder.layers.*.self_attention.linear_proj.bias": "model.layers.*.self_attn.o_proj.bias", "decoder.layers.*.mlp.linear_fc1.weight": "model.layers.*.mlp.c_fc.weight", @@ -215,7 +214,6 @@ def convert_state(self, source, target): "decoder.layers.*.mlp.linear_fc1.layer_norm_bias": "model.layers.*.post_attention_layernorm.bias", "decoder.final_layernorm.weight": "model.norm.weight", "decoder.final_layernorm.bias": "model.norm.bias", - "output_layer.weight": "lm_head.weight", } return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv_weight, _export_qkv_bias]) diff --git a/nemo/lightning/io/api.py b/nemo/lightning/io/api.py index 4abc8fa7cca3d..be9372f2e79b9 100644 --- a/nemo/lightning/io/api.py +++ b/nemo/lightning/io/api.py @@ -51,7 +51,17 @@ def load_context(path: Path, subpath: Optional[str] = None, build: bool = True): checkpoint: TrainerContext = load_ckpt("/path/to/checkpoint", subpath="model.config") """ - return load(path, output_type=TrainerContext, subpath=subpath, build=build) + if not isinstance(path, Path): + path = Path(path) + try: + return load(path, output_type=TrainerContext, subpath=subpath, build=build) + except FileNotFoundError: + # Maintain backwards compatibility with checkpoints that don't have '/context' dir. 
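# The retry below simply toggles the path between the checkpoint root and its
# 'context' subdirectory: if the caller already pointed at '<ckpt>/context', retry
# with the parent directory; otherwise retry with '/context' appended. Roughly:
#
#     load_context('<ckpt>/context')  ->  FileNotFoundError  ->  retry '<ckpt>'
#     load_context('<ckpt>')          ->  FileNotFoundError  ->  retry '<ckpt>/context'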
+ if path.parts[-1] == 'context': + path = path.parent + else: + path = path / 'context' + return load(path, output_type=TrainerContext, subpath=subpath, build=build) def model_importer(target: Type[ConnectorMixin], ext: str) -> Callable[[Type[ConnT]], Type[ConnT]]: diff --git a/nemo/lightning/io/connector.py b/nemo/lightning/io/connector.py index be1dcc3c088d8..fd7b814fe7307 100644 --- a/nemo/lightning/io/connector.py +++ b/nemo/lightning/io/connector.py @@ -228,7 +228,9 @@ def nemo_load( model = load_context(path).model _trainer = trainer or Trainer( - devices=1, accelerator="cpu" if cpu else "gpu", strategy=MegatronStrategy(ddp="pytorch") + devices=1, + accelerator="cpu" if cpu else "gpu", + strategy=MegatronStrategy(ddp="pytorch", setup_optimizers=False), ) _trainer.strategy.connect(model) diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py index c22df7cc9dfe3..e99be666ec04e 100644 --- a/nemo/lightning/pytorch/strategies/megatron_strategy.py +++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py @@ -281,7 +281,7 @@ def connect(self, model: pl.LightningModule) -> None: model.config = update_config_with_dtype_overrides(dtype_config, model.config) has_optim = getattr(model, "optim", None) - if has_optim: + if has_optim and self._setup_optimizers: opt_config = getattr(model.optim, "config", None) if isinstance(opt_config, OptimizerConfig): mcore_opt_config: OptimizerConfig = cast(OptimizerConfig, opt_config) From 70c887eb4d57dbe10153c4a37242352774a4c2f1 Mon Sep 17 00:00:00 2001 From: Huy Vu <86480512+huvunvidia@users.noreply.github.com> Date: Wed, 30 Oct 2024 09:45:44 -0400 Subject: [PATCH 041/125] NeMorun for NeMo 2.0 T5 finetuning (#11040) * workable code commit * small edits for t5_3b.py and t5_11b.py * Apply isort and black reformatting Signed-off-by: huvunvidia --------- Signed-off-by: huvunvidia Co-authored-by: Huy Vu2 Co-authored-by: huvunvidia --- .../llm/recipes/finetune_default.py | 4 +- nemo/collections/llm/recipes/t5_11b.py | 83 ++++++++++++++++++ nemo/collections/llm/recipes/t5_220m.py | 84 ++++++++++++++++++- nemo/collections/llm/recipes/t5_3b.py | 83 ++++++++++++++++++ nemo/collections/llm/t5/data/fine_tuning.py | 16 +++- nemo/collections/llm/t5/data/pre_training.py | 14 +++- tests/collections/llm/recipes/test_t5_11b.py | 20 ++++- tests/collections/llm/recipes/test_t5_220m.py | 20 ++++- tests/collections/llm/recipes/test_t5_3b.py | 20 ++++- 9 files changed, 332 insertions(+), 12 deletions(-) diff --git a/nemo/collections/llm/recipes/finetune_default.py b/nemo/collections/llm/recipes/finetune_default.py index 5a1ff58e86610..69266737edc99 100644 --- a/nemo/collections/llm/recipes/finetune_default.py +++ b/nemo/collections/llm/recipes/finetune_default.py @@ -42,7 +42,7 @@ def default_finetune_recipe( Args: model (run.Config[pl.LightningModule]): Configuration for a NeMo model. - resume_path (str): Path to the Huggingface model. + resume_path (str): Path to the Huggingface model or pretrained distributed checkpoint for resume dir (Optional[str]): Directory for saving logs and checkpoints. name (str): Name of the fine-tuning run. num_nodes (int): Number of compute nodes to use. @@ -134,7 +134,7 @@ def nemo_resume(model_id: str) -> run.Config[nl.AutoResume]: This translates to the full path {NEMO_HOME}/models/{model_id}. Args: - model_id (str): The Huggingface model to resume. 
+ model_id (str): Path to the Huggingface model or pretrained distributed checkpoint for resume Returns: run.Config[nl.AutoResume]: Configuration for resuming from NeMo checkpoint. diff --git a/nemo/collections/llm/recipes/t5_11b.py b/nemo/collections/llm/recipes/t5_11b.py index 09d4698793649..b3806e6f25407 100644 --- a/nemo/collections/llm/recipes/t5_11b.py +++ b/nemo/collections/llm/recipes/t5_11b.py @@ -24,9 +24,12 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_trainer, nemo_resume from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config11B, T5Model from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule @@ -202,3 +205,83 @@ def pretrain_recipe( optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), resume=default_resume(), ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + checkpoint_path: str = "", + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for T5 11B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + checkpoint_path (str): Path to pretrained checkpoint + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory t5_11b + + Python API usage: + >>> recipe = finetune_recipe(name="t5_11b_finetune", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
+ """ + opt_config = OptimizerConfig( + optimizer='adam', + lr=1e-4, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=50, + max_steps=2000, + min_lr=0.00001, + ) + + recipe = run.Partial( + finetune, + model=model(), + trainer=default_finetune_trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + ), + data=run.Config( + SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1 + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=nemo_resume(checkpoint_path), + ) + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/t5_220m.py b/nemo/collections/llm/recipes/t5_220m.py index a3b2b761b65b6..e220eb3fb1b0c 100644 --- a/nemo/collections/llm/recipes/t5_220m.py +++ b/nemo/collections/llm/recipes/t5_220m.py @@ -24,9 +24,12 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_trainer, nemo_resume from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config220M, T5Model from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule @@ -132,7 +135,6 @@ def trainer( plugins=bf16_mixed(), strategy=strategy, use_distributed_sampler=False, - # DEBUGGING val_check_interval=2000, ) @@ -201,3 +203,83 @@ def pretrain_recipe( optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), resume=default_resume(), ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + checkpoint_path: str = "", + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for T5 220M model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + checkpoint_path (str): Path to pretrained checkpoint + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. 
+ + Examples: + CLI usage: + $ nemo llm finetune --factory t5_220m + + Python API usage: + >>> recipe = finetune_recipe(name="t5_220m_finetune", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + opt_config = OptimizerConfig( + optimizer='adam', + lr=1e-4, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=50, + max_steps=2000, + min_lr=0.00001, + ) + + recipe = run.Partial( + finetune, + model=model(), + trainer=default_finetune_trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + ), + data=run.Config( + SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1 + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=nemo_resume(checkpoint_path), + ) + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 1 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/recipes/t5_3b.py b/nemo/collections/llm/recipes/t5_3b.py index 08bcae895c3e6..e7f215d576354 100644 --- a/nemo/collections/llm/recipes/t5_3b.py +++ b/nemo/collections/llm/recipes/t5_3b.py @@ -24,9 +24,12 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_trainer, nemo_resume from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config3B, T5Model from nemo.lightning.pytorch.optim.lr_scheduler import WarmupAnnealingScheduler from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule @@ -202,3 +205,83 @@ def pretrain_recipe( optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), resume=default_resume(), ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + checkpoint_path: str = "", + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for T5 3B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + checkpoint_path (str): Path to pretrained checkpoint + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. 
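For context, the new T5 fine-tuning factories are called the same way as the existing pretrain recipes; a hypothetical invocation (the checkpoint path below is made up) could look like:

    from nemo.collections.llm.recipes import t5_3b

    recipe = t5_3b.finetune_recipe(
        checkpoint_path="/results/t5_3b_pretrain/checkpoints/last",  # assumed: a pretrained NeMo checkpoint dir
        name="t5_3b_squad_lora",
        num_nodes=1,
        num_gpus_per_node=8,
        peft_scheme="lora",  # pass None (or 'none') for full fine-tuning, which raises TP and lowers the LR
    )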
+ + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory t5_3b + + Python API usage: + >>> recipe = finetune_recipe(name="t5_3b_finetune", num_nodes=1) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + opt_config = OptimizerConfig( + optimizer='adam', + lr=1e-4, + use_distributed_optimizer=True, + bf16=True, + weight_decay=0.01, + ) + + lr_scheduler = WarmupAnnealingScheduler( + warmup_steps=50, + max_steps=2000, + min_lr=0.00001, + ) + + recipe = run.Partial( + finetune, + model=model(), + trainer=default_finetune_trainer( + num_nodes=num_nodes, + num_gpus_per_node=num_gpus_per_node, + ), + data=run.Config( + SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1 + ), + log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)), + optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler), + resume=nemo_resume(checkpoint_path), + ) + + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe diff --git a/nemo/collections/llm/t5/data/fine_tuning.py b/nemo/collections/llm/t5/data/fine_tuning.py index 4e2a88e5712c9..4180b4f135cb1 100644 --- a/nemo/collections/llm/t5/data/fine_tuning.py +++ b/nemo/collections/llm/t5/data/fine_tuning.py @@ -71,10 +71,18 @@ def __init__( self.seed = seed self.dataset_root = Path(dataset_root) - # add additional tokens for T5 tokenizer - from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer - - self.tokenizer = tokenizer or get_nmt_tokenizer("megatron", "BertWordPieceCase") + # create tokenizer if tokenizer is None + if tokenizer is None: + from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer + + special_tokens = {} + special_tokens['additional_special_tokens'] = [f'' for i in range(100)] + tokenizer = get_nmt_tokenizer( + "megatron", + "BertWordPieceCase", + special_tokens=special_tokens, + ) + self.tokenizer = tokenizer self.memmap_workers = memmap_workers self.num_workers = num_workers diff --git a/nemo/collections/llm/t5/data/pre_training.py b/nemo/collections/llm/t5/data/pre_training.py index 9f6cb27933d55..45d485ba20740 100644 --- a/nemo/collections/llm/t5/data/pre_training.py +++ b/nemo/collections/llm/t5/data/pre_training.py @@ -141,8 +141,18 @@ def __init__( self.index_mapping_dir = index_mapping_dir self.init_global_step = 0 - # add additional tokens for T5 tokenizer - from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer + # create tokenizer if tokenizer is None + if tokenizer is None: + from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer + + special_tokens = {} + special_tokens['additional_special_tokens'] = [f'' for i in range(100)] + tokenizer = get_nmt_tokenizer( + "megatron", + "BertWordPieceCase", + special_tokens=special_tokens, + ) + self.tokenizer = tokenizer self.data_sampler = MegatronDataSampler( seq_len=self.seq_length, diff --git a/tests/collections/llm/recipes/test_t5_11b.py b/tests/collections/llm/recipes/test_t5_11b.py index 
8c4ab8332c182..5a0130323672d 100644 --- a/tests/collections/llm/recipes/test_t5_11b.py +++ b/tests/collections/llm/recipes/test_t5_11b.py @@ -15,9 +15,11 @@ import nemo_run as run import pytest -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA from nemo.collections.llm.recipes import t5_11b from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config11B, T5Model from nemo.lightning import Trainer @@ -82,6 +84,22 @@ def test_pretrain_recipe(self, recipe_module): assert recipe.data.seq_length_dec == 128 assert recipe.data.global_batch_size == 1920 + def test_finetune_recipe(self, recipe_module): + recipe = recipe_module.finetune_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == finetune + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == SquadDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 128 + assert isinstance(recipe.peft, run.Config) + assert recipe.peft.__fn_or_cls__ == LoRA + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): recipe = recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) diff --git a/tests/collections/llm/recipes/test_t5_220m.py b/tests/collections/llm/recipes/test_t5_220m.py index 744598e3b01b3..725061280b4fa 100644 --- a/tests/collections/llm/recipes/test_t5_220m.py +++ b/tests/collections/llm/recipes/test_t5_220m.py @@ -15,9 +15,11 @@ import nemo_run as run import pytest -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA from nemo.collections.llm.recipes import t5_220m from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config220M, T5Model from nemo.lightning import Trainer @@ -82,6 +84,22 @@ def test_pretrain_recipe(self, recipe_module): assert recipe.data.seq_length_dec == 128 assert recipe.data.global_batch_size == 512 + def test_finetune_recipe(self, recipe_module): + recipe = recipe_module.finetune_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == finetune + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == SquadDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 128 + assert isinstance(recipe.peft, run.Config) + assert recipe.peft.__fn_or_cls__ == LoRA + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): recipe = 
recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) diff --git a/tests/collections/llm/recipes/test_t5_3b.py b/tests/collections/llm/recipes/test_t5_3b.py index 7672b95426cb1..e81bb3e27f477 100644 --- a/tests/collections/llm/recipes/test_t5_3b.py +++ b/tests/collections/llm/recipes/test_t5_3b.py @@ -15,9 +15,11 @@ import nemo_run as run import pytest -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain +from nemo.collections.llm.peft.lora import LoRA from nemo.collections.llm.recipes import t5_3b from nemo.collections.llm.t5.data.mock import MockDataModule +from nemo.collections.llm.t5.data.squad import SquadDataModule from nemo.collections.llm.t5.model.t5 import T5Config3B, T5Model from nemo.lightning import Trainer @@ -82,6 +84,22 @@ def test_pretrain_recipe(self, recipe_module): assert recipe.data.seq_length_dec == 128 assert recipe.data.global_batch_size == 1920 + def test_finetune_recipe(self, recipe_module): + recipe = recipe_module.finetune_recipe() + assert isinstance(recipe, run.Partial) + assert recipe.__fn_or_cls__ == finetune + assert isinstance(recipe.model, run.Config) + assert recipe.model.__fn_or_cls__ == T5Model + assert isinstance(recipe.trainer, run.Config) + assert recipe.trainer.__fn_or_cls__ == Trainer + assert isinstance(recipe.data, run.Config) + assert recipe.data.__fn_or_cls__ == SquadDataModule + assert recipe.data.seq_length == 512 + assert recipe.data.seq_length_dec == 128 + assert recipe.data.global_batch_size == 128 + assert isinstance(recipe.peft, run.Config) + assert recipe.peft.__fn_or_cls__ == LoRA + @pytest.mark.parametrize("num_nodes,num_gpus_per_node", [(1, 8), (2, 4), (4, 2)]) def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_nodes, num_gpus_per_node): recipe = recipe_module.pretrain_recipe(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node) From ae1fd4684fa16741bdcb6ef081f381a317da0194 Mon Sep 17 00:00:00 2001 From: Ethan He Date: Wed, 30 Oct 2024 09:42:23 -0700 Subject: [PATCH 042/125] fix model_checkpoint.py (#11057) support the case where monitor is the only metric in checkpoint name. 
for example 'global_step=1000' Signed-off-by: Ethan He --- nemo/lightning/pytorch/callbacks/model_checkpoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo/lightning/pytorch/callbacks/model_checkpoint.py b/nemo/lightning/pytorch/callbacks/model_checkpoint.py index 1bfe9ccd80524..cffa8b9275ffa 100644 --- a/nemo/lightning/pytorch/callbacks/model_checkpoint.py +++ b/nemo/lightning/pytorch/callbacks/model_checkpoint.py @@ -196,7 +196,9 @@ def nemo_topk_check_previous_run(self): match = re.search('[A-z]', checkpoint[index:]) if match: value = checkpoint[index : index + match.start() - 1] # -1 due to separator hyphen - self.best_k_models[checkpoint] = float(value) + else: + value = checkpoint[index:] + self.best_k_models[checkpoint] = float(value) if len(self.best_k_models) < 1: return # No saved checkpoints yet From b9f627fb7e955f3e4097f31acc89f3dedf62d797 Mon Sep 17 00:00:00 2001 From: Jan Lasek Date: Wed, 30 Oct 2024 19:08:36 +0100 Subject: [PATCH 043/125] Update PTQ tests and ModelOpt version (#11095) * Deprecate NeMo 1 PTQ tests except FP8 Signed-off-by: Jan Lasek * Convert model right before testing it for FP8 PTQ Signed-off-by: Jan Lasek * Bump modelopt version Signed-off-by: Jan Lasek --------- Signed-off-by: Jan Lasek --- .github/workflows/cicd-main.yml | 72 ++++----------------------------- Dockerfile.ci | 2 +- docs/source/starthere/intro.rst | 2 +- 3 files changed, 10 insertions(+), 66 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 22bbb3c1a447b..bb239acb00fc5 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -495,18 +495,6 @@ jobs: # - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" # if: "failure()" - L2_PTQ_Llama2_Export_Only: - needs: [cicd-test-container-setup] - uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_Export_Only') || needs.cicd-test-container-setup.outputs.all == 'true' - with: - RUNNER: self-hosted-azure - SCRIPT: | - python examples/nlp/language_modeling/megatron_gpt_ptq.py \ - model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \ - quantization.algorithm=null \ - export.save_path=/tmp/nlp_megatron_llama_export_only/ci_baseline - L2_PTQ_Llama2_FP8: needs: [cicd-test-container-setup] uses: ./.github/workflows/_test_template.yml @@ -514,8 +502,13 @@ jobs: with: RUNNER: self-hosted-azure SCRIPT: | + CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \ + --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \ + --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \ + --precision=16 + python examples/nlp/language_modeling/megatron_gpt_ptq.py \ - model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \ + model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \ model.tensor_model_parallel_size=2 \ trainer.devices=2 \ quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \ @@ -526,55 +519,8 @@ jobs: export.sample_output=False \ export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo - L2_PTQ_Llama2_INT8_SQ: - needs: [cicd-test-container-setup] - uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_INT8_SQ') || needs.cicd-test-container-setup.outputs.all == 'true' - with: - RUNNER: self-hosted-azure - TIMEOUT: 15 - SCRIPT: | - python 
examples/nlp/language_modeling/megatron_gpt_ptq.py \ - model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \ - quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \ - quantization.algorithm=int8_sq \ - quantization.num_calib_size=8 \ - inference.batch_size=2 \ - export.sample_output=False \ - export.save_path=/tmp/nlp_megatron_llama_eo/ci_int8_sq.qnemo - - # TODO: investigate int4_awq stuck issues and restore the test - #L2_PTQ_Llama2_INT4_AWQ: - # needs: [cicd-test-container-setup] - # runs-on: self-hosted-azure - # timeout-minutes: 10 - # container: - # image: nemoci.azurecr.io/nemo_container_${{ github.run_id }} - # options: - # # --user 0:128 - # --device=/dev/nvidia0 - # --gpus all - # --shm-size=8g - # --env TRANSFORMERS_OFFLINE=0 - # --env HYDRA_FULL_ERROR=1 - # --volume /mnt/datadrive/TestData:/home/TestData - # steps: - # - name: Checkout repository - # uses: actions/checkout@v4 - # - run: | - # python examples/nlp/language_modeling/megatron_gpt_ptq.py \ - # model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \ - # model.tensor_model_parallel_size=1 \ - # trainer.devices=1 \ - # quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \ - # quantization.algorithm=int4_awq \ - # quantization.num_calib_size=8 \ - # inference.batch_size=2 \ - # export.save_path=/home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo - # - # rm -rf /home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo - #- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" - # if: "failure()" + AFTER_SCRIPT: | + rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo # OPTIONAL_L2_QAT_Llama2_INT4: # needs: [cicd-test-container-setup] @@ -4477,10 +4423,8 @@ jobs: - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2 - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED - L2_NeMo_2_Mixtral_Pretraining - - L2_PTQ_Llama2_INT8_SQ - L2_PTQ_Llama2_FP8 - L2_Community_LLM_Checkpoints_tests_Llama3 - - L2_PTQ_Llama2_Export_Only - L2_Distill_Llama2 - L2_Prune_Width_Llama2 - L2_Speech_to_Text_AED diff --git a/Dockerfile.ci b/Dockerfile.ci index c881a4c830ebe..10ea68f2c2474 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -52,7 +52,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea -ARG MODELOPT_VERSION=0.17.0 +ARG MODELOPT_VERSION=0.19.0 ARG MCORE_TAG=213c8a23fa9fe95d19eff0932a1e6e71767f0962 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c diff --git a/docs/source/starthere/intro.rst b/docs/source/starthere/intro.rst index c3c3304082dc6..0cf7146ff1eff 100644 --- a/docs/source/starthere/intro.rst +++ b/docs/source/starthere/intro.rst @@ -102,7 +102,7 @@ This final step involves installing the TensorRT Model Optimizer package. .. code-block:: bash - pip install nvidia-modelopt[torch]~=0.17.0 --extra-index-url https://pypi.nvidia.com + pip install nvidia-modelopt[torch]~=0.19.0 --extra-index-url https://pypi.nvidia.com .. 
code-block:: bash From 6c217892fbaecd65a6faf5a2c52cc54b96a0c7f0 Mon Sep 17 00:00:00 2001 From: meatybobby Date: Wed, 30 Oct 2024 11:26:03 -0700 Subject: [PATCH 044/125] Fix TRTLLM nemo2 activation parsing (#11062) * Fix TRTLLM nemo2 activation parsing * Apply isort and black reformatting Signed-off-by: meatybobby --------- Signed-off-by: meatybobby Co-authored-by: meatybobby Co-authored-by: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> --- nemo/export/trt_llm/converter/model_converter.py | 2 ++ nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nemo/export/trt_llm/converter/model_converter.py b/nemo/export/trt_llm/converter/model_converter.py index e5e9f8154d249..9729781e6eba3 100755 --- a/nemo/export/trt_llm/converter/model_converter.py +++ b/nemo/export/trt_llm/converter/model_converter.py @@ -254,6 +254,8 @@ def model_to_trtllm_ckpt( layer_num = int(new_key.split(".")[2]) if layer_num in layers_range: new_key = new_key.replace(f"layers.{layer_num}", f"layers.{layer_num-layers_range[0]}") + else: + continue if config.get("new_decoder_architecture", False) and "post_layernorm" in new_key: new_key = new_key.replace("post_layernorm", "mlp_layernorm") weights_dict_local[new_key] = v diff --git a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py index 171932d84cfb7..5a9cded38babd 100644 --- a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py +++ b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py @@ -394,7 +394,10 @@ def load_nemo_model(nemo_ckpt: Union[str, Path], nemo_export_dir: Union[str, Pat if isinstance(v, (float, int, str, bool)): nemo_model_config[k] = v elif k == "activation_func": - nemo_model_config["activation"] = v.__name__ + if isinstance(v, torch.jit.ScriptFunction): + nemo_model_config["activation"] = v.name.replace("_", "-") + else: + nemo_model_config["activation"] = v.__name__ if nemo_model_config.get("num_moe_experts") is None: nemo_model_config["num_moe_experts"] = 0 From d441dcaa53cf325d11838bcfcaf7582781a07637 Mon Sep 17 00:00:00 2001 From: Marc Romeyn Date: Wed, 30 Oct 2024 21:26:47 +0100 Subject: [PATCH 045/125] Fix datasets in CLI (#11097) * Fix datasets in CLI Signed-off-by: Marc Romeyn * Fix wrong import Signed-off-by: Marc Romeyn * Apply isort and black reformatting Signed-off-by: marcromeyn * Fix wrong import Signed-off-by: Marc Romeyn --------- Signed-off-by: Marc Romeyn Signed-off-by: marcromeyn Co-authored-by: marcromeyn --- nemo/collections/llm/__init__.py | 3 ++- nemo/collections/llm/gpt/data/api.py | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py index 3fe20173cba26..a224b2638b78c 100644 --- a/nemo/collections/llm/__init__.py +++ b/nemo/collections/llm/__init__.py @@ -26,7 +26,7 @@ PreTrainingDataModule, SquadDataModule, ) -from nemo.collections.llm.gpt.data.api import dolly, mock, squad +from nemo.collections.llm.gpt.data.api import dolly, hf_dataset, mock, squad from nemo.collections.llm.gpt.model import ( Baichuan2Config, Baichuan2Config7B, @@ -197,6 +197,7 @@ "squad", "dolly", "peft", + "hf_dataset", "HfAutoModelForCausalLM", ] diff --git a/nemo/collections/llm/gpt/data/api.py b/nemo/collections/llm/gpt/data/api.py index a7fde4cfc8d88..74ecb5272ac24 100644 --- a/nemo/collections/llm/gpt/data/api.py +++ b/nemo/collections/llm/gpt/data/api.py @@ -12,27 +12,37 @@ # See the License for the specific language 
governing permissions and # limitations under the License. +import nemo_run as run import pytorch_lightning as pl from nemo.collections.llm.gpt.data.dolly import DollyDataModule +from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule from nemo.collections.llm.gpt.data.mock import MockDataModule from nemo.collections.llm.gpt.data.squad import SquadDataModule -from nemo.collections.llm.utils import factory -@factory +@run.cli.factory +@run.autoconvert def mock() -> pl.LightningDataModule: return MockDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2) -@factory +@run.cli.factory +@run.autoconvert def squad() -> pl.LightningDataModule: return SquadDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2) -@factory +@run.cli.factory +@run.autoconvert def dolly() -> pl.LightningDataModule: return DollyDataModule(seq_length=4096, global_batch_size=16, micro_batch_size=2) -__all__ = ["mock", "squad", "dolly"] +@run.cli.factory +@run.autoconvert +def hf_dataset(dataset: str) -> pl.LightningDataModule: + return HfDatasetDataModule(dataset=dataset, global_batch_size=16, micro_batch_size=2) + + +__all__ = ["mock", "squad", "dolly", "hf_dataset"] From ab7b325cb42f4ea82d290c695cff22c2f4765838 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Wed, 30 Oct 2024 19:46:36 -0700 Subject: [PATCH 046/125] Fix yaml serialization in io mixin (#11106) * Fix yaml serialization in io mixin Signed-off-by: Hemil Desai * Add docstring Signed-off-by: Hemil Desai --------- Signed-off-by: Hemil Desai --- nemo/lightning/io/mixin.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/nemo/lightning/io/mixin.py b/nemo/lightning/io/mixin.py index e2b9d7f409aef..3613444b63300 100644 --- a/nemo/lightning/io/mixin.py +++ b/nemo/lightning/io/mixin.py @@ -81,15 +81,33 @@ def _partial_representer_with_defaults(dumper, data): def _safe_object_representer(dumper, data): - if not inspect.isclass(data): - cls = data.__class__ - call = True - else: - cls = data + """ + Represent a given object as YAML using the specified dumper. + + This function is a fallback for objects that don't have specific representers. + If the object has __qualname__ attr, the __target__ is set to f"{inspect.getmodule(obj).__name__}.{obj.__qualname__}". + If the object does not have a __qualname__ attr, the __target__ is set from its __class__ attr. + The __call__ key is used to indicate whether the target should be called to create an instance. + + Args: + dumper (yaml.Dumper): The YAML dumper to use for serialization. + data (Any): The data to serialize. This can be any Python object, + but if it's a class or a class instance, special handling will be applied. + + Returns: + str: The YAML representation of the data. 
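The fallback rule documented above can be illustrated in isolation; a rough sketch (using builtins rather than NeMo classes) of how the target string and the call flag are chosen:

    import inspect

    def target_for(obj):
        try:
            # Classes and functions expose __qualname__, so reference them directly (no call).
            return f"{inspect.getmodule(obj).__name__}.{obj.__qualname__}", False
        except AttributeError:
            # Plain instances do not, so point at their class and mark it to be called.
            cls = obj.__class__
            return f"{inspect.getmodule(cls).__name__}.{cls.__qualname__}", True

    print(target_for(dict))       # ('builtins.dict', False)
    print(target_for({"a": 1}))   # ('builtins.dict', True)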
+ """ + try: + obj = data + target = f"{inspect.getmodule(obj).__name__}.{obj.__qualname__}" call = False + except AttributeError: + obj = data.__class__ + target = f"{inspect.getmodule(obj).__name__}.{obj.__qualname__}" + call = True value = { - "_target_": f"{inspect.getmodule(cls).__name__}.{cls.__qualname__}", # type: ignore + "_target_": target, # type: ignore "_call_": call, } return dumper.represent_data(value) From b6f1051c7f93b6c6b9d76baa80a00a26c1090a5c Mon Sep 17 00:00:00 2001 From: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Date: Wed, 30 Oct 2024 23:14:15 -0400 Subject: [PATCH 047/125] disable overlap_param_gather_with_optimizer_step (#11102) * disable overlap_param_gather_with_optimizer_step Signed-off-by: Jimmy Zhang * fix comment Signed-off-by: Jieming Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * fix typo again Signed-off-by: Jieming Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 --------- Signed-off-by: Jimmy Zhang Signed-off-by: Jieming Zhang Signed-off-by: JimmyZhang12 Co-authored-by: Jimmy Zhang Co-authored-by: JimmyZhang12 --- nemo/collections/llm/recipes/gpt3_175b.py | 2 +- nemo/collections/llm/recipes/llama31_405b.py | 2 +- nemo/collections/llm/recipes/llama3_70b.py | 2 +- nemo/collections/llm/recipes/mixtral_8x22b.py | 4 +++- nemo/collections/llm/recipes/mixtral_8x7b.py | 2 +- nemo/collections/llm/recipes/nemotron4_22b.py | 2 +- nemo/collections/llm/recipes/nemotron4_340b.py | 2 +- nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py | 3 ++- 8 files changed, 11 insertions(+), 8 deletions(-) diff --git a/nemo/collections/llm/recipes/gpt3_175b.py b/nemo/collections/llm/recipes/gpt3_175b.py index 1abe8a218e82f..5932ce5346b9c 100644 --- a/nemo/collections/llm/recipes/gpt3_175b.py +++ b/nemo/collections/llm/recipes/gpt3_175b.py @@ -229,7 +229,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: tp_comm_overlap_cfg=userbuffers_bf16_h100_h12288_tp4_mbs1_seqlen2048, defer_embedding_wgrad_compute=True, wgrad_deferral_limit=50, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing align_param_gather=True, ) ) diff --git a/nemo/collections/llm/recipes/llama31_405b.py b/nemo/collections/llm/recipes/llama31_405b.py index 055e9a06fcbaa..ce0d0cdc63ca0 100644 --- a/nemo/collections/llm/recipes/llama31_405b.py +++ b/nemo/collections/llm/recipes/llama31_405b.py @@ -231,7 +231,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: tp_comm_overlap_cfg=userbuffers_bf16_h100_h16384_tp8_cp2_mbs1_seqlen8192, defer_embedding_wgrad_compute=True, wgrad_deferral_limit=50, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing align_param_gather=True, ) ) diff --git a/nemo/collections/llm/recipes/llama3_70b.py b/nemo/collections/llm/recipes/llama3_70b.py index cb862bf50ee4b..e393dea908b7a 100644 --- a/nemo/collections/llm/recipes/llama3_70b.py +++ b/nemo/collections/llm/recipes/llama3_70b.py @@ -232,7 +232,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: tp_comm_overlap_cfg=userbuffers_bf16_h100_h8192_tp4_mbs1_seqlen8192, defer_embedding_wgrad_compute=True, wgrad_deferral_limit=22, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing. 
align_param_gather=True, ) ) diff --git a/nemo/collections/llm/recipes/mixtral_8x22b.py b/nemo/collections/llm/recipes/mixtral_8x22b.py index e9e6a27c24b7d..c6f688149ae4f 100644 --- a/nemo/collections/llm/recipes/mixtral_8x22b.py +++ b/nemo/collections/llm/recipes/mixtral_8x22b.py @@ -226,7 +226,9 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: MegatronTokenDropCallback, ), run.Config( - MegatronCommOverlapCallback, overlap_param_gather_with_optimizer_step=True, align_param_gather=True + MegatronCommOverlapCallback, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing + align_param_gather=True, ), ] ) diff --git a/nemo/collections/llm/recipes/mixtral_8x7b.py b/nemo/collections/llm/recipes/mixtral_8x7b.py index 9b41e03882a1a..dc6e16639820a 100644 --- a/nemo/collections/llm/recipes/mixtral_8x7b.py +++ b/nemo/collections/llm/recipes/mixtral_8x7b.py @@ -222,7 +222,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: run.Config(MegatronTokenDropCallback), run.Config( MegatronCommOverlapCallback, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing. align_param_gather=True, ), ] diff --git a/nemo/collections/llm/recipes/nemotron4_22b.py b/nemo/collections/llm/recipes/nemotron4_22b.py index a20afedfea56c..595b1fc0bac2d 100644 --- a/nemo/collections/llm/recipes/nemotron4_22b.py +++ b/nemo/collections/llm/recipes/nemotron4_22b.py @@ -209,7 +209,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: tp_comm_overlap=True, defer_embedding_wgrad_compute=True, wgrad_deferral_limit=22, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing align_param_gather=True, ) ) diff --git a/nemo/collections/llm/recipes/nemotron4_340b.py b/nemo/collections/llm/recipes/nemotron4_340b.py index 8268b2a87791b..0ddd10c2bc210 100644 --- a/nemo/collections/llm/recipes/nemotron4_340b.py +++ b/nemo/collections/llm/recipes/nemotron4_340b.py @@ -212,7 +212,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: tp_comm_overlap=True, defer_embedding_wgrad_compute=True, wgrad_deferral_limit=22, - overlap_param_gather_with_optimizer_step=True, + overlap_param_gather_with_optimizer_step=False, # Currently disabled due to an issue with checkpointing align_param_gather=True, ) ) diff --git a/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py b/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py index 2f53babfe5598..fc4312e2ff842 100644 --- a/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py +++ b/nemo/lightning/pytorch/callbacks/megatron_comm_overlap.py @@ -181,7 +181,8 @@ def _get_optimizer_overlap_cfgs(self, parallelism_cfg: ParallelismConfig) -> _Co comm_overlap_cfg.overlap_grad_reduce = True comm_overlap_cfg.overlap_param_gather = True if parallelism_cfg.pipeline_model_parallel_size > 1 and vp_size > 1: - comm_overlap_cfg.overlap_param_gather_with_optimizer_step = True + # Currently disabled due to an issue with checkpointing + # comm_overlap_cfg.overlap_param_gather_with_optimizer_step = True comm_overlap_cfg.align_param_gather = True comm_overlap_cfg = self._override_user_cfgs(comm_overlap_cfg) From 20d195a1437a1b27a8b2e32a7bf679c21d6c4cfe Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Thu, 31 Oct 2024 11:20:31 -0400 Subject: [PATCH 048/125] Add 
Packed Seq option to GPT based models (#11100) * add pack seq args/docstr Signed-off-by: Ao Tang * Apply isort and black reformatting Signed-off-by: suiyoubi * reword docstr Signed-off-by: Ao Tang * space Signed-off-by: Ao Tang --------- Signed-off-by: Ao Tang Signed-off-by: suiyoubi Co-authored-by: suiyoubi --- nemo/collections/llm/recipes/baichuan2_7b.py | 1 + nemo/collections/llm/recipes/chatglm3_6b.py | 1 + nemo/collections/llm/recipes/gemma2_27b.py | 6 +++++- nemo/collections/llm/recipes/gemma2_2b.py | 6 +++++- nemo/collections/llm/recipes/gemma2_9b.py | 6 +++++- nemo/collections/llm/recipes/gemma_2b.py | 1 + nemo/collections/llm/recipes/gemma_7b.py | 1 + nemo/collections/llm/recipes/mistral_7b.py | 1 + nemo/collections/llm/recipes/mistral_nemo_12b.py | 1 + nemo/collections/llm/recipes/mixtral_8x22b.py | 1 + nemo/collections/llm/recipes/mixtral_8x7b.py | 1 + nemo/collections/llm/recipes/qwen2_1p5b.py | 6 +++++- nemo/collections/llm/recipes/qwen2_500m.py | 6 +++++- nemo/collections/llm/recipes/qwen2_72b.py | 6 +++++- nemo/collections/llm/recipes/qwen2_7b.py | 6 +++++- nemo/collections/llm/recipes/starcoder2_15b.py | 6 +++++- nemo/collections/llm/recipes/starcoder2_3b.py | 6 +++++- nemo/collections/llm/recipes/starcoder2_7b.py | 6 +++++- 18 files changed, 58 insertions(+), 10 deletions(-) diff --git a/nemo/collections/llm/recipes/baichuan2_7b.py b/nemo/collections/llm/recipes/baichuan2_7b.py index f56c16afcf6a1..20de2c73f9dde 100644 --- a/nemo/collections/llm/recipes/baichuan2_7b.py +++ b/nemo/collections/llm/recipes/baichuan2_7b.py @@ -255,6 +255,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/chatglm3_6b.py b/nemo/collections/llm/recipes/chatglm3_6b.py index 5ced78916c292..ef815a0851fc5 100644 --- a/nemo/collections/llm/recipes/chatglm3_6b.py +++ b/nemo/collections/llm/recipes/chatglm3_6b.py @@ -255,6 +255,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/gemma2_27b.py b/nemo/collections/llm/recipes/gemma2_27b.py index 4b7c09e30bfcf..67a142426ae4e 100644 --- a/nemo/collections/llm/recipes/gemma2_27b.py +++ b/nemo/collections/llm/recipes/gemma2_27b.py @@ -177,6 +177,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Gemma2 27B model. @@ -191,6 +192,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. 
+ packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -208,7 +210,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 recipe.trainer.strategy.tensor_model_parallel_size = 8 diff --git a/nemo/collections/llm/recipes/gemma2_2b.py b/nemo/collections/llm/recipes/gemma2_2b.py index 952d081841688..c926bb27474eb 100644 --- a/nemo/collections/llm/recipes/gemma2_2b.py +++ b/nemo/collections/llm/recipes/gemma2_2b.py @@ -177,6 +177,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Gemma2 2B model. @@ -191,6 +192,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -208,7 +210,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': diff --git a/nemo/collections/llm/recipes/gemma2_9b.py b/nemo/collections/llm/recipes/gemma2_9b.py index 8f004c5a2a8d7..9159b4beb1f9e 100644 --- a/nemo/collections/llm/recipes/gemma2_9b.py +++ b/nemo/collections/llm/recipes/gemma2_9b.py @@ -177,6 +177,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Gemma2 9B model. @@ -191,6 +192,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -208,7 +210,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. 
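A hypothetical call enabling packing for one of the updated recipes (assuming the module imports like the other recipes) might look like:

    from nemo.collections.llm.recipes import gemma2_9b

    recipe = gemma2_9b.finetune_recipe(
        name="gemma2_9b_squad_packed",
        num_nodes=1,
        num_gpus_per_node=8,
        peft_scheme="lora",
        packed_sequence=True,  # pack several short examples into each 2048-token training sequence
    )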
""" - recipe = default_finetune_recipe(model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 recipe.trainer.strategy.tensor_model_parallel_size = 4 diff --git a/nemo/collections/llm/recipes/gemma_2b.py b/nemo/collections/llm/recipes/gemma_2b.py index cead1f2e5689d..1c4e268b0dbc9 100644 --- a/nemo/collections/llm/recipes/gemma_2b.py +++ b/nemo/collections/llm/recipes/gemma_2b.py @@ -254,6 +254,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/gemma_7b.py b/nemo/collections/llm/recipes/gemma_7b.py index ba6458af20d2c..23013649c56c9 100644 --- a/nemo/collections/llm/recipes/gemma_7b.py +++ b/nemo/collections/llm/recipes/gemma_7b.py @@ -254,6 +254,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/mistral_7b.py b/nemo/collections/llm/recipes/mistral_7b.py index dfcc671cc61fe..7685bcd3ace64 100644 --- a/nemo/collections/llm/recipes/mistral_7b.py +++ b/nemo/collections/llm/recipes/mistral_7b.py @@ -208,6 +208,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/mistral_nemo_12b.py b/nemo/collections/llm/recipes/mistral_nemo_12b.py index 3f1878edb59d5..e6616826d9a8d 100644 --- a/nemo/collections/llm/recipes/mistral_nemo_12b.py +++ b/nemo/collections/llm/recipes/mistral_nemo_12b.py @@ -256,6 +256,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. 
diff --git a/nemo/collections/llm/recipes/mixtral_8x22b.py b/nemo/collections/llm/recipes/mixtral_8x22b.py index c6f688149ae4f..f768bf0499b1e 100644 --- a/nemo/collections/llm/recipes/mixtral_8x22b.py +++ b/nemo/collections/llm/recipes/mixtral_8x22b.py @@ -260,6 +260,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/mixtral_8x7b.py b/nemo/collections/llm/recipes/mixtral_8x7b.py index dc6e16639820a..d4286a15843f2 100644 --- a/nemo/collections/llm/recipes/mixtral_8x7b.py +++ b/nemo/collections/llm/recipes/mixtral_8x7b.py @@ -255,6 +255,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. diff --git a/nemo/collections/llm/recipes/qwen2_1p5b.py b/nemo/collections/llm/recipes/qwen2_1p5b.py index 80ed957e3b48d..662f8e98899d0 100644 --- a/nemo/collections/llm/recipes/qwen2_1p5b.py +++ b/nemo/collections/llm/recipes/qwen2_1p5b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Qwen2 1.5b model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "Qwen/Qwen2-1.5B", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "Qwen/Qwen2-1.5B", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': diff --git a/nemo/collections/llm/recipes/qwen2_500m.py b/nemo/collections/llm/recipes/qwen2_500m.py index 677fc066c0478..ac6cbfe84464f 100644 --- a/nemo/collections/llm/recipes/qwen2_500m.py +++ b/nemo/collections/llm/recipes/qwen2_500m.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Qwen2 500m model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. 
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "Qwen/Qwen2-0.5B", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "Qwen/Qwen2-0.5B", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': diff --git a/nemo/collections/llm/recipes/qwen2_72b.py b/nemo/collections/llm/recipes/qwen2_72b.py index d93be1b9257a9..0b94761e5749c 100644 --- a/nemo/collections/llm/recipes/qwen2_72b.py +++ b/nemo/collections/llm/recipes/qwen2_72b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Qwen2 72b model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "Qwen/Qwen2-72B", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "Qwen/Qwen2-72B", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': assert num_nodes >= 4 recipe.trainer.strategy.tensor_model_parallel_size = 8 diff --git a/nemo/collections/llm/recipes/qwen2_7b.py b/nemo/collections/llm/recipes/qwen2_7b.py index 57ccd48e9fe18..10c990f151427 100644 --- a/nemo/collections/llm/recipes/qwen2_7b.py +++ b/nemo/collections/llm/recipes/qwen2_7b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Qwen2 7b model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. 
""" - recipe = default_finetune_recipe(model(), "Qwen/Qwen2-7B", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "Qwen/Qwen2-7B", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 2 recipe.optim.config.lr = 5e-6 diff --git a/nemo/collections/llm/recipes/starcoder2_15b.py b/nemo/collections/llm/recipes/starcoder2_15b.py index 5faebb9460f3b..9ca06ce4691a0 100644 --- a/nemo/collections/llm/recipes/starcoder2_15b.py +++ b/nemo/collections/llm/recipes/starcoder2_15b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Starcoder2 15B model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "bigcode/starcoder2-15b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "bigcode/starcoder2-15b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 4 recipe.optim.config.lr = 5e-6 diff --git a/nemo/collections/llm/recipes/starcoder2_3b.py b/nemo/collections/llm/recipes/starcoder2_3b.py index 232f5842ff842..55884b353d8fe 100644 --- a/nemo/collections/llm/recipes/starcoder2_3b.py +++ b/nemo/collections/llm/recipes/starcoder2_3b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Starcoder2 3B model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. 
""" - recipe = default_finetune_recipe(model(), "bigcode/starcoder2-3b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "bigcode/starcoder2-3b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 2 recipe.optim.config.lr = 5e-6 diff --git a/nemo/collections/llm/recipes/starcoder2_7b.py b/nemo/collections/llm/recipes/starcoder2_7b.py index ee6dacdc98e9a..46e34b8b0c77d 100644 --- a/nemo/collections/llm/recipes/starcoder2_7b.py +++ b/nemo/collections/llm/recipes/starcoder2_7b.py @@ -180,6 +180,7 @@ def finetune_recipe( num_nodes: int = 1, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Starcoder2 7B model. @@ -194,6 +195,7 @@ def finetune_recipe( num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -211,7 +213,9 @@ def finetune_recipe( on fine-tuning LLMs with NeMo, see the fine-tuning guide in the `examples/llm/finetune/` directory. """ - recipe = default_finetune_recipe(model(), "bigcode/starcoder2-7b", dir, name, num_nodes, num_gpus_per_node) + recipe = default_finetune_recipe( + model(), "bigcode/starcoder2-7b", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 2 recipe.optim.config.lr = 5e-6 From ed9722d19320415aa8acc6ffaa6d159e56dd4f67 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Thu, 31 Oct 2024 08:43:53 -0700 Subject: [PATCH 049/125] Fix MCoreGPTModel import in llm.gpt.model.base (#11109) Signed-off-by: Hemil Desai --- nemo/collections/llm/gpt/model/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py index 1e34ffcea0c14..8bb4752436109 100644 --- a/nemo/collections/llm/gpt/model/base.py +++ b/nemo/collections/llm/gpt/model/base.py @@ -20,6 +20,7 @@ import torch.distributed from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import GPTInferenceWrapper from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import InferenceWrapperConfig +from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel from megatron.core.optimizer import OptimizerConfig from megatron.core.transformer.spec_utils import ModuleSpec from megatron.core.transformer.transformer_config import TransformerConfig @@ -44,8 +45,6 @@ _grad_accum_fusion_available = False if TYPE_CHECKING: - from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel - from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec @@ -189,7 +188,6 @@ def configure_model(self, tokenizer) -> "MCoreGPTModel": ) % vp_size == 0, "Make sure the number of model chunks is the same across all pipeline stages." 
from megatron.core import parallel_state - from megatron.core.models.gpt.gpt_model import GPTModel as MCoreGPTModel transformer_layer_spec = self.transformer_layer_spec if not isinstance(transformer_layer_spec, ModuleSpec): From ac2f7ead6add03261e88104c40e7fb190da59f23 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:41:44 -0700 Subject: [PATCH 050/125] TP+MoE peft fix (#11114) * add is_expert option to ParallelLinearAdapter Signed-off-by: Alexandros Koumparoulis * pass is_expert to ParalleLinearAdapter Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa --- nemo/collections/llm/peft/lora.py | 6 +++- .../megatron/adapters/parallel_adapters.py | 32 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/nemo/collections/llm/peft/lora.py b/nemo/collections/llm/peft/lora.py index ecebf696a42c0..77063b9d7e982 100644 --- a/nemo/collections/llm/peft/lora.py +++ b/nemo/collections/llm/peft/lora.py @@ -66,7 +66,6 @@ def forward(self, x): elif len(linear_output) == 3: linear_output, bias, layernorm_output = linear_output x = layernorm_output - adapter_output = self.adapter(x.contiguous()) return linear_output + adapter_output, bias @@ -114,6 +113,10 @@ def forward(self, x): return res + lora_res +def is_expert_linear(fqn): + return re.match('.*mlp\.experts\.local_experts.[0-9]+\.linear_fc[1-2]$', fqn) is not None + + @dataclass class LoRA(PEFT): """ @@ -237,6 +240,7 @@ def wildcard_match(pattern, key): dropout_position=self.dropout_position, model_parallel_config=getattr(m, "config", None), alpha=self.alpha, + is_expert=is_expert_linear(full_name), ) return AdapterParallelAdd(m, adapter) return m diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py index 042dbb95979e6..22f669fb55440 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py @@ -136,6 +136,26 @@ class MLPInfusedAdapterConfig(InfusedAdapterConfig): _target_: str = "{0}.{1}".format(MLPInfusedAdapter.__module__, MLPInfusedAdapter.__name__) +def pad_seq_to_mult(x, mult): + import torch.nn.functional as F + + if x.shape[0] % mult == 0: + return x, 0 + pad_len = mult - (x.shape[0] % mult) + with torch.no_grad(): + # pad at the tail + x = torch.nn.functional.pad(x, (0, 0, 0, pad_len)) + return x, pad_len + + +def unpad_seq_to_mult(x, pad_len): + if pad_len <= 0: + return x + with torch.no_grad(): + # prune tail padding + return x[:-pad_len, :] + + class ParallelLinearAdapter(nn.Module, AdapterModuleUtil): def __init__( self, @@ -154,6 +174,7 @@ def __init__( alpha: float | None = None, dropout_position: str = 'post', a2a_experimental: bool = False, # TODO: should rename this or make it a default feature + is_expert: bool = False, **kwargs, ): super().__init__() @@ -167,6 +188,7 @@ def __init__( self.input_is_parallel = input_is_parallel self.dropout_position = dropout_position self.use_a2a = a2a_experimental + self.is_expert = is_expert # megatron_gpt_peft_models will provide this arg, but deprecated ones do not. # in case this arg is not provided, use the dummy default config. 
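The two helpers added above (`pad_seq_to_mult` / `unpad_seq_to_mult`) are what the expert path of `forward` in the next hunk relies on: when `is_expert` is set, the activation is padded along the sequence dimension to a multiple of the tensor-parallel size before the adapter runs, and the padding is stripped again afterwards. A standalone sketch of that round trip, with toy shapes and an assumed tensor-parallel size of 4:

import torch
import torch.nn.functional as F


def pad_seq_to_mult(x, mult):
    # Pad the sequence (first) dimension at the tail so it divides evenly by `mult`.
    if x.shape[0] % mult == 0:
        return x, 0
    pad_len = mult - (x.shape[0] % mult)
    return F.pad(x, (0, 0, 0, pad_len)), pad_len


def unpad_seq_to_mult(x, pad_len):
    # Drop the tail padding added by pad_seq_to_mult.
    return x[:-pad_len, :] if pad_len > 0 else x


x = torch.randn(10, 16)                  # 10 tokens, hidden size 16
padded, pad_len = pad_seq_to_mult(x, 4)  # tensor-parallel size 4 -> pad to 12 tokens
assert padded.shape == (12, 16) and pad_len == 2
assert torch.equal(unpad_seq_to_mult(padded, pad_len), x)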
@@ -292,6 +314,10 @@ def forward(self, x): if self.dropout is not None and self.dropout_position == 'pre': x = self.dropout(x) + pad_len = 0 + if self.is_expert: + x, pad_len = pad_seq_to_mult(x, self.config.tensor_model_parallel_size) + if self.norm_position == 'pre': x = self.layer_norm(x) if self._sequence_parallel and not self.input_is_parallel: @@ -311,7 +337,7 @@ def forward(self, x): x.activation_offloading = True x, _ = self.linear_out(x) - if self._sequence_parallel and self.input_is_parallel: + if self._sequence_parallel and self.input_is_parallel and not self.is_expert: # for attention_dense and linear_fc2 # layernorm after lora is impacted by sequence parallel, # hence seq dim need to be scattered right after lora linear layers @@ -331,6 +357,10 @@ def forward(self, x): x = x * (self.alpha / self.dim) + if pad_len > 0: + # Remove MoE padding. + x = unpad_seq_to_mult(x, pad_len) + return x def sharded_state_dict( From b86998fbdf40623458b6085b8b377759cb4f7037 Mon Sep 17 00:00:00 2001 From: Huiying Date: Thu, 31 Oct 2024 12:15:09 -0700 Subject: [PATCH 051/125] nemo1 to nemo2 checkpoint convert (#10937) * initial draft Signed-off-by: HuiyingLi * add .nemo loading and handle tokenizers Signed-off-by: HuiyingLi * format Signed-off-by: HuiyingLi * remove tmp dir afterwards and minor fixes Signed-off-by: HuiyingLi * add model.yaml save to context to io_dump Signed-off-by: HuiyingLi * change to cpu convert Signed-off-by: HuiyingLi * change to default bf16, add rc2 patch and logging Signed-off-by: HuiyingLi * remove torch.git.script for squared_relu for 24.09 Signed-off-by: Huiying Li * add copyright Signed-off-by: Huiying Li * add more model and minor changes Signed-off-by: Huiying Li * minor fix Signed-off-by: Huiying Li * format Signed-off-by: Huiying Li * format Signed-off-by: Huiying Li --------- Signed-off-by: HuiyingLi Signed-off-by: Huiying Li --- nemo/collections/llm/fn/activation.py | 2 +- .../convert_nemo1_to_nemo2.py | 217 ++++++++++++++++++ 2 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 scripts/checkpoint_converters/convert_nemo1_to_nemo2.py diff --git a/nemo/collections/llm/fn/activation.py b/nemo/collections/llm/fn/activation.py index 50e076a79d360..5970846d32b2c 100644 --- a/nemo/collections/llm/fn/activation.py +++ b/nemo/collections/llm/fn/activation.py @@ -25,7 +25,7 @@ def openai_gelu(x): return gelu_impl(x) -@torch.jit.script +# @torch.jit.script # remove until we have serialization def squared_relu(x): """Squared ReLU activation function.""" return torch.pow(torch.nn.functional.relu(x), 2) diff --git a/scripts/checkpoint_converters/convert_nemo1_to_nemo2.py b/scripts/checkpoint_converters/convert_nemo1_to_nemo2.py new file mode 100644 index 0000000000000..1d69c1aec5ebd --- /dev/null +++ b/scripts/checkpoint_converters/convert_nemo1_to_nemo2.py @@ -0,0 +1,217 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" +Script to convert NeMo 1.0 checkpoints to NeMo 2.0 format. 
+Available model listed in MODEL_CONFIG_MAPPING +Example usage: + +a. Convert a .nemo checkpoint + python /opt/NeMo/scripts/checkpoint_converters/convert_nemo1_to_nemo2.py \ + --input_path=Meta-Llama-3-8B.nemo \ + --output_path=your_output_dir \ + --model_id=meta-llama/Meta-Llama-3-8B + +b. Convert a model weight directory. The checkpoint should be similar to `model_weights` subdir after extracting the .nemo file. + Please also provide tokenizer_library and tokenizer_path when loading from weight directory. + python /opt/NeMo/scripts/checkpoint_converters/convert_nemo1_to_nemo2.py \ + --input_path=nemotron3-8b-extracted/model_weights \ + --tokenizer_path=path_to_your_tokenizer_model.model \ + --tokenizer_library=sentencepiece \ + --output_path=your_output_dir \ + --model_id=nvidia/nemotron-3-8b-base-4k + +""" + +import os +import shutil +import tempfile +from argparse import ArgumentParser +from pathlib import Path + +import torch +from megatron.core.dist_checkpointing.dict_utils import dict_list_map_inplace +from megatron.core.dist_checkpointing.mapping import LocalNonpersistentObject, ShardedObject +from omegaconf import OmegaConf +from transformers import AutoTokenizer as HFAutoTokenizer + +from nemo.collections import llm +from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer +from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed +from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer +from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector +from nemo.lightning import MegatronStrategy, Trainer, _strategy_lib +from nemo.lightning.ckpt_utils import ckpt_to_context_subdir, ckpt_to_weights_subdir +from nemo.lightning.io.pl import TrainerContext +from nemo.utils import logging + +MODEL_CONFIG_MAPPING = { + "meta-llama/Llama-2-7b-hf": (llm.LlamaModel, llm.Llama2Config7B), + "meta-llama/Llama-2-13b-hf": (llm.LlamaModel, llm.Llama2Config13B), + "meta-llama/Llama-2-70b-hf": (llm.LlamaModel, llm.Llama2Config70B), + "meta-llama/Meta-Llama-3-8B": (llm.LlamaModel, llm.Llama3Config8B), + "meta-llama/Meta-Llama-3-70B": (llm.LlamaModel, llm.Llama3Config70B), + "mistralai/Mixtral-8x7B-v0.1": (llm.MixtralModel, llm.MixtralConfig8x7B), + "mistralai/Mixtral-8x22B-v0.1": (llm.MixtralModel, llm.MixtralConfig8x22B), + "mistralai/Mistral-7B-v0.1": (llm.MistralModel, llm.MistralConfig7B), + "nvidia/nemotron-3-8b-base-4k": (llm.NemotronModel, llm.Nemotron3Config8B), + "nemotron4-22b": (llm.NemotronModel, llm.Nemotron4Config22B), + "nemotron4-15b": (llm.NemotronModel, llm.Nemotron4Config15B), + "nemotron4-340b": (llm.NemotronModel, llm.Nemotron4Config340B), +} + + +def get_args(): + parser = ArgumentParser( + description="Script to convert NeMo 1.0 checkpoints to NeMo 2.0 format. This script may download from Hugging Face, make sure you have access to gate repo and have logged into Hugging Face (e.g. huggingface-cli login)" + ) + parser.add_argument( + "--input_path", + type=str, + default=None, + required=True, + help="Path to NeMo 1.0 checkpoints. Could be .nemo file, or `model_weights` directory after untar the .nemo. Please also provide tokenizer_library and tokenizer_path if you pass in `model_weights` directory.", + ) + parser.add_argument( + "--output_path", type=str, default=None, required=True, help="Path to output NeMo 2.0 directory." 
+ ) + parser.add_argument( + "--model_id", type=str, default=None, required=True, help="Hugging Face or nemotron model id for the model" + ) + parser.add_argument( + "--tokenizer_path", + type=str, + default=None, + required=False, + help="Path to tokenizer. If not provided, will 1. try instantiate from nemo1 config 2. pull AutoTokenizer from Hugging Face according to model_id if 1 fails", + ) + parser.add_argument( + "--tokenizer_library", + type=str, + default=None, + required=False, + help="Tokenizer library, e.g. `sentencepiece`, `megatron`. Defaults to `sentencepiece`", + ) + args = parser.parse_args() + return args + + +def get_nemo2_model(model_id, tokenizer) -> llm.GPTModel: + + if model_id not in MODEL_CONFIG_MAPPING: + valid_ids = "\n- ".join([""] + list(MODEL_CONFIG_MAPPING.keys())) + raise ValueError(f"Unsupported model_id: {model_id}. Please provide a valid model_id from {valid_ids}") + model_cls, config_cls = MODEL_CONFIG_MAPPING[model_id] + # nemo1 ckpts are bf16 + return model_cls(config_cls(bf16=True, params_dtype=torch.bfloat16), tokenizer=tokenizer) + + +def get_tokenizer(input_path: Path, tokenizer_tmp_dir: Path) -> AutoTokenizer: + if not input_path.is_dir(): # if .nemo tar + with tempfile.TemporaryDirectory() as tmp_dir: # we want to clean up this tmp dir + NLPSaveRestoreConnector._unpack_nemo_file(input_path, tmp_dir) + cfg = OmegaConf.load(f"{tmp_dir}/model_config.yaml") + tokenizer_lib = cfg.tokenizer.library + tokenizer_model = cfg.tokenizer.get("model") and cfg.tokenizer.get("model").split("nemo:", 1)[-1] + if tokenizer_model: + shutil.copy(f"{tmp_dir}/{tokenizer_model}", f"{tokenizer_tmp_dir}/{tokenizer_model}") + elif cfg.tokenizer.library == "huggingface": + HFAutoTokenizer.from_pretrained(cfg.tokenizer.type).save_pretrained(tokenizer_tmp_dir) + tokenizer_model = f"{tokenizer_tmp_dir}/{tokenizer_model}" if tokenizer_model else None + else: + if args.tokenizer_path: # not .nemo file, only weight dir need to specify tokenizer lib and path + tokenizer_lib = args.tokenizer_library or "sentencepiece" + if args.tokenizer_library is None: + logging.warning( + "You specified tokenizer_path but did not provide tokenizer_library, will default to sentencepiece" + ) + tokenizer_model = args.tokenizer_path + else: # no .nemo config, no tokenizer path specified, grab from HF, reload + tokenizer_lib = "huggingface" + HFAutoTokenizer.from_pretrained(args.model_id).save_pretrained(tokenizer_tmp_dir) + + if tokenizer_lib == "huggingface": + return AutoTokenizer(tokenizer_tmp_dir) + else: # not directly use huggingface tokenizer in get_nmt_tokenizer since it will pull from HF and no reload + return get_nmt_tokenizer(library=tokenizer_lib, tokenizer_model=tokenizer_model) + + +def main() -> None: + tokenizer_tmp_dir = Path("/tmp/nemo_tokenizer") + tokenizer_tmp_dir.mkdir(parents=True, exist_ok=True) + tokenizer = get_tokenizer(Path(args.input_path), tokenizer_tmp_dir) + model = get_nemo2_model(args.model_id, tokenizer=tokenizer) + model.optim = None + + trainer = Trainer( + devices=1, + accelerator="cpu", + strategy=MegatronStrategy(ddp="pytorch", setup_optimizers=False, plugins=bf16_mixed()), + ) + + trainer.strategy.connect(model) + trainer.strategy.setup_environment() + if not model.state_dict(): + with _strategy_lib.megatron_cpu_init_context(model.config): + model.configure_model() + + trainer.strategy.setup(trainer) + + logging.info(f"loading checkpoint {args.input_path}") + + sharded_state_dict = {"state_dict": trainer.strategy.megatron_parallel.sharded_state_dict()} + + 
for key in list(sharded_state_dict['state_dict'].keys()): + new_key = key.replace('module', 'model', 1) + sharded_state_dict['state_dict'][new_key] = sharded_state_dict['state_dict'].pop(key) + sharded_state_dict['state_dict'][new_key].key = sharded_state_dict['state_dict'][new_key].key.replace( + 'module', 'model', 1 + ) + + def skip_fp8_load(x): + if isinstance(x, ShardedObject) and 'core_attention' in x.key and '_extra_state' in x.key: + x = LocalNonpersistentObject(x.data) # use the FP8 state from initialization, not from ckpt + return x + + dict_list_map_inplace(skip_fp8_load, sharded_state_dict) + if not Path(args.input_path).is_dir(): + with tempfile.TemporaryDirectory() as tmp_dir: + NLPSaveRestoreConnector._unpack_nemo_file(args.input_path, tmp_dir) + model_weight_dir = f"{tmp_dir}/model_weights" + model_ckpt = trainer.strategy.checkpoint_io.load_checkpoint(model_weight_dir, sharded_state_dict, None) + else: + model_ckpt = trainer.strategy.checkpoint_io.load_checkpoint(args.input_path, sharded_state_dict, None) + + logging.info(f"Saving checkpoint to {args.output_path}") + model_ckpt['state_dict'] = {k.replace('model', 'module', 1): v for k, v in model_ckpt['state_dict'].items()} + trainer.model.module.load_state_dict(model_ckpt['state_dict']) + trainer.save_checkpoint(ckpt_to_weights_subdir(args.output_path)) + if getattr(trainer.strategy, "async_save", False): + trainer.strategy.checkpoint_io.maybe_finalize_save_checkpoint(blocking=True) + + # Corresponding to Connector: on_import_ckpt + if hasattr(trainer.model, "__io__") and hasattr(trainer.model.tokenizer, '__io__'): + trainer.model.__io__.tokenizer = trainer.model.tokenizer.__io__ + TrainerContext.from_trainer(trainer).io_dump(ckpt_to_context_subdir(args.output_path), yaml_attrs=["model"]) + + # remove tmp dir + if os.path.isdir(tokenizer_tmp_dir): + shutil.rmtree(tokenizer_tmp_dir) + + logging.info(f"NeMo 2.0 checkpoint saved at {args.output_path}") + + +if __name__ == '__main__': + args = get_args() + main() From bf58f3379a45e1880f752587e6c7902e8d0e3735 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Thu, 31 Oct 2024 12:25:36 -0700 Subject: [PATCH 052/125] fix expert regex filter (#11103) Signed-off-by: Alexandros Koumparoulis --- nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py index 5a9cded38babd..08629483e0061 100644 --- a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py +++ b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py @@ -117,7 +117,7 @@ def load_scaling_factors(state_dict: dict, basename: str, size: int) -> Optional def filter_experts_extra_states(state_dict: dict): - pattern = r'module\.decoder\.layers\.mlp\.experts\.experts\.linear_fc\d+\._extra_state/shard_\d+\.\d+_\d+\.\d+' + pattern = r'model\.decoder\.layers\.mlp\.experts\.experts\.linear_fc\d+\._extra_state/shard_\d+\.\d+_\d+\.\d+' return {k: v for k, v in state_dict.items() if not re.fullmatch(pattern, k)} From bad4bfe69ac4bdf6bfb1fc0b808b570d81a2c55d Mon Sep 17 00:00:00 2001 From: Jinyang Yuan <154768711+jinyangyuan-nvidia@users.noreply.github.com> Date: Fri, 1 Nov 2024 04:18:10 +0800 Subject: [PATCH 053/125] Support exporting Nemotron-340B for TensorRT-LLM (#11015) Signed-off-by: Jinyang Yuan Co-authored-by: Jinyang Yuan Co-authored-by: meatybobby --- scripts/export/export_to_trt_llm.py | 13 +++++++++++-- 1 file changed, 11 
insertions(+), 2 deletions(-) diff --git a/scripts/export/export_to_trt_llm.py b/scripts/export/export_to_trt_llm.py index 6b246131b69ec..d9e846547c68d 100644 --- a/scripts/export/export_to_trt_llm.py +++ b/scripts/export/export_to_trt_llm.py @@ -44,7 +44,7 @@ def get_args(argv): parser.add_argument( "-mr", "--model_repository", required=True, default=None, type=str, help="Folder for the trt-llm model files" ) - parser.add_argument("-ng", "--num_gpus", default=1, type=int, help="Number of GPUs for the deployment") + parser.add_argument("-ng", "--num_gpus", default=None, type=int, help="Number of GPUs for the deployment") parser.add_argument("-tps", "--tensor_parallelism_size", default=1, type=int, help="Tensor parallelism size") parser.add_argument("-pps", "--pipeline_parallelism_size", default=1, type=int, help="Pipeline parallelism size") parser.add_argument( @@ -64,7 +64,14 @@ def get_args(argv): "-mpet", "--max_prompt_embedding_table_size", default=None, type=int, help="Max prompt embedding table size" ) parser.add_argument( - "-npkc", "--no_paged_kv_cache", default=False, action='store_true', help="Enable paged kv cache." + "-upe", + "--use_parallel_embedding", + default=False, + action='store_true', + help="Use parallel embedding.", + ) + parser.add_argument( + "-npkc", "--no_paged_kv_cache", default=False, action='store_true', help="Disable paged kv cache." ) parser.add_argument( "-drip", @@ -183,6 +190,7 @@ def nemo_export_trt_llm(argv): max_num_tokens=args.max_num_tokens, opt_num_tokens=args.opt_num_tokens, max_prompt_embedding_table_size=args.max_prompt_embedding_table_size, + use_parallel_embedding=args.use_parallel_embedding, paged_kv_cache=(not args.no_paged_kv_cache), remove_input_padding=(not args.disable_remove_input_padding), dtype=args.dtype, @@ -191,6 +199,7 @@ def nemo_export_trt_llm(argv): max_lora_rank=args.max_lora_rank, fp8_quantized=args.export_fp8_quantized, fp8_kvcache=args.use_fp8_kv_cache, + load_model=False, ) LOGGER.info("Export is successful.") From 5b7daa002b29dc598f37235b654f7ef148e48f7d Mon Sep 17 00:00:00 2001 From: JimmyZhang12 <67203904+JimmyZhang12@users.noreply.github.com> Date: Fri, 1 Nov 2024 02:07:11 -0400 Subject: [PATCH 054/125] GPT recipes to use full te spec (#11119) * use full te spec Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * more recipe fix Signed-off-by: Jimmy Zhang * Apply isort and black reformatting Signed-off-by: JimmyZhang12 * rm dropout_ffn, default num modes Signed-off-by: Jimmy Zhang --------- Signed-off-by: Jimmy Zhang Signed-off-by: JimmyZhang12 Co-authored-by: Jimmy Zhang Co-authored-by: JimmyZhang12 --- nemo/collections/llm/gpt/model/base.py | 16 +++++++++++++--- nemo/collections/llm/recipes/llama3_70b.py | 9 +++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py index 8bb4752436109..6b158a33b226a 100644 --- a/nemo/collections/llm/gpt/model/base.py +++ b/nemo/collections/llm/gpt/model/base.py @@ -255,6 +255,9 @@ class GPTConfig126M(GPTConfig): hidden_size: int = 768 ffn_hidden_size: int = 3072 num_attention_heads: int = 12 + bias_activation_fusion: bool = True + bias_dropout_add_fusion: bool = True + use_transformer_engine_full_layer_spec: bool = True @dataclass @@ -264,9 +267,9 @@ class GPTConfig5B(GPTConfig): hidden_size: int = 4096 ffn_hidden_size: int = 16384 num_attention_heads: int = 32 - bias_activation_fusion: bool = True bias_dropout_add_fusion: bool = True + 
use_transformer_engine_full_layer_spec: bool = True @dataclass @@ -276,6 +279,9 @@ class GPTConfig7B(GPTConfig): hidden_size: int = 4096 ffn_hidden_size: int = 10880 num_attention_heads: int = 32 + bias_activation_fusion: bool = True + bias_dropout_add_fusion: bool = True + use_transformer_engine_full_layer_spec: bool = True @dataclass @@ -285,9 +291,9 @@ class GPTConfig20B(GPTConfig): hidden_size: int = 6144 ffn_hidden_size: int = 24576 num_attention_heads: int = 48 - bias_activation_fusion: bool = True bias_dropout_add_fusion: bool = True + use_transformer_engine_full_layer_spec: bool = True @dataclass @@ -297,6 +303,9 @@ class GPTConfig40B(GPTConfig): hidden_size: int = 8192 ffn_hidden_size: int = 32768 num_attention_heads: int = 64 + bias_activation_fusion: bool = True + bias_dropout_add_fusion: bool = True + use_transformer_engine_full_layer_spec: bool = True @dataclass @@ -308,9 +317,10 @@ class GPTConfig175B(GPTConfig): num_attention_heads: int = 96 hidden_dropout: float = 0.0 attention_dropout: float = 0.0 - ffn_dropout: float = 0.0 bias_activation_fusion: bool = True bias_dropout_add_fusion: bool = True + use_transformer_engine_full_layer_spec: bool = True + layernorm_zero_centered_gamma: bool = True class GPTModel(L.LightningModule, io.IOMixin, io.ConnectorMixin, fn.FNMixin): diff --git a/nemo/collections/llm/recipes/llama3_70b.py b/nemo/collections/llm/recipes/llama3_70b.py index e393dea908b7a..e2156993647df 100644 --- a/nemo/collections/llm/recipes/llama3_70b.py +++ b/nemo/collections/llm/recipes/llama3_70b.py @@ -244,7 +244,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: def finetune_recipe( dir: Optional[str] = None, name: str = "default", - num_nodes: int = 1, + num_nodes: int = None, num_gpus_per_node: int = 8, peft_scheme: Optional[str] = 'lora', seq_length: Optional[int] = None, @@ -293,11 +293,16 @@ def finetune_recipe( if seq_length is None: seq_length = 4096 if packed_sequence else 2048 + if num_nodes is None: + if peft_scheme is None or peft_scheme.lower() == 'none': + num_nodes = 4 + elif peft_scheme.lower() == 'lora': + num_nodes = 1 + recipe = default_finetune_recipe( model(), "meta-llama/Meta-Llama-3-70B", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) if peft_scheme is None or peft_scheme.lower() == 'none': - assert num_nodes >= 4 recipe.trainer.strategy.tensor_model_parallel_size = 8 recipe.trainer.strategy.pipeline_model_parallel_size = 4 recipe.optim.config.lr = 5e-6 From 643c074eeafd9e7884eff0fdc233549499b5bd16 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Fri, 1 Nov 2024 04:49:21 -0700 Subject: [PATCH 055/125] rm rm (#11116) Signed-off-by: Alexandros Koumparoulis --- nemo/lightning/io/pl.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/nemo/lightning/io/pl.py b/nemo/lightning/io/pl.py index fb6ef707ab7c3..1a7880e384927 100644 --- a/nemo/lightning/io/pl.py +++ b/nemo/lightning/io/pl.py @@ -142,21 +142,13 @@ def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_optio validate_sharding_integrity = not (self.validated_consistency and self.assume_constant_structure) self.validated_consistency = True - try: - return dist_checkpointing.save( - sharded_state_dict=checkpoint, - checkpoint_dir=checkpoint_dir, - sharded_strategy=self.save_sharded_strategy, - validate_access_integrity=validate_sharding_integrity, - async_sharded_save=self.async_save, - ) - except: - logging.error(f"Failed to save 
checkpoint to {checkpoint_dir}") - # Do cleanup. - import shutil - - shutil.rmtree(checkpoint_dir) - raise + return dist_checkpointing.save( + sharded_state_dict=checkpoint, + checkpoint_dir=checkpoint_dir, + sharded_strategy=self.save_sharded_strategy, + validate_access_integrity=validate_sharding_integrity, + async_sharded_save=self.async_save, + ) @override def load_checkpoint( From ba2b96dd94e85fcf6dea2b6d7eac131a5cfb629c Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Fri, 1 Nov 2024 05:36:21 -0700 Subject: [PATCH 056/125] NeMo-UX: Mistral/mixtral peft ci test (#11094) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add mistral/mixtral peft ci test Signed-off-by: Alexandros Koumparoulis * add mistral/mixtral peft ci test Signed-off-by: Alexandros Koumparoulis * add mistral tp2 Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa * add tests to NEMO_CICD_Test Signed-off-by: Alexandros Koumparoulis * Update .github/workflows/cicd-main.yml Co-authored-by: oliver könig Signed-off-by: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> * fix params Signed-off-by: Alexandros Koumparoulis * rm devices arg Signed-off-by: Alexandros Koumparoulis * add --dist-opt arg Signed-off-by: Alexandros Koumparoulis * add tp=2 mixtral Signed-off-by: Alexandros Koumparoulis * add ep test Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Signed-off-by: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Co-authored-by: akoumpa Co-authored-by: oliver könig --- .github/workflows/cicd-main.yml | 80 ++++++++++++++ tests/collections/llm/lora_mistralai.py | 139 ++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 tests/collections/llm/lora_mistralai.py diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index bb239acb00fc5..2bdbe673d19be 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -4266,6 +4266,81 @@ jobs: --pp_size 1 \ --mbs 1 --packed + L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/lora_mistralai.py \ + --max-steps 3 \ + --ep 1 \ + --mbs 2 \ + --model mixtral + + L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/lora_mistralai.py \ + --max-steps 3 \ + --tp 1 \ + --mbs 1 \ + --model mixtral \ + --dist-opt + + L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/lora_mistralai.py \ + 
--max-steps 3 \ + --tp 2 \ + --mbs 1 \ + --model mixtral \ + --dist-opt + + L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/lora_mistralai.py \ + --max-steps 3 \ + --tp 1 \ + --mbs 1 \ + --model mistral \ + --dist-opt + + L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1: + needs: [cicd-test-container-setup] + uses: ./.github/workflows/_test_template.yml + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true' + with: + RUNNER: self-hosted-azure + SCRIPT: | + + python tests/collections/llm/lora_mistralai.py \ + --max-steps 3 \ + --tp 2 \ + --mbs 1 \ + --model mistral \ + --dist-opt + + L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact: needs: [cicd-test-container-setup] uses: ./.github/workflows/_test_template.yml @@ -4422,6 +4497,11 @@ jobs: - L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2 - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2 - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED + - L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2 + - L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1 + - L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1 + - L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1 + - L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1 - L2_NeMo_2_Mixtral_Pretraining - L2_PTQ_Llama2_FP8 - L2_Community_LLM_Checkpoints_tests_Llama3 diff --git a/tests/collections/llm/lora_mistralai.py b/tests/collections/llm/lora_mistralai.py new file mode 100644 index 0000000000000..09a52668e3eea --- /dev/null +++ b/tests/collections/llm/lora_mistralai.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + +import pytorch_lightning as pl +import torch +from megatron.core.optimizer import OptimizerConfig + +from nemo import lightning as nl +from nemo.collections import llm +from nemo.lightning.io.mixin import track_io + + +def get_args(): + parser = argparse.ArgumentParser(description='Finetune a small GPT model using NeMo 2.0') + parser.add_argument('--model', type=str.lower, choices=['mistral', 'mixtral'], help="model") + parser.add_argument('--max-steps', type=int, default=9, help="number of devices") + parser.add_argument('--mbs', type=int, default=2, help="micro batch size") + parser.add_argument('--gbs', type=int, default=4, help="global batch size") + parser.add_argument('--tp', type=int, default=1, help="tensor parallel size") + parser.add_argument('--ep', type=int, default=1, help="expert parallel size") + parser.add_argument('--dist-opt', action='store_true', help='use dist opt') + return parser.parse_args() + + +def trainer(devices, tp, ep, sp, max_steps) -> nl.Trainer: + strategy = nl.MegatronStrategy( + tensor_model_parallel_size=tp, + expert_model_parallel_size=ep, + sequence_parallel=sp, + ) + + return nl.Trainer( + devices=max(ep, tp), + max_steps=max_steps, + accelerator="gpu", + strategy=strategy, + plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"), + log_every_n_steps=1, + limit_val_batches=0, + val_check_interval=0, + num_sanity_val_steps=0, + ) + + +@track_io +class OrdTokenizer: + def __init__(self, vocab_size=30_000, num_reserved_tokens=128, special_token_names=['bos_id', 'eos_id', 'pad_id']): + self.vocab_size = vocab_size + self.num_reserved_tokens = num_reserved_tokens + self.special_token_names = special_token_names + assert len(self.special_token_names) < num_reserved_tokens + + def __getattr__(self, name): + if name in self.__dict__.get('special_token_names', {}): + return self.__dict__['special_token_names'].index(name) + elif name in self.__dict__: + return self.__dict__[name] + else: + raise AttributeError + + def text_to_ids(self, text): + token_ids = list(map(lambda x: self.num_reserved_tokens + ord(x), list(text))) + assert max(token_ids) < self.vocab_size + return token_ids + + +def logger() -> nl.NeMoLogger: + ckpt = nl.ModelCheckpoint( + save_last=True, + every_n_train_steps=10, + monitor="reduced_train_loss", + save_top_k=1, + save_on_train_epoch_end=True, + save_optim_on_train_end=True, + ) + + return nl.NeMoLogger( + name="nemo2_peft", + log_dir="/tmp/peft_logs", + use_datetime_version=False, # must be false if using auto resume + ckpt=ckpt, + wandb=None, + ) + + +def squad(mbs, gbs) -> pl.LightningDataModule: + return llm.SquadDataModule(seq_length=2048, micro_batch_size=mbs, global_batch_size=gbs, num_workers=0) + + +def mixtral_8x7b() -> pl.LightningModule: + tokenizer = OrdTokenizer() + model = llm.MixtralModel(llm.MixtralConfig8x7B(num_layers=2), tokenizer=tokenizer) + lora = llm.peft.LoRA() + return model, lora + + +def mistral_7b() -> pl.LightningModule: + tokenizer = OrdTokenizer() + model = llm.MistralModel(llm.MistralConfig7B(num_layers=2), tokenizer=tokenizer) + lora = llm.peft.LoRA() + return model, lora + + +if __name__ == '__main__': + args = get_args() + if args.model == 'mistral': + model, lora = mistral_7b() + else: + model, lora = mixtral_8x7b() + llm.finetune( + model=model, + data=squad(args.mbs, args.gbs), + trainer=trainer(args.tp, args.tp, args.ep, args.tp > 1, args.max_steps), + peft=lora, + log=logger(), + optim=nl.MegatronOptimizerModule( + config=OptimizerConfig( + optimizer="adam", + lr=0.0001, + 
adam_beta2=0.98, + use_distributed_optimizer=args.dist_opt, + clip_grad=1.0, + bf16=True, + ), + ), + ) From e78c1d9c235a0e92b595e2f86bf6aaccff66927d Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Fri, 1 Nov 2024 09:31:37 -0700 Subject: [PATCH 057/125] Make nemo.collections.llm PreTrainingDataModule num samples configurable (#11088) * Make nemo.collections.llm PreTrainingDataModule num samples configurable Signed-off-by: Hemil Desai * Apply isort and black reformatting Signed-off-by: hemildesai * Fix Signed-off-by: Hemil Desai * Apply isort and black reformatting Signed-off-by: hemildesai * Add explicit method to build pretraining datamodule index mapping Signed-off-by: Hemil Desai * Apply isort and black reformatting Signed-off-by: hemildesai * Fix Signed-off-by: Hemil Desai * Apply isort and black reformatting Signed-off-by: hemildesai * fix Signed-off-by: Hemil Desai * Apply isort and black reformatting Signed-off-by: hemildesai * PR feedback Signed-off-by: Hemil Desai --------- Signed-off-by: Hemil Desai Signed-off-by: hemildesai Co-authored-by: hemildesai --- nemo/collections/llm/gpt/data/__init__.py | 3 +- nemo/collections/llm/gpt/data/pre_training.py | 125 ++++++++++++++---- 2 files changed, 103 insertions(+), 25 deletions(-) diff --git a/nemo/collections/llm/gpt/data/__init__.py b/nemo/collections/llm/gpt/data/__init__.py index f4e97d91e5cd5..92f73069fcc2b 100644 --- a/nemo/collections/llm/gpt/data/__init__.py +++ b/nemo/collections/llm/gpt/data/__init__.py @@ -16,7 +16,7 @@ from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule from nemo.collections.llm.gpt.data.mock import MockDataModule -from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule +from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule, build_pretraining_datamodule from nemo.collections.llm.gpt.data.squad import SquadDataModule __all__ = [ @@ -25,5 +25,6 @@ "DollyDataModule", "MockDataModule", "PreTrainingDataModule", + "build_pretraining_datamodule", "HfDatasetDataModule", ] diff --git a/nemo/collections/llm/gpt/data/pre_training.py b/nemo/collections/llm/gpt/data/pre_training.py index 534922efe3a30..cfacde118b89a 100644 --- a/nemo/collections/llm/gpt/data/pre_training.py +++ b/nemo/collections/llm/gpt/data/pre_training.py @@ -16,7 +16,7 @@ import os import warnings from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import pytorch_lightning as pl from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS @@ -77,7 +77,7 @@ def validate_dataset_asset_accessibility(paths): raise ValueError("Expected path to be of string or Path type.") path = Path(paths) - suffices = ('.bin', '.idx') + suffices = (".bin", ".idx") if path.is_dir(): if not os.access(path, os.R_OK): raise PermissionError(f"Expected {str(path)} to be readable.") @@ -133,6 +133,9 @@ class PreTrainingDataModule(pl.LightningDataModule, IOMixin): to allocate to train, validation, and test sets, respectively. Unused if ``paths`` is a dict. index_mapping_dir (Optional[str]): Path to a directory to write index mapping files. num_dataset_builder_threads (int): The number of threads to use for dataset building. + num_train_samples (Optional[int]): The number of samples to use for training, defaults to total train steps times global batch size. 
+ num_val_samples (Optional[int]): The number of samples to use for validation, defaults to total validation steps times global batch size. + num_test_samples (Optional[int]): The number of samples to use for testing, defaults to total test steps times global batch size. """ def __init__( @@ -154,6 +157,9 @@ def __init__( split: str = "900,50,50", index_mapping_dir: Optional[str] = None, num_dataset_builder_threads: int = 1, + num_train_samples: Optional[int] = None, + num_val_samples: Optional[int] = None, + num_test_samples: Optional[int] = None, ) -> None: super().__init__() if not isinstance(paths, (list, tuple, dict)): @@ -196,6 +202,9 @@ def __init__( self.index_mapping_dir = index_mapping_dir self.num_dataset_builder_threads = num_dataset_builder_threads self.init_global_step = 0 + self.num_train_samples = num_train_samples + self.num_val_samples = num_val_samples + self.num_test_samples = num_test_samples from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer @@ -207,27 +216,46 @@ def __init__( rampup_batch_size=rampup_batch_size, ) - def setup(self, stage: str = "") -> None: + def build( + self, + trainer_max_steps: int, + trainer_val_check_interval: int, + trainer_limit_val_batches: Union[int, float], + trainer_limit_test_batches: Union[int, float], + ): from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder from megatron.core.datasets.gpt_dataset import GPTDataset - assert ( - hasattr(self, "trainer") and self.trainer is not None - ), "Setup should be completed when trainer and config are attached." + train_iters = trainer_max_steps + assert train_iters > 0, f"max_steps {train_iters} should be greater than 0" + num_train_samples = int(train_iters * self.data_sampler.global_batch_size) + + if self.num_train_samples is not None: + assert ( + self.num_train_samples >= num_train_samples + ), f"num_train_samples must be greater than or equal to {num_train_samples}." + num_train_samples = self.num_train_samples + train_iters = int(num_train_samples / self.data_sampler.global_batch_size) - # Trainer API - max_train_steps = self.trainer.max_steps - assert max_train_steps > 0, "Please specify trainer.max_steps" - eval_iters = (max_train_steps // self.trainer.val_check_interval + 1) * self.trainer.limit_val_batches - test_iters = self.trainer.limit_test_batches - num_train_samples = int(max_train_steps * self.data_sampler.global_batch_size) + eval_iters = (train_iters // trainer_val_check_interval + 1) * trainer_limit_val_batches num_val_samples = int(eval_iters * self.data_sampler.global_batch_size) + + test_iters = trainer_limit_test_batches num_test_samples = int(test_iters * self.data_sampler.global_batch_size) + if self.num_val_samples is not None: + assert self.num_val_samples > num_val_samples, f"num_val_samples must be greater than {num_val_samples}." + num_val_samples = self.num_val_samples + if self.num_test_samples is not None: + assert ( + self.num_test_samples > num_test_samples + ), f"num_test_samples must be greater than {num_test_samples}." + num_test_samples = self.num_test_samples + if ( - self.trainer.limit_val_batches > 0.0 - and self.trainer.limit_val_batches <= 1.0 - and isinstance(self.trainer.limit_val_batches, float) + trainer_limit_val_batches > 0.0 + and trainer_limit_val_batches <= 1.0 + and isinstance(trainer_limit_val_batches, float) ): assert "blend" not in self.build_kwargs, ( "When using a single data distribution, limit_val_batches <= 1.0 is not supported. 
If you'd " @@ -251,6 +279,18 @@ def setup(self, stage: str = "") -> None: config=self.gpt_dataset_config, ).build() + def setup(self, stage: str = "") -> None: + assert ( + hasattr(self, "trainer") and self.trainer is not None + ), "Setup should be completed when trainer and config are attached." + + self.build( + trainer_max_steps=self.trainer.max_steps, + trainer_val_check_interval=self.trainer.val_check_interval, + trainer_limit_val_batches=self.trainer.limit_val_batches, + trainer_limit_test_batches=self.trainer.limit_test_batches, + ) + # uncomment once fabric API is merged # def fabric_setup( # self, @@ -269,13 +309,13 @@ def setup(self, stage: str = "") -> None: # ).build() def train_dataloader(self) -> TRAIN_DATALOADERS: - return self._create_dataloader(self._train_ds, mode='train') + return self._create_dataloader(self._train_ds, mode="train") def val_dataloader(self) -> EVAL_DATALOADERS: - return self._create_dataloader(self._validation_ds, mode='validation') + return self._create_dataloader(self._validation_ds, mode="validation") def test_dataloader(self) -> EVAL_DATALOADERS: - return self._create_dataloader(self._test_ds, mode='test') + return self._create_dataloader(self._test_ds, mode="test") def _create_dataloader(self, dataset, mode, **kwargs) -> WrappedDataLoader: self.init_global_step = self.trainer.global_step @@ -286,7 +326,7 @@ def _create_dataloader(self, dataset, mode, **kwargs) -> WrappedDataLoader: num_workers=self.num_workers, pin_memory=self.pin_memory, persistent_workers=self.persistent_workers, - collate_fn=getattr(dataset, 'collate_fn', data.dataloader.default_collate), + collate_fn=getattr(dataset, "collate_fn", data.dataloader.default_collate), **kwargs, ) return dataloader @@ -316,7 +356,7 @@ def state_dict(self) -> Dict[str, Any]: """ consumed_samples = self.data_sampler.compute_consumed_samples(self.trainer.global_step - self.init_global_step) - return {'consumed_samples': consumed_samples} + return {"consumed_samples": consumed_samples} def load_state_dict(self, state_dict: Dict[str, Any]) -> None: """Called when loading a checkpoint, implement to reload datamodule state given datamodule stat @@ -332,7 +372,7 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None: logging.warning("Megatron num_microbatches_calculator not found, using Apex version.") from apex.transformer.pipeline_parallel.utils import update_num_microbatches - consumed_samples = state_dict['consumed_samples'] + consumed_samples = state_dict["consumed_samples"] self.data_sampler.init_consumed_samples = consumed_samples self.data_sampler.prev_consumed_samples = consumed_samples @@ -344,9 +384,9 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None: def reconfigure_limit_batches(self): # Override limit_train_batches in terms of num of microbatches - self._reconfigure_limit_batches(self.trainer.limit_train_batches, self._train_ds, 'train') + self._reconfigure_limit_batches(self.trainer.limit_train_batches, self._train_ds, "train") # Override limit_val_batches to be a multiple of num microbatches to prevent val_step from exiting in between a step - self._reconfigure_limit_batches(self.trainer.limit_val_batches, self._validation_ds, 'val') + self._reconfigure_limit_batches(self.trainer.limit_val_batches, self._validation_ds, "val") def _reconfigure_limit_batches(self, limit_batches, dataloader, mode): """ @@ -388,10 +428,47 @@ def _reconfigure_limit_batches(self, limit_batches, dataloader, mode): else: limit_batches = limit_batches - limit_batches % get_num_microbatches() 
- if mode == 'train': + if mode == "train": self.trainer.limit_train_batches = limit_batches else: self.trainer.limit_val_batches = limit_batches # Override num sanity steps to be a multiple of num of microbatches self.trainer.num_sanity_val_steps *= get_num_microbatches() + + +def build_pretraining_datamodule( + datamodule: PreTrainingDataModule, + trainer_max_steps: int, + trainer_val_check_interval: int, + trainer_limit_val_batches: Union[int, float], + trainer_limit_test_batches: Union[int, float], +): + """ + Builds the index mapping cache for nemo.collections.llm.gpt.data.PreTrainingDataModule. + + Args: + datamodule (PreTrainingDataModule): The pre-training data module to build. + trainer_max_steps (int): The max_steps set in your trainer. + trainer_val_check_interval (int): The interval at which to perform validation in your trainer. + trainer_limit_val_batches (Union[int, float]): The number of validation batches to use in your trainer. + trainer_limit_test_batches (Union[int, float]): The number of test batches to use in your trainer. + + Returns: + None + """ + import torch.distributed as dist + + assert not dist.is_initialized(), "This function cannot be called inside an existing torch.distributed job." + # The indices in Megatron are built on rank 0, so we set the world size to 1 here. + dist.init_process_group(world_size=1, rank=0) + + from nemo.utils import logging + + logging.info(f"Building {datamodule}") + datamodule.build( + trainer_max_steps=trainer_max_steps, + trainer_val_check_interval=trainer_val_check_interval, + trainer_limit_val_batches=trainer_limit_val_batches, + trainer_limit_test_batches=trainer_limit_test_batches, + ) From 2c42fc3961f5d66a1ba91f936ced7cb653c0afea Mon Sep 17 00:00:00 2001 From: Valerie Sarge Date: Fri, 1 Nov 2024 13:05:03 -0400 Subject: [PATCH 058/125] Virtual pipeline parallel support for LoRA in NLPAdapterModelMixin (#11128) * Update NLPAdapterModelMixin to handle model structure for virtual pipeline parallel + LoRA Signed-off-by: Valerie Sarge * Clean up assert guard Signed-off-by: Valerie Sarge * Clean up ValueError raise Signed-off-by: Valerie Sarge * Apply isort and black reformatting Signed-off-by: vysarge * documentation Signed-off-by: Valerie Sarge --------- Signed-off-by: Valerie Sarge Signed-off-by: vysarge Co-authored-by: vysarge --- .../nlp/parts/mixins/nlp_adapter_mixins.py | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py index e2ccffeebdfa6..8f7870b7d4c71 100644 --- a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py +++ b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py @@ -101,15 +101,18 @@ def _unwrap_model(self): else: return self.model + def _unwrap_model_list(self): + m = getattr(self, "model", []) + return m if isinstance(m, list) else [m] + + def _unwrap_layers_model_list(self): + l = torch.nn.ModuleList([]) + for m in self._unwrap_model_list(): + l.extend(self._get_layers_from_model(m)) + return l + def first_stage_of_pipeline(self): - if hasattr(self._unwrap_model(), "pre_process"): - return self._unwrap_model().pre_process - elif hasattr(self._unwrap_model(), "module") and hasattr(self._unwrap_model().module, "pre_process"): - # (guyueh1): this if condition is used to handle amp O2 - # when amp_O2 is on, self.model will be wrapped by the Float16Module class - return self._unwrap_model().module.pre_process - logging.warning("no attribute named model or no 
model.pre_process found. Can not detect stage of pipeline...") - return False + return parallel_state.is_pipeline_first_stage() def _get_all_keys( self, @@ -117,11 +120,12 @@ def _get_all_keys( """ Returns all the keys in the model """ - k = [n for n, p in self._unwrap_model().named_parameters(prefix="model")] + k = [n for m in self._unwrap_model_list() for n, p in m.named_parameters(prefix="model")] b = [ n - for n, p in self._unwrap_model().named_buffers(prefix="model") - if n.replace("model.module.", "model.", 1) in self._unwrap_model().state_dict(prefix="model.").keys() + for m in self._unwrap_model_list() + for n, p in m.named_buffers(prefix="model") + if n.replace("model.module.", "model.", 1) in m.state_dict(prefix="model.").keys() ] # we include buffers because ptuning representations are cached in a buffer and saved to state_dict for inference time use. return set(k + b) @@ -195,7 +199,7 @@ def _check_and_add_peft_cfg(self, peft_cfg): f"{self.__class__.__name__} + {adapter_name})" ) - layers = self._get_layers_from_model(self._unwrap_model()) + layers = self._unwrap_layers_model_list() for layer in layers: if layer.layer_number in (layer_selection or list(range(1, self.cfg.num_layers + 1))): for name, module in layer.named_modules(): @@ -312,13 +316,15 @@ def setup_optimizer_param_groups(self): self.freeze(training=True) # Freeze the entire model if not self.ptuning_only_and_non_first_stage: opt_params = [] - for _, module in self._unwrap_model().named_modules(prefix="model"): + for _, module in [elem for m in self._unwrap_model_list() for elem in m.named_modules(prefix="model")]: if isinstance(module, AdapterModuleMixin) and module.is_adapter_available(): module.set_enabled_adapters(enabled=True) module.unfreeze_enabled_adapters() # selectively unfreeze the adapter modules. opt_params += [p for p in module.parameters() if p.requires_grad] - for name, param in self._unwrap_model().named_parameters(prefix="model"): + for name, param in [ + elem for m in self._unwrap_model_list() for elem in m.named_parameters(prefix="model") + ]: if name in self.tunable_base_param_keys: param.requires_grad = True opt_params += [param] @@ -380,7 +386,7 @@ def load_adapters( super().load_state_dict(state_dict, strict=False) def set_tunable_base_params(self, peft_cfg): - for n, p in self.named_parameters(): + for n, p in self._unwrap_model().named_parameters(prefix="model"): for tpn in peft_cfg.tunable_base_param_names: # TODO: simplistic param name matching, should support regex-like syntax @adithyare if f".{tpn}." in n: @@ -390,7 +396,7 @@ def set_tunable_base_params(self, peft_cfg): def tie_weights(self, peft_cfg): pos_idx = 0 - layers = self._get_layers_from_model(self._unwrap_model()) + layers = self._unwrap_layers_model_list() if isinstance(peft_cfg, LoraPEFTConfig): layer0 = layers[0].self_attention @@ -419,12 +425,22 @@ def get_peft_state_dict(self): """ Gets the keys associated with the adapters only. 
""" - state_dict = self._unwrap_model().state_dict(prefix="model.") - peft_state_dict = {} - for k in self.adapter_keys.union(self.tunable_base_param_keys): - # state_dict keys needs to be in non-O2 format and will be corrected in PEFTSaveRestoreConnector if O2=True - new_k = k.replace("model.module.", "model.", 1) - peft_state_dict[new_k] = state_dict[new_k] + + def filter_state_dict(state_dict): + peft_state_dict = {} + for k in self.adapter_keys.union(self.tunable_base_param_keys): + # state_dict keys needs to be in non-O2 format and will be corrected in PEFTSaveRestoreConnector if O2=True + new_k = k.replace("model.module.", "model.", 1) + peft_state_dict[new_k] = state_dict[new_k] if new_k in state_dict else state_dict[k] + return peft_state_dict + + if hasattr(self, 'model') and isinstance(self.model, list): + peft_state_dict = {} + for i, m in enumerate(self.model): + peft_state_dict[f"model_{i}"] = filter_state_dict(m.state_dict(prefix="model.")) + else: + peft_state_dict = filter_state_dict(self._unwrap_model().state_dict(prefix="model.")) + return peft_state_dict def state_dict(self, destination=None, prefix=None, keep_vars=False): @@ -447,8 +463,15 @@ def sharded_state_dict(self, prefix: str = ''): return super().sharded_state_dict(prefix=prefix) def load_state_dict(self, state_dict, strict: bool = True): - if len(state_dict) == 0: - return # checkpoint is loaded in on_load_checkpoint() + # If state_dict is empty, or if state_dict contains keys for virtual pipeline + # parallel chunks (starting from model_0) but those chunks are empty, skip this function. + # Checkpoint is loaded in on_load_checkpoint() instead. + if len(state_dict) == 0 or ( + parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None + and "model_0" in state_dict + and len(state_dict["model_0"]) == 0 + ): + return if self.use_peft and self.setup_complete: # at this stage only adapter params will appear in the state_dict arg # so we only update those while the rest of the model is frozen. 
From 51fdc2f9d3da969ce091293254f9e509883d7117 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Fri, 1 Nov 2024 13:06:04 -0400 Subject: [PATCH 059/125] fix path (#11121) Signed-off-by: Chen Cui --- nemo/collections/llm/gpt/data/fine_tuning.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/collections/llm/gpt/data/fine_tuning.py b/nemo/collections/llm/gpt/data/fine_tuning.py index 2545bbc93f1d6..93835cb8e83ff 100644 --- a/nemo/collections/llm/gpt/data/fine_tuning.py +++ b/nemo/collections/llm/gpt/data/fine_tuning.py @@ -212,7 +212,10 @@ def _extract_tokenizer_model_name(self) -> str: tokenizer_model_name = self.packed_sequence_specs.tokenizer_model_name elif isinstance(self.tokenizer, AutoTokenizer): name = self.tokenizer.tokenizer.name_or_path - if name.endswith("nemo_tokenizer"): + if name.endswith("context/nemo_tokenizer"): + # NEMO_HOME/hf_org/hf_model/context/nemo_tokenizer => hf_org--hf_model + tokenizer_model_name = '--'.join(name.split("/")[-4:-2]) + elif name.endswith("nemo_tokenizer"): # NEMO_HOME/hf_org/hf_model/nemo_tokenizer => hf_org--hf_model tokenizer_model_name = '--'.join(name.split("/")[-3:-1]) else: From 76c91b7cf0fcc7fdc20ffd3a8086fc9f71f04dc3 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Fri, 1 Nov 2024 13:52:59 -0400 Subject: [PATCH 060/125] Allow arguments passed to dataset class + Gemma recipe fix (#11125) * add dataset kwargs Signed-off-by: Chen Cui * update gemma recipes Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * change arguments from callers Signed-off-by: Chen Cui * address comments Signed-off-by: Chen Cui * add bos to gemma2 as well Signed-off-by: Chen Cui --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx --- examples/llm/sft/hf.py | 2 +- nemo/collections/llm/gpt/data/dolly.py | 6 +++--- nemo/collections/llm/gpt/data/fine_tuning.py | 20 +++++++------------ nemo/collections/llm/gpt/data/squad.py | 8 +++----- nemo/collections/llm/recipes/gemma2_27b.py | 3 +++ nemo/collections/llm/recipes/gemma2_2b.py | 3 +++ nemo/collections/llm/recipes/gemma2_9b.py | 3 +++ nemo/collections/llm/recipes/gemma_2b.py | 3 +++ nemo/collections/llm/recipes/gemma_7b.py | 3 +++ .../llm/gpt/model/megatron_ssm_finetuning.py | 2 +- 10 files changed, 30 insertions(+), 23 deletions(-) diff --git a/examples/llm/sft/hf.py b/examples/llm/sft/hf.py index b7e12d8fb2ded..f7b50298ea149 100644 --- a/examples/llm/sft/hf.py +++ b/examples/llm/sft/hf.py @@ -41,7 +41,7 @@ def squad(tokenizer) -> pl.LightningDataModule: micro_batch_size=2, global_batch_size=128, # assert gbs == mbs * accumulate_grad_batches num_workers=0, - sanity_check_dist_workers=False, + dataset_kwargs={"sanity_check_dist_workers": False}, ) diff --git a/nemo/collections/llm/gpt/data/dolly.py b/nemo/collections/llm/gpt/data/dolly.py index fb8cf9fd5da0f..c241580db8e30 100644 --- a/nemo/collections/llm/gpt/data/dolly.py +++ b/nemo/collections/llm/gpt/data/dolly.py @@ -14,7 +14,7 @@ import json import shutil -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional import numpy as np from datasets import load_dataset @@ -56,8 +56,8 @@ def __init__( num_workers: int = 8, pin_memory: bool = True, persistent_workers: bool = False, - pad_to_max_length: bool = False, packed_sequence_specs: Optional["PackedSequenceSpecs"] = None, + dataset_kwargs: Optional[Dict[str, Any]] = None, ): self.force_redownload = force_redownload self.delete_raw = delete_raw @@ -74,8 +74,8 @@ def __init__( 
num_workers=num_workers, pin_memory=pin_memory, persistent_workers=persistent_workers, - pad_to_max_length=pad_to_max_length, packed_sequence_specs=packed_sequence_specs, + dataset_kwargs=dataset_kwargs, ) def prepare_data(self) -> None: diff --git a/nemo/collections/llm/gpt/data/fine_tuning.py b/nemo/collections/llm/gpt/data/fine_tuning.py index 93835cb8e83ff..d7ed08a01ed49 100644 --- a/nemo/collections/llm/gpt/data/fine_tuning.py +++ b/nemo/collections/llm/gpt/data/fine_tuning.py @@ -15,7 +15,7 @@ import math from functools import lru_cache from pathlib import Path -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import pytorch_lightning as pl from torch.utils.data import DataLoader @@ -50,9 +50,8 @@ class FineTuningDataModule(pl.LightningDataModule): num_workers (int, optional): The number of worker processes for data loading. Defaults to 8. pin_memory (bool, optional): Whether to pin memory during data loading for faster GPU training. Defaults to True. persistent_workers (bool, optional): Whether to keep data loading workers persistent across epochs. Defaults to False. - max_train_steps (int, optional): Maximum number of steps to train. Used to calculate samples mapping for the mmap dataset - pad_to_max_length (bool, optional): Whether to pad the input to the max sequence length. If False, will pad to the max length of the current batch. packed_sequence_specs (PackedSequenceSpecs, optional): See PackedSequenceSpecs for details + dataset_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments to pass into the GPTSFTDataset class """ def __init__( @@ -68,9 +67,8 @@ def __init__( num_workers: int = 8, pin_memory: bool = True, persistent_workers: bool = False, - pad_to_max_length: bool = False, packed_sequence_specs: Optional["PackedSequenceSpecs"] = None, - sanity_check_dist_workers: bool = True, + dataset_kwargs: Optional[Dict[str, Any]] = None, ): super().__init__() self.seq_length = seq_length @@ -86,11 +84,10 @@ def __init__( self.rampup_batch_size = rampup_batch_size self.data_sampler = None self.max_train_samples = None - self.pad_to_max_length = pad_to_max_length self.packed_sequence_specs = packed_sequence_specs self.packed_sequence_size = -1 if not packed_sequence_specs else packed_sequence_specs.packed_sequence_size self.validate_batch_size_for_packed_sequence() - self._sanity_check_dist_workers = sanity_check_dist_workers + self.dataset_kwargs = dataset_kwargs or {} def validate_batch_size_for_packed_sequence(self): if self.packed_sequence_size > 0 and self.micro_batch_size > 1: @@ -135,8 +132,7 @@ def train_dataloader(self) -> DataLoader: self._create_dataset( self.train_path if self.packed_sequence_size <= 0 else self.train_path_packed, max_num_samples=self.max_train_samples, - pad_to_max_length=self.pad_to_max_length, - sanity_check_dist_workers=self._sanity_check_dist_workers, + **self.dataset_kwargs, ) ) @@ -145,8 +141,7 @@ def val_dataloader(self) -> DataLoader: self._create_dataset( self.validation_path, is_test=True, - pad_to_max_length=self.pad_to_max_length, - sanity_check_dist_workers=self._sanity_check_dist_workers, + **self.dataset_kwargs, ), ) @@ -156,8 +151,7 @@ def test_dataloader(self) -> DataLoader: self.test_path, tokens_to_generate=32, is_test=True, - pad_to_max_length=self.pad_to_max_length, - sanity_check_dist_workers=self._sanity_check_dist_workers, + **self.dataset_kwargs, ) ) diff --git a/nemo/collections/llm/gpt/data/squad.py b/nemo/collections/llm/gpt/data/squad.py 
index cabbd444c0cff..c359925cb2f65 100644 --- a/nemo/collections/llm/gpt/data/squad.py +++ b/nemo/collections/llm/gpt/data/squad.py @@ -13,7 +13,7 @@ # limitations under the License. import json import shutil -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional from datasets import DatasetDict, load_dataset @@ -54,9 +54,8 @@ def __init__( num_workers: int = 8, pin_memory: bool = True, persistent_workers: bool = False, - pad_to_max_length: bool = False, packed_sequence_specs: Optional["PackedSequenceSpecs"] = None, - sanity_check_dist_workers: bool = True, + dataset_kwargs: Optional[Dict[str, Any]] = None, ): self.force_redownload = force_redownload self.delete_raw = delete_raw @@ -73,9 +72,8 @@ def __init__( num_workers=num_workers, pin_memory=pin_memory, persistent_workers=persistent_workers, - pad_to_max_length=pad_to_max_length, packed_sequence_specs=packed_sequence_specs, - sanity_check_dist_workers=sanity_check_dist_workers, + dataset_kwargs=dataset_kwargs, ) def prepare_data(self) -> None: diff --git a/nemo/collections/llm/recipes/gemma2_27b.py b/nemo/collections/llm/recipes/gemma2_27b.py index 67a142426ae4e..6f852f0fe6cfd 100644 --- a/nemo/collections/llm/recipes/gemma2_27b.py +++ b/nemo/collections/llm/recipes/gemma2_27b.py @@ -213,6 +213,9 @@ def finetune_recipe( recipe = default_finetune_recipe( model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + # Gemma requires BOS + recipe.data.dataset_kwargs = {'add_bos': True} + if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 recipe.trainer.strategy.tensor_model_parallel_size = 8 diff --git a/nemo/collections/llm/recipes/gemma2_2b.py b/nemo/collections/llm/recipes/gemma2_2b.py index c926bb27474eb..98c7955917740 100644 --- a/nemo/collections/llm/recipes/gemma2_2b.py +++ b/nemo/collections/llm/recipes/gemma2_2b.py @@ -213,6 +213,9 @@ def finetune_recipe( recipe = default_finetune_recipe( model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + # Gemma requires BOS + recipe.data.dataset_kwargs = {'add_bos': True} + if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': diff --git a/nemo/collections/llm/recipes/gemma2_9b.py b/nemo/collections/llm/recipes/gemma2_9b.py index 9159b4beb1f9e..a211d8cfa8389 100644 --- a/nemo/collections/llm/recipes/gemma2_9b.py +++ b/nemo/collections/llm/recipes/gemma2_9b.py @@ -213,6 +213,9 @@ def finetune_recipe( recipe = default_finetune_recipe( model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + # Gemma requires BOS + recipe.data.dataset_kwargs = {'add_bos': True} + if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 recipe.trainer.strategy.tensor_model_parallel_size = 4 diff --git a/nemo/collections/llm/recipes/gemma_2b.py b/nemo/collections/llm/recipes/gemma_2b.py index 1c4e268b0dbc9..3e54deb0bc1c4 100644 --- a/nemo/collections/llm/recipes/gemma_2b.py +++ b/nemo/collections/llm/recipes/gemma_2b.py @@ -278,6 +278,9 @@ def finetune_recipe( recipe = default_finetune_recipe( model(), "google/gemma-2b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + # Gemma requires BOS + recipe.data.dataset_kwargs = {'add_bos': True} + if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 2 recipe.optim.config.lr = 5e-6 diff --git 
a/nemo/collections/llm/recipes/gemma_7b.py b/nemo/collections/llm/recipes/gemma_7b.py index 23013649c56c9..836f0404c0212 100644 --- a/nemo/collections/llm/recipes/gemma_7b.py +++ b/nemo/collections/llm/recipes/gemma_7b.py @@ -278,6 +278,9 @@ def finetune_recipe( recipe = default_finetune_recipe( model(), "google/gemma-7b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + # Gemma requires BOS + recipe.data.dataset_kwargs = {'add_bos': True} + if peft_scheme is None or peft_scheme.lower() == 'none': recipe.trainer.strategy.tensor_model_parallel_size = 2 recipe.optim.config.lr = 5e-6 diff --git a/tests/collections/llm/gpt/model/megatron_ssm_finetuning.py b/tests/collections/llm/gpt/model/megatron_ssm_finetuning.py index 67174974f9a38..d0f95811edf58 100644 --- a/tests/collections/llm/gpt/model/megatron_ssm_finetuning.py +++ b/tests/collections/llm/gpt/model/megatron_ssm_finetuning.py @@ -109,7 +109,7 @@ def get_args(): global_batch_size=4, tokenizer=model.tokenizer, num_workers=0, - pad_to_max_length=True, + dataset_kwargs={"pad_to_max_length": True}, ) app_state = _setup( From a5c841312f3a909777f4726deda7d4efe2761910 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Sun, 3 Nov 2024 20:17:49 -0500 Subject: [PATCH 061/125] Nemotron Recipe (#11118) * add finetune recipe for nemotron * Refactor Nemotron4-22b to Nemotron3-22b, * Apply isort and black reformatting Signed-off-by: suiyoubi * optimize 22b finetune recipe --------- Signed-off-by: suiyoubi Co-authored-by: suiyoubi --- nemo/collections/llm/__init__.py | 4 +- nemo/collections/llm/gpt/model/__init__.py | 4 +- nemo/collections/llm/gpt/model/nemotron.py | 20 +-- nemo/collections/llm/recipes/__init__.py | 12 +- nemo/collections/llm/recipes/nemotron.py | 18 +-- .../{nemotron4_22b.py => nemotron3_22b.py} | 78 ++++++++-- ...otron4_22b_16k.py => nemotron3_22b_16k.py} | 14 +- ...otron4_22b_64k.py => nemotron3_22b_64k.py} | 14 +- nemo/collections/llm/recipes/nemotron3_4b.py | 61 +++++++- nemo/collections/llm/recipes/nemotron3_8b.py | 113 ++++---------- nemo/collections/llm/recipes/nemotron4_15b.py | 62 +++++++- .../collections/llm/recipes/nemotron4_340b.py | 147 ++++-------------- .../llm/gpt/model/test_nemotron.py | 22 +-- ...nemotron4_22b.py => test_nemotron3_22b.py} | 10 +- ...4_22b_16k.py => test_nemotron3_22b_16k.py} | 10 +- ...4_22b_64k.py => test_nemotron3_22b_64k.py} | 10 +- .../llm/recipes/test_nemotron3_8b.py | 8 +- .../llm/recipes/test_nemotron4_340b.py | 6 +- tests/lightning/test_nemo_run.py | 6 +- 19 files changed, 328 insertions(+), 291 deletions(-) rename nemo/collections/llm/recipes/{nemotron4_22b.py => nemotron3_22b.py} (74%) rename nemo/collections/llm/recipes/{nemotron4_22b_16k.py => nemotron3_22b_16k.py} (94%) rename nemo/collections/llm/recipes/{nemotron4_22b_64k.py => nemotron3_22b_64k.py} (94%) rename tests/collections/llm/recipes/{test_nemotron4_22b.py => test_nemotron3_22b.py} (91%) rename tests/collections/llm/recipes/{test_nemotron4_22b_16k.py => test_nemotron3_22b_16k.py} (93%) rename tests/collections/llm/recipes/{test_nemotron4_22b_64k.py => test_nemotron3_22b_64k.py} (93%) diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py index a224b2638b78c..721170212e6d6 100644 --- a/nemo/collections/llm/__init__.py +++ b/nemo/collections/llm/__init__.py @@ -85,8 +85,8 @@ MixtralModel, Nemotron3Config4B, Nemotron3Config8B, + Nemotron3Config22B, Nemotron4Config15B, - Nemotron4Config22B, Nemotron4Config340B, NemotronConfig, NemotronModel, @@ -138,8 +138,8 @@ "NemotronModel", "Nemotron3Config4B", 
"Nemotron3Config8B", + "Nemotron3Config22B", "Nemotron4Config15B", - "Nemotron4Config22B", "Nemotron4Config340B", "NemotronConfig", "SSMConfig", diff --git a/nemo/collections/llm/gpt/model/__init__.py b/nemo/collections/llm/gpt/model/__init__.py index 6c7d159dd5cfe..b42ceac564bce 100644 --- a/nemo/collections/llm/gpt/model/__init__.py +++ b/nemo/collections/llm/gpt/model/__init__.py @@ -73,8 +73,8 @@ from nemo.collections.llm.gpt.model.nemotron import ( Nemotron3Config4B, Nemotron3Config8B, + Nemotron3Config22B, Nemotron4Config15B, - Nemotron4Config22B, Nemotron4Config340B, NemotronConfig, NemotronModel, @@ -137,7 +137,7 @@ "Nemotron3Config4B", "Nemotron3Config8B", "Nemotron4Config15B", - "Nemotron4Config22B", + "Nemotron3Config22B", "Nemotron4Config340B", "NemotronModel", "CodeLlamaConfig7B", diff --git a/nemo/collections/llm/gpt/model/nemotron.py b/nemo/collections/llm/gpt/model/nemotron.py index 8fdc5f8f0f00a..c45c7fcdbb15c 100644 --- a/nemo/collections/llm/gpt/model/nemotron.py +++ b/nemo/collections/llm/gpt/model/nemotron.py @@ -50,6 +50,7 @@ class NemotronConfig(GPTConfig): persist_layer_norm: bool = True bias_dropout_add_fusion: bool = False layernorm_zero_centered_gamma: bool = True + cross_entropy_loss_fusion: bool = True # Nemotron3Config4B as default configs num_layers: int = 32 @@ -87,27 +88,27 @@ class Nemotron3Config8B(NemotronConfig): @dataclass -class Nemotron4Config15B(NemotronConfig): - num_layers: int = 32 +class Nemotron3Config22B(NemotronConfig): + num_layers: int = 40 seq_length: int = 4096 hidden_size: int = 6144 ffn_hidden_size: int = 24576 num_attention_heads: int = 48 - num_query_groups: Optional[int] = 8 + num_query_groups: Optional[int] = None kv_channels: Optional[int] = None - init_method_std: float = 0.0134 + init_method_std: float = 0.008 @dataclass -class Nemotron4Config22B(NemotronConfig): - num_layers: int = 40 +class Nemotron4Config15B(NemotronConfig): + num_layers: int = 32 seq_length: int = 4096 hidden_size: int = 6144 ffn_hidden_size: int = 24576 num_attention_heads: int = 48 - num_query_groups: Optional[int] = None + num_query_groups: Optional[int] = 8 kv_channels: Optional[int] = None - init_method_std: float = 0.008 + init_method_std: float = 0.0134 @dataclass @@ -141,6 +142,7 @@ def init(self) -> NemotronModel: def apply(self, output_path: Path) -> Path: from transformers import NemotronForCausalLM + print('Start converting Nemotron model..') source = NemotronForCausalLM.from_pretrained(str(self), torch_dtype='auto') target = self.init() trainer = self.nemo_setup(target) @@ -357,8 +359,8 @@ def _export_qkv(ctx: io.TransformCTX, linear_qkv): "NemotronConfig", "Nemotron3Config4B", "Nemotron3Config8B", + "Nemotron3Config22B", "Nemotron4Config15B", - "Nemotron4Config22B", "Nemotron4Config340B", "NemotronModel", ] diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index 551de93cce431..a37d75d4dee88 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -47,12 +47,12 @@ nemotron, nemotron3_4b, nemotron3_8b, + nemotron3_22b, + nemotron3_22b_16k, + nemotron3_22b_64k, nemotron4_15b, nemotron4_15b_16k, nemotron4_15b_64k, - nemotron4_22b, - nemotron4_22b_16k, - nemotron4_22b_64k, nemotron4_340b, qwen2, qwen2_1p5b, @@ -100,12 +100,12 @@ "nemotron", "nemotron3_4b", "nemotron3_8b", + "nemotron3_22b", + "nemotron3_22b_16k", + "nemotron3_22b_64k", "nemotron4_15b", "nemotron4_15b_16k", "nemotron4_15b_64k", - "nemotron4_22b", - "nemotron4_22b_16k", - "nemotron4_22b_64k", 
"nemotron4_340b", "t5_220m", "t5_3b", diff --git a/nemo/collections/llm/recipes/nemotron.py b/nemo/collections/llm/recipes/nemotron.py index aedf3fcf2954b..104c3798567a6 100644 --- a/nemo/collections/llm/recipes/nemotron.py +++ b/nemo/collections/llm/recipes/nemotron.py @@ -24,8 +24,8 @@ from nemo.collections.llm.gpt.model.nemotron import ( Nemotron3Config4B, Nemotron3Config8B, + Nemotron3Config22B, Nemotron4Config15B, - Nemotron4Config22B, Nemotron4Config340B, NemotronModel, ) @@ -37,9 +37,9 @@ def nemotron_model(version: str) -> run.Config[pl.LightningModule]: A function to create a Nemotron models. Args: - version (str): The version of the Nemotron model to create. one of ["nemotron3_4b", "nemotron3_8b", + version (str): The version of the Nemotron model to create. one of ["nemotron3_4b", "nemotron3_8b",\ + "nemotron3_22b", "nemotron3_22b_16k", "nemotron3_22b_64k", "nemotron4_15b", "nemotron4_15b_16k", "nemotron4_15b_64k", - "nemotron4_22b", "nemotron4_22b_16k", "nemotron4_22b_64k", "nemotron4_340b"]. Returns: @@ -50,18 +50,18 @@ def nemotron_model(version: str) -> run.Config[pl.LightningModule]: config = run.Config(Nemotron3Config4B) elif version == "nemotron3_8b": config = run.Config(Nemotron3Config8B) + elif version == "nemotron3_22b": + config = run.Config(Nemotron3Config22B) + elif version == "nemotron3_22b_16k": + config = run.Config(Nemotron3Config22B, seq_length=16384) + elif version == "nemotron3_22b_64k": + config = run.Config(Nemotron3Config22B, seq_length=65536) elif version == "nemotron4_15b": config = run.Config(Nemotron4Config15B) elif version == "nemotron4_15b_16k": config = run.Config(Nemotron4Config15B, seq_length=16384) elif version == "nemotron4_15b_64k": config = run.Config(Nemotron4Config15B, seq_length=65536) - elif version == "nemotron4_22b": - config = run.Config(Nemotron4Config22B) - elif version == "nemotron4_22b_16k": - config = run.Config(Nemotron4Config22B, seq_length=16384) - elif version == "nemotron4_22b_64k": - config = run.Config(Nemotron4Config22B, seq_length=65536) elif version == "nemotron4_340b": config = run.Config(Nemotron4Config340B) diff --git a/nemo/collections/llm/recipes/nemotron4_22b.py b/nemo/collections/llm/recipes/nemotron3_22b.py similarity index 74% rename from nemo/collections/llm/recipes/nemotron4_22b.py rename to nemo/collections/llm/recipes/nemotron3_22b.py index 595b1fc0bac2d..724e21f002e39 100644 --- a/nemo/collections/llm/recipes/nemotron4_22b.py +++ b/nemo/collections/llm/recipes/nemotron3_22b.py @@ -18,28 +18,30 @@ import pytorch_lightning as pl import torch -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.nemotron import nemotron_model, nemotron_trainer from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.lightning.pytorch.callbacks.megatron_comm_overlap import MegatronCommOverlapCallback from nemo.utils.exp_manager import TimingCallback -NAME = "nemotron4_22b" +NAME = "nemotron3_22b" @run.cli.factory(name=NAME) def model() -> run.Config[pl.LightningModule]: """ - Factory function to create a Nemotron4 22b model configuration. + Factory function to create a Nemotron3 22B model configuration. 
Returns: - run.Config[pl.LightningModule]: Configuration for the Nemotron4 22b model. + run.Config[pl.LightningModule]: Configuration for the Nemotron3 22b model. Examples: CLI usage: - $ nemo llm pretrain model=nemotron4_22b ... + $ nemo llm pretrain model=nemotron3_22b ... Python API usage: >>> model_config = model() @@ -85,7 +87,7 @@ def pretrain_recipe( fn=pretrain, ) -> run.Partial: """ - Create a pre-training recipe for Nemotron4 22b model. + Create a pre-training recipe for Nemotron3 22B model. This function sets up a complete configuration for pre-training, including model, trainer, data, logging, optimization, and resumption settings. @@ -124,8 +126,8 @@ def pretrain_recipe( Examples: CLI usage: - $ nemo llm pretrain --factory nemotron4_22b - $ nemo llm pretrain --factory "nemotron4_22b(num_nodes=1, name='my_nemotron_pretrain')" + $ nemo llm pretrain --factory nemotron3_22b + $ nemo llm pretrain --factory "nemotron3_22b(num_nodes=1, name='my_nemotron_pretrain')" Python API usage: >>> recipe = pretrain_recipe(name="nemotron_pretrain", num_nodes=1) @@ -181,7 +183,7 @@ def pretrain_recipe( def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: """ - Create a performance-optimized pre-training recipe for Nemotron4 22B model. + Create a performance-optimized pre-training recipe for Nemotron3 22B model. This method enables performance optimizations that may not be suitable for all use cases. It builds upon the standard pre-training recipe and adds additional performance enhancements. @@ -214,3 +216,61 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: ) ) return recipe + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, +) -> run.Partial: + """ + Create a fine-tuning recipe for Nemotron3 22B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory nemotron3_22b + + Python API usage: + >>> recipe = finetune_recipe(name="nemotron3_22b_finetune", num_nodes=8) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
+ """ + + recipe = default_finetune_recipe( + model(), "thhaus/nemotron3-22b-hf", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # some settings currently do not function correctly with finetuning + recipe.model.config.cross_entropy_loss_fusion = False + return recipe diff --git a/nemo/collections/llm/recipes/nemotron4_22b_16k.py b/nemo/collections/llm/recipes/nemotron3_22b_16k.py similarity index 94% rename from nemo/collections/llm/recipes/nemotron4_22b_16k.py rename to nemo/collections/llm/recipes/nemotron3_22b_16k.py index 42f258c6057d0..81f4253ad37a4 100644 --- a/nemo/collections/llm/recipes/nemotron4_22b_16k.py +++ b/nemo/collections/llm/recipes/nemotron3_22b_16k.py @@ -25,20 +25,20 @@ from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.utils.exp_manager import TimingCallback -NAME = "nemotron4_22b_16k" +NAME = "nemotron3_22b_16k" @run.cli.factory(name=NAME) def model() -> run.Config[pl.LightningModule]: """ - Factory function to create a Nemotron4 22b model with 16k sequence length. + Factory function to create a Nemotron3 22B model with 16k sequence length. Returns: - run.Config[pl.LightningModule]: Configuration for the Nemotron4 22b and 16k sequence length model. + run.Config[pl.LightningModule]: Configuration for the Nemotron3 22b and 16k sequence length model. Examples: CLI usage: - $ nemo llm pretrain model=nemotron4_22b_16k ... + $ nemo llm pretrain model=nemotron3_22b_16k ... Python API usage: >>> model_config = model() @@ -83,7 +83,7 @@ def pretrain_recipe( fn=pretrain, ) -> run.Partial: """ - Create a pre-training recipe for Nemotron4 22b model with 16k sequence length. + Create a pre-training recipe for Nemotron3 22B model with 16k sequence length. This function sets up a complete configuration for pre-training, including model, trainer, data, logging, optimization, and resumption settings. @@ -121,8 +121,8 @@ def pretrain_recipe( Examples: CLI usage: - $ nemo llm pretrain --factory nemotron4_22b_16k - $ nemo llm pretrain --factory "nemotron4_22b_16k(num_nodes=1, name='my_nemotron_pretrain')" + $ nemo llm pretrain --factory nemotron3_22b_16k + $ nemo llm pretrain --factory "nemotron3_22b_16k(num_nodes=1, name='my_nemotron_pretrain')" Python API usage: >>> recipe = pretrain_recipe(name="nemotron_pretrain", num_nodes=1) diff --git a/nemo/collections/llm/recipes/nemotron4_22b_64k.py b/nemo/collections/llm/recipes/nemotron3_22b_64k.py similarity index 94% rename from nemo/collections/llm/recipes/nemotron4_22b_64k.py rename to nemo/collections/llm/recipes/nemotron3_22b_64k.py index 67d60a6e1c907..676694697e4cb 100644 --- a/nemo/collections/llm/recipes/nemotron4_22b_64k.py +++ b/nemo/collections/llm/recipes/nemotron3_22b_64k.py @@ -25,20 +25,20 @@ from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.utils.exp_manager import TimingCallback -NAME = "nemotron4_22b_64k" +NAME = "nemotron3_22b_64k" @run.cli.factory(name=NAME) def model() -> run.Config[pl.LightningModule]: """ - Factory function to create a Nemotron4 22b model with 64k sequence length. + Factory function to create a Nemotron3 22B model with 64k sequence length. 
Returns: - run.Config[pl.LightningModule]: Configuration for the Nemotron4 22b and 64k sequence length model. + run.Config[pl.LightningModule]: Configuration for the Nemotron3 22b and 64k sequence length model. Examples: CLI usage: - $ nemo llm pretrain model=nemotron4_22b_64k ... + $ nemo llm pretrain model=nemotron3_22b_64k ... Python API usage: >>> model_config = model() @@ -83,7 +83,7 @@ def pretrain_recipe( fn=pretrain, ) -> run.Partial: """ - Create a pre-training recipe for Nemotron4 22b model with 16k sequence length. + Create a pre-training recipe for Nemotron3 22B model with 16k sequence length. This function sets up a complete configuration for pre-training, including model, trainer, data, logging, optimization, and resumption settings. @@ -121,8 +121,8 @@ def pretrain_recipe( Examples: CLI usage: - $ nemo llm pretrain --factory nemotron4_22b_64k - $ nemo llm pretrain --factory "nemotron4_22b_64k(num_nodes=2, name='my_nemotron_pretrain')" + $ nemo llm pretrain --factory nemotron3_22b_64k + $ nemo llm pretrain --factory "nemotron3_22b_64k(num_nodes=2, name='my_nemotron_pretrain')" Python API usage: >>> recipe = pretrain_recipe(name="nemotron_pretrain", num_nodes=2) diff --git a/nemo/collections/llm/recipes/nemotron3_4b.py b/nemo/collections/llm/recipes/nemotron3_4b.py index b5521c0d087a3..e1c2ef345d7e0 100644 --- a/nemo/collections/llm/recipes/nemotron3_4b.py +++ b/nemo/collections/llm/recipes/nemotron3_4b.py @@ -18,8 +18,10 @@ import pytorch_lightning as pl import torch -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.nemotron import nemotron_model, nemotron_trainer from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing @@ -166,3 +168,60 @@ def pretrain_recipe( ), resume=default_resume(), ) + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, +) -> run.Partial: + """ + Create a fine-tuning recipe for Nemotron3 4B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory nemotron3_4b + + Python API usage: + >>> recipe = finetune_recipe(name="nemotron3_4b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. 
For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + + recipe = default_finetune_recipe( + model(), "nvidia/Minitron-4B-Base", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # some settings currently do not function correctly with finetuning + recipe.model.config.cross_entropy_loss_fusion = False + return recipe diff --git a/nemo/collections/llm/recipes/nemotron3_8b.py b/nemo/collections/llm/recipes/nemotron3_8b.py index 7dcebe17f872b..202efe658d834 100644 --- a/nemo/collections/llm/recipes/nemotron3_8b.py +++ b/nemo/collections/llm/recipes/nemotron3_8b.py @@ -23,6 +23,7 @@ from nemo.collections.llm.gpt.data.mock import MockDataModule from nemo.collections.llm.gpt.data.squad import SquadDataModule from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.nemotron import nemotron_model, nemotron_trainer from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing @@ -236,71 +237,27 @@ def nemo_resume() -> run.Config[nl.AutoResume]: @run.cli.factory(target=finetune, name=NAME) def finetune_recipe( - # General dir: Optional[str] = None, name: str = "default", - # Trainer - tensor_parallelism: int = 2, - pipeline_parallelism: int = 1, - pipeline_parallelism_type: Optional[torch.dtype] = None, - virtual_pipeline_parallelism: Optional[int] = None, - context_parallelism: int = 1, - sequence_parallelism: bool = False, num_nodes: int = 1, num_gpus_per_node: int = 8, - max_steps: int = 300000, - precision: str = "bf16-mixed", - accumulate_grad_batches: int = 1, - gradient_clip_val: float = 1.0, - limit_test_batches: int = 32, - limit_val_batches: int = 32, - log_every_n_steps: int = 10, - val_check_interval: int = 2000, - # Data - global_batch_size=32, - micro_batch_size=2, - seq_length=4096, - # Optimizer - warmup_steps=500, - constant_steps=0, - min_lr=3.0e-5, - max_lr=3e-4, - # Training function - fn=finetune, + peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Nemotron3 8B model. This function sets up a complete configuration for fine-tuning, including - model, trainer, and data settings. + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. Args: dir (Optional[str]): Directory for saving logs and checkpoints. - name (str): Name of the pre-training run. - tensor_parallelism (int): Degree of tensor model parallelism. - pipeline_parallelism (int): Degree of pipeline model parallelism. - pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. - virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. - context_parallelism (int): Degree of context parallelism. - sequence_parallelism (bool): Whether to use sequence parallelism. + name (str): Name of the fine-tuning run. num_nodes (int): Number of compute nodes to use. 
num_gpus_per_node (int): Number of GPUs per node. - max_steps (int): Maximum number of training steps. - precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. - accumulate_grad_batches (int): Number of steps per gradient accumulation. - gradient_clip_val (float): Value for gradient clipping. - limit_test_batches (int): Limit the number of test batches. - limit_val_batches (int): Limit the number of validation batches. - log_every_n_steps (int): Log every n steps. - val_check_interval (int): Run validation every N steps. - global_batch_size (int): Global batch size. - micro_batch_size (int): Micro batch size. - seq_length (int): Sequence length. - warmup_steps (int): Number of warmup steps. - constant_steps (int): Number of constant steps. - min_lr (float): Minimum learning rate. - max_lr (float): Maximum learning rate. - fn (Callable): The pre-training function to use. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. @@ -308,47 +265,29 @@ def finetune_recipe( Examples: CLI usage: $ nemo llm finetune --factory nemotron3_8b - $ nemo llm finetune --factory "nemotron3_8b(name='my_nemotron3_8b_finetune', num_nodes=4)" Python API usage: - >>> recipe = finetune_recipe(name="my_nemotron3_8b_finetune", num_nodes=4) + >>> recipe = finetune_recipe(name="nemotron3_8b_finetune", num_nodes=2) >>> print(recipe) Note: - This recipe is optimized for fine-tuning Nemotron3 8b model. - This recipe uses the SQuAD dataset. + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. 
""" - recipe = pretrain_recipe( - dir=dir, - name=name, - tensor_parallelism=tensor_parallelism, - pipeline_parallelism=pipeline_parallelism, - pipeline_parallelism_type=pipeline_parallelism_type, - virtual_pipeline_parallelism=virtual_pipeline_parallelism, - context_parallelism=context_parallelism, - sequence_parallelism=sequence_parallelism, - num_nodes=num_nodes, - num_gpus_per_node=num_gpus_per_node, - max_steps=max_steps, - precision=precision, - accumulate_grad_batches=accumulate_grad_batches, - gradient_clip_val=gradient_clip_val, - limit_test_batches=limit_test_batches, - limit_val_batches=limit_val_batches, - log_every_n_steps=log_every_n_steps, - val_check_interval=val_check_interval, - global_batch_size=global_batch_size, - micro_batch_size=micro_batch_size, - seq_length=seq_length, - warmup_steps=warmup_steps, - constant_steps=constant_steps, - min_lr=min_lr, - max_lr=max_lr, - fn=fn, - ) - recipe.resume = nemo_resume() - recipe.peft = run.Config(LoRA) - recipe.data = run.Config( - SquadDataModule, seq_length=seq_length, global_batch_size=global_batch_size, micro_batch_size=micro_batch_size + + recipe = default_finetune_recipe( + model(), "thhaus/nemotron3-8b", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 2 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # some settings currently do not function correctly with finetuning + recipe.model.config.cross_entropy_loss_fusion = False return recipe diff --git a/nemo/collections/llm/recipes/nemotron4_15b.py b/nemo/collections/llm/recipes/nemotron4_15b.py index 16ae7b2b1e79a..0f15c47c67b9d 100644 --- a/nemo/collections/llm/recipes/nemotron4_15b.py +++ b/nemo/collections/llm/recipes/nemotron4_15b.py @@ -18,8 +18,10 @@ import pytorch_lightning as pl import torch -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.nemotron import nemotron_model, nemotron_trainer from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing @@ -203,3 +205,61 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: ) ) return recipe + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 1, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, +) -> run.Partial: + """ + Create a fine-tuning recipe for Nemotron4 15B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. 
+ peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory nemotron4_15b + + Python API usage: + >>> recipe = finetune_recipe(name="nemotron4_15b_finetune", num_nodes=2) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. + """ + + recipe = default_finetune_recipe( + model(), "thhaus/nemotron4-15b-base-hf", dir, name, num_nodes, num_gpus_per_node, packed_sequence + ) + if peft_scheme is None or peft_scheme.lower() == 'none': + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # some settings currently do not function correctly with finetuning + recipe.model.config.cross_entropy_loss_fusion = False + return recipe diff --git a/nemo/collections/llm/recipes/nemotron4_340b.py b/nemo/collections/llm/recipes/nemotron4_340b.py index 0ddd10c2bc210..2b2bb201ecb7b 100644 --- a/nemo/collections/llm/recipes/nemotron4_340b.py +++ b/nemo/collections/llm/recipes/nemotron4_340b.py @@ -21,8 +21,8 @@ from nemo import lightning as nl from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule -from nemo.collections.llm.gpt.data.squad import SquadDataModule from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.nemotron import nemotron_model, nemotron_trainer from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing @@ -219,146 +219,63 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: return recipe -@run.cli.factory(name=NAME + "_nemo") -def nemo_resume() -> run.Config[nl.AutoResume]: - """ - Configure automatic resumption from a NeMo checkpoint converted from Huggingface for Nemotron4 340B model. - - More info about the Huggingface model can be found at: https://huggingface.co/nvidia/Nemotron-4-340B-Base. - - This NeMo checkpoint should be converted from Huggingface beforehand, using nemo.collections.llm.import_ckpt. - When converting the checkpoint, the NeMo checkpoint will be saved in NEMO_HOME (set to ~/.cache/nemo by default). - - This function sets up the configuration to resume training from path nemo://nvidia/Nemotron-4-340B-Base. - This translates to the full path {NEMO_HOME}/models/nvidia/Nemotron-4-340B-Base. - - Returns: - run.Config[nl.AutoResume]: Configuration for resuming from NeMo checkpoint. - - Note: - This is particularly useful for fine-tuning scenarios where you want to - start from the pre-trained Nemotron4 340B model. 
- """ - return run.Config( - nl.AutoResume, restore_config=run.Config(nl.RestoreConfig, path="nemo://nvidia/Nemotron-4-340B-Base") - ) - - @run.cli.factory(target=finetune, name=NAME) def finetune_recipe( - # General dir: Optional[str] = None, name: str = "default", - # Trainer - tensor_parallelism: int = 8, - pipeline_parallelism: int = 12, - pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, - virtual_pipeline_parallelism: Optional[int] = 8, - context_parallelism: int = 1, - sequence_parallelism: bool = False, - num_nodes: int = 768, + num_nodes: int = 4, num_gpus_per_node: int = 8, - max_steps: int = 100000, - precision: str = "bf16-mixed", - accumulate_grad_batches: int = 1, - gradient_clip_val: float = 1.0, - limit_test_batches: int = 32, - limit_val_batches: int = 32, - log_every_n_steps: int = 10, - val_check_interval: int = 2000, - # Data - global_batch_size=2304, - micro_batch_size=1, - seq_length=4096, - # Optimizer - warmup_steps=500, - constant_steps=0, - min_lr=1.0e-5, - max_lr=1.0e-4, - # Training function - fn=finetune, + peft_scheme: Optional[str] = 'lora', + packed_sequence: bool = False, ) -> run.Partial: """ Create a fine-tuning recipe for Nemotron4 340B model. This function sets up a complete configuration for fine-tuning, including - model, trainer, and data settings. + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. Args: dir (Optional[str]): Directory for saving logs and checkpoints. - name (str): Name of the pre-training run. - tensor_parallelism (int): Degree of tensor model parallelism. - pipeline_parallelism (int): Degree of pipeline model parallelism. - pipeline_parallelism_type (Optional[torch.dtype]): Data type for pipeline parallelism. - virtual_pipeline_parallelism (Optional[int]): Size of virtual pipeline parallelism. - context_parallelism (int): Degree of context parallelism. - sequence_parallelism (bool): Whether to use sequence parallelism. + name (str): Name of the fine-tuning run. num_nodes (int): Number of compute nodes to use. num_gpus_per_node (int): Number of GPUs per node. - max_steps (int): Maximum number of training steps. - precision (str): Precision configuration, one of fp32, 16-mixed or bf16-mixed. - accumulate_grad_batches (int): Number of steps per gradient accumulation. - gradient_clip_val (float): Value for gradient clipping. - limit_test_batches (int): Limit the number of test batches. - limit_val_batches (int): Limit the number of validation batches. - log_every_n_steps (int): Log every n steps. - val_check_interval (int): Run validation every N steps. - global_batch_size (int): Global batch size. - micro_batch_size (int): Micro batch size. - seq_length (int): Sequence length. - warmup_steps (int): Number of warmup steps. - constant_steps (int): Number of constant steps. - min_lr (float): Minimum learning rate. - max_lr (float): Maximum learning rate. - fn (Callable): The pre-training function to use. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048. Returns: run.Partial: Partial configuration for fine-tuning. 
Examples: CLI usage: - $ nemo llm finetune --factory nemotron4_340b - $ nemo llm finetune --factory "nemotron4_340b(name='my_nemotron4_340_finetune', num_nodes=4)" + $ nemo llm finetune --factory nemotron3_22b Python API usage: - >>> recipe = finetune_recipe(name="my_nemotron4_340_finetune", num_nodes=4) + >>> recipe = finetune_recipe(name="nemotron4_340b_finetune", num_nodes=2) >>> print(recipe) Note: - This recipe is optimized for fine-tuning Nemotron4 8b model. - This recipe uses the SQuAD dataset. + This recipe uses the SQuAD dataset for fine-tuning. For more information + on fine-tuning LLMs with NeMo, see the fine-tuning guide in the + `examples/llm/finetune/` directory. """ - recipe = pretrain_recipe( - dir=dir, - name=name, - tensor_parallelism=tensor_parallelism, - pipeline_parallelism=pipeline_parallelism, - pipeline_parallelism_type=pipeline_parallelism_type, - virtual_pipeline_parallelism=virtual_pipeline_parallelism, - context_parallelism=context_parallelism, - sequence_parallelism=sequence_parallelism, - num_nodes=num_nodes, - num_gpus_per_node=num_gpus_per_node, - max_steps=max_steps, - precision=precision, - accumulate_grad_batches=accumulate_grad_batches, - gradient_clip_val=gradient_clip_val, - limit_test_batches=limit_test_batches, - limit_val_batches=limit_val_batches, - log_every_n_steps=log_every_n_steps, - val_check_interval=val_check_interval, - global_batch_size=global_batch_size, - micro_batch_size=micro_batch_size, - seq_length=seq_length, - warmup_steps=warmup_steps, - constant_steps=constant_steps, - min_lr=min_lr, - max_lr=max_lr, - fn=fn, - ) - recipe.resume = nemo_resume() - recipe.peft = run.Config(LoRA) - recipe.data = run.Config( - SquadDataModule, seq_length=seq_length, global_batch_size=global_batch_size, micro_batch_size=micro_batch_size + + recipe = default_finetune_recipe( + model(), "mgoin/Nemotron-4-340B-Base-hf", dir, name, num_nodes, num_gpus_per_node, packed_sequence ) + if peft_scheme is None or peft_scheme.lower() == 'none': + assert num_nodes >= 12 + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.trainer.strategy.pipeline_model_parallel_size = 12 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.trainer.strategy.pipeline_model_parallel_size = 4 + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + + # some settings currently do not function correctly with finetuning + recipe.model.config.cross_entropy_loss_fusion = False return recipe diff --git a/tests/collections/llm/gpt/model/test_nemotron.py b/tests/collections/llm/gpt/model/test_nemotron.py index 5fdd1547a75d4..994227518b6f9 100644 --- a/tests/collections/llm/gpt/model/test_nemotron.py +++ b/tests/collections/llm/gpt/model/test_nemotron.py @@ -16,8 +16,8 @@ from nemo.collections.llm.gpt.model.nemotron import ( Nemotron3Config4B, Nemotron3Config8B, + Nemotron3Config22B, Nemotron4Config15B, - Nemotron4Config22B, Nemotron4Config340B, NemotronConfig, ) @@ -72,28 +72,28 @@ def test_nemotron3_config_8b(): assert config.init_method_std == 0.010 -def test_nemotron4_config_15b(): - config = Nemotron4Config15B() - assert config.num_layers == 32 +def test_nemotron3_config_22b(): + config = Nemotron3Config22B() + assert config.num_layers == 40 assert config.seq_length == 4096 assert config.hidden_size == 6144 assert config.ffn_hidden_size == 24576 assert config.num_attention_heads == 48 - assert 
config.num_query_groups == 8 + assert config.num_query_groups == 48 assert config.kv_channels == 6144 // 48 - assert config.init_method_std == 0.0134 + assert config.init_method_std == 0.008 -def test_nemotron4_config_22b(): - config = Nemotron4Config22B() - assert config.num_layers == 40 +def test_nemotron4_config_15b(): + config = Nemotron4Config15B() + assert config.num_layers == 32 assert config.seq_length == 4096 assert config.hidden_size == 6144 assert config.ffn_hidden_size == 24576 assert config.num_attention_heads == 48 - assert config.num_query_groups == 48 + assert config.num_query_groups == 8 assert config.kv_channels == 6144 // 48 - assert config.init_method_std == 0.008 + assert config.init_method_std == 0.0134 def test_nemotron4_config_340b(): diff --git a/tests/collections/llm/recipes/test_nemotron4_22b.py b/tests/collections/llm/recipes/test_nemotron3_22b.py similarity index 91% rename from tests/collections/llm/recipes/test_nemotron4_22b.py rename to tests/collections/llm/recipes/test_nemotron3_22b.py index fef24c2167663..50ce3028c5bb7 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b.py +++ b/tests/collections/llm/recipes/test_nemotron3_22b.py @@ -17,15 +17,15 @@ from nemo.collections.llm.api import pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule -from nemo.collections.llm.gpt.model.nemotron import Nemotron4Config22B, NemotronModel -from nemo.collections.llm.recipes import nemotron4_22b +from nemo.collections.llm.gpt.model.nemotron import Nemotron3Config22B, NemotronModel +from nemo.collections.llm.recipes import nemotron3_22b from nemo.lightning import Trainer -class TestNemotron4_22B: +class TestNemotron3_22B: @pytest.fixture(scope="class") def recipe_module(self): - return nemotron4_22b + return nemotron3_22b def test_model(self, recipe_module): model = recipe_module.model() @@ -36,7 +36,7 @@ def test_model_config_parameters(self, recipe_module): model = recipe_module.model() nemotron_config = model.config assert isinstance(nemotron_config, run.Config) - assert nemotron_config.__fn_or_cls__ == Nemotron4Config22B + assert nemotron_config.__fn_or_cls__ == Nemotron3Config22B assert nemotron_config.num_layers == 40 assert nemotron_config.hidden_size == 6144 assert nemotron_config.seq_length == 4096 diff --git a/tests/collections/llm/recipes/test_nemotron4_22b_16k.py b/tests/collections/llm/recipes/test_nemotron3_22b_16k.py similarity index 93% rename from tests/collections/llm/recipes/test_nemotron4_22b_16k.py rename to tests/collections/llm/recipes/test_nemotron3_22b_16k.py index 313d838067fbb..607c655a67034 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b_16k.py +++ b/tests/collections/llm/recipes/test_nemotron3_22b_16k.py @@ -17,15 +17,15 @@ from nemo.collections.llm.api import pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule -from nemo.collections.llm.gpt.model.nemotron import Nemotron4Config22B, NemotronModel -from nemo.collections.llm.recipes import nemotron4_22b_16k +from nemo.collections.llm.gpt.model.nemotron import Nemotron3Config22B, NemotronModel +from nemo.collections.llm.recipes import nemotron3_22b_16k from nemo.lightning import Trainer -class TestNemotron4_22B_16K: +class TestNemotron3_22B_16K: @pytest.fixture(scope="class") def recipe_module(self): - return nemotron4_22b_16k + return nemotron3_22b_16k def test_model(self, recipe_module): model = recipe_module.model() @@ -36,7 +36,7 @@ def test_model_config_parameters(self, recipe_module): model = recipe_module.model() nemotron_config = 
model.config assert isinstance(nemotron_config, run.Config) - assert nemotron_config.__fn_or_cls__ == Nemotron4Config22B + assert nemotron_config.__fn_or_cls__ == Nemotron3Config22B assert nemotron_config.num_layers == 40 assert nemotron_config.hidden_size == 6144 assert nemotron_config.seq_length == 16384 diff --git a/tests/collections/llm/recipes/test_nemotron4_22b_64k.py b/tests/collections/llm/recipes/test_nemotron3_22b_64k.py similarity index 93% rename from tests/collections/llm/recipes/test_nemotron4_22b_64k.py rename to tests/collections/llm/recipes/test_nemotron3_22b_64k.py index 5ac90e971cbb8..423ca5a4fa1b3 100644 --- a/tests/collections/llm/recipes/test_nemotron4_22b_64k.py +++ b/tests/collections/llm/recipes/test_nemotron3_22b_64k.py @@ -17,15 +17,15 @@ from nemo.collections.llm.api import pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule -from nemo.collections.llm.gpt.model.nemotron import Nemotron4Config22B, NemotronModel -from nemo.collections.llm.recipes import nemotron4_22b_64k +from nemo.collections.llm.gpt.model.nemotron import Nemotron3Config22B, NemotronModel +from nemo.collections.llm.recipes import nemotron3_22b_64k from nemo.lightning import Trainer -class TestNemotron4_22B_64K: +class TestNemotron3_22B_64K: @pytest.fixture(scope="class") def recipe_module(self): - return nemotron4_22b_64k + return nemotron3_22b_64k def test_model(self, recipe_module): model = recipe_module.model() @@ -36,7 +36,7 @@ def test_model_config_parameters(self, recipe_module): model = recipe_module.model() nemotron_config = model.config assert isinstance(nemotron_config, run.Config) - assert nemotron_config.__fn_or_cls__ == Nemotron4Config22B + assert nemotron_config.__fn_or_cls__ == Nemotron3Config22B assert nemotron_config.num_layers == 40 assert nemotron_config.hidden_size == 6144 assert nemotron_config.seq_length == 65536 diff --git a/tests/collections/llm/recipes/test_nemotron3_8b.py b/tests/collections/llm/recipes/test_nemotron3_8b.py index 2cf1c440080a2..c64c9ae655af7 100644 --- a/tests/collections/llm/recipes/test_nemotron3_8b.py +++ b/tests/collections/llm/recipes/test_nemotron3_8b.py @@ -66,13 +66,13 @@ def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_ def test_finetune_recipe(self, recipe_module): recipe = recipe_module.finetune_recipe() assert isinstance(recipe, run.Partial) - assert recipe.__fn_or_cls__ == finetune + assert isinstance(recipe, run.Partial) assert isinstance(recipe.model, run.Config) assert recipe.model.__fn_or_cls__ == NemotronModel assert isinstance(recipe.trainer, run.Config) assert recipe.trainer.__fn_or_cls__ == Trainer assert isinstance(recipe.data, run.Config) assert recipe.data.__fn_or_cls__ == SquadDataModule - assert recipe.data.seq_length == 4096 - assert recipe.data.global_batch_size == 32 - assert recipe.data.micro_batch_size == 2 + assert recipe.data.seq_length == 2048 + assert recipe.data.global_batch_size == 128 + assert recipe.data.micro_batch_size == 1 diff --git a/tests/collections/llm/recipes/test_nemotron4_340b.py b/tests/collections/llm/recipes/test_nemotron4_340b.py index 603c166964292..968e2cf76e3e6 100644 --- a/tests/collections/llm/recipes/test_nemotron4_340b.py +++ b/tests/collections/llm/recipes/test_nemotron4_340b.py @@ -64,7 +64,7 @@ def test_pretrain_recipe_with_different_configurations(self, recipe_module, num_ assert recipe.trainer.devices == num_gpus_per_node def test_finetune_recipe(self, recipe_module): - recipe = recipe_module.finetune_recipe() + recipe = 
recipe_module.finetune_recipe(num_nodes=4) assert isinstance(recipe, run.Partial) assert recipe.__fn_or_cls__ == finetune assert isinstance(recipe.model, run.Config) @@ -73,6 +73,6 @@ def test_finetune_recipe(self, recipe_module): assert recipe.trainer.__fn_or_cls__ == Trainer assert isinstance(recipe.data, run.Config) assert recipe.data.__fn_or_cls__ == SquadDataModule - assert recipe.data.seq_length == 4096 - assert recipe.data.global_batch_size == 2304 + assert recipe.data.seq_length == 2048 + assert recipe.data.global_batch_size == 128 assert recipe.data.micro_batch_size == 1 diff --git a/tests/lightning/test_nemo_run.py b/tests/lightning/test_nemo_run.py index 2d63031a5ad06..f91322116824e 100644 --- a/tests/lightning/test_nemo_run.py +++ b/tests/lightning/test_nemo_run.py @@ -42,12 +42,12 @@ ("nemotron3_4b", "pretrain_recipe", "nemotron3_4b_pretrain"), ("nemotron3_8b", "pretrain_recipe", "nemotron3_8b_pretrain"), ("nemotron3_8b", "finetune_recipe", "nemotron3_8b_finetune"), + ("nemotron3_22b", "pretrain_recipe", "nemotron3_22b_pretrain"), + ("nemotron3_22b_16k", "pretrain_recipe", "nemotron3_22b_16k_pretrain"), + ("nemotron3_22b_64k", "pretrain_recipe", "nemotron3_22b_64k_pretrain"), ("nemotron4_15b", "pretrain_recipe", "nemotron4_15b_pretrain"), ("nemotron4_15b_16k", "pretrain_recipe", "nemotron4_15b_16k_pretrain"), ("nemotron4_15b_64k", "pretrain_recipe", "nemotron4_15b_64k_pretrain"), - ("nemotron4_22b", "pretrain_recipe", "nemotron4_22b_pretrain"), - ("nemotron4_22b_16k", "pretrain_recipe", "nemotron4_22b_16k_pretrain"), - ("nemotron4_22b_64k", "pretrain_recipe", "nemotron4_22b_64k_pretrain"), ("nemotron4_340b", "pretrain_recipe", "nemotron4_340b_pretrain"), ("nemotron4_340b", "finetune_recipe", "nemotron4_340b_finetune"), ("gpt3_175b", "pretrain_recipe", "gpt3_175b_pretrain"), From 681df5814c13d0240fefaa5c3a14fdc44239b3b9 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Mon, 4 Nov 2024 07:50:08 -0800 Subject: [PATCH 062/125] NeMo-UX: HF PeFT fix (#11096) * add model_transform param Signed-off-by: Alexandros Koumparoulis * fix model freeze with vanilla DDPStrategy; param passing to megatronstrategy Signed-off-by: Alexandros Koumparoulis * remove stale code Signed-off-by: Alexandros Koumparoulis * make vanilla adam config a run.Config Signed-off-by: Alexandros Koumparoulis * update peft recipe for hf_auto_model_for_causal_lm Signed-off-by: Alexandros Koumparoulis * fix typo Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa * fix statement condition Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa --- .../gpt/model/hf_auto_model_for_causal_lm.py | 16 +++++---- .../recipes/hf_auto_model_for_causal_lm.py | 7 ++-- nemo/collections/llm/recipes/optim/adam.py | 6 ++-- nemo/lightning/pytorch/callbacks/peft.py | 36 +++++++++++++------ 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py index f29756dc05a7a..4396b82e0e5c8 100644 --- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py +++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py @@ -22,10 +22,6 @@ from nemo.lightning import io -def 
_extract_non_bias_params(model): - return list(map(lambda x: x[1], filter(lambda x: not 'bias' in x[0], model.named_parameters()))) - - def masked_cross_entropy(logits, targets, mask=None): if mask is not None: loss = F.cross_entropy(logits, targets, reduction='none') @@ -35,7 +31,14 @@ def masked_cross_entropy(logits, targets, mask=None): class HfAutoModelForCausalLM(pl.LightningModule, io.IOMixin, fn.FNMixin): - def __init__(self, model_name='gpt2', load_pretrained_weights=True, tokenizer=None, loss_fn=masked_cross_entropy): + def __init__( + self, + model_name='gpt2', + load_pretrained_weights=True, + tokenizer=None, + loss_fn=masked_cross_entropy, + model_transform=None, + ): super().__init__() self.save_hyperparameters() self.model_name = model_name @@ -44,6 +47,7 @@ def __init__(self, model_name='gpt2', load_pretrained_weights=True, tokenizer=No self.loss_fn = loss_fn self.load_pretrained_weights = load_pretrained_weights self.is_hf_model = True + self.model_transform = model_transform @property def tokenizer(self): @@ -67,7 +71,7 @@ def configure_model(self): else: from transformers import AutoConfig - config = AutoConfig.from_pretained(self.model_name) + config = AutoConfig.from_pretrained(self.model_name) self.model = AutoModelForCausalLM.from_config(config) self.model.train() diff --git a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py index f3ac1d6975bca..f5a52cd351be9 100644 --- a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py +++ b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py @@ -24,6 +24,7 @@ from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HfAutoModelForCausalLM +from nemo.collections.llm.peft.lora import LoRA from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.optim.adam import pytorch_adam_with_cosine_annealing from nemo.utils.exp_manager import TimingCallback @@ -32,7 +33,7 @@ @run.cli.factory(name=NAME) -def model(model_name) -> run.Config[pl.LightningModule]: +def model(model_name, load_pretrained_weights) -> run.Config[pl.LightningModule]: """ Factory function to create HfAutoModelForCausalLM model configurations. 
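A minimal sketch of how the updated factory above might be called once this change lands; the load_pretrained_weights flag is new in this hunk and its default is not shown here, so both arguments are passed explicitly (module path assumed from the file being patched):

    >>> from nemo.collections.llm.recipes import hf_auto_model_for_causal_lm
    >>> # pretrained weights loaded via AutoModelForCausalLM.from_pretrained
    >>> cfg = hf_auto_model_for_causal_lm.model(model_name="gpt2", load_pretrained_weights=True)
    >>> # random init via AutoConfig.from_pretrained + from_config (see configure_model above)
    >>> scratch_cfg = hf_auto_model_for_causal_lm.model(model_name="gpt2", load_pretrained_weights=False)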
@@ -50,7 +51,7 @@ def model(model_name) -> run.Config[pl.LightningModule]: >>> model_config = model(model_name="mistralai/Mistral-Nemo-Instruct-2407") >>> print(model_config) """ - return run.Config(HfAutoModelForCausalLM, model_name=model_name) + return run.Config(HfAutoModelForCausalLM, model_name=model_name, load_pretrained_weights=load_pretrained_weights) def trainer( @@ -223,7 +224,7 @@ def finetune_recipe( if peft_scheme is None or peft_scheme.lower() == 'none': recipe.optim.config.lr = 5e-6 elif peft_scheme.lower() == 'lora': - recipe.peft = run.Config(LoRA) + recipe.peft = run.Config(LoRA, target_modules=['*_proj']) recipe.optim.config.lr = 1e-4 else: raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") diff --git a/nemo/collections/llm/recipes/optim/adam.py b/nemo/collections/llm/recipes/optim/adam.py index 4148d19c66357..8aa0e7ebf613f 100644 --- a/nemo/collections/llm/recipes/optim/adam.py +++ b/nemo/collections/llm/recipes/optim/adam.py @@ -80,7 +80,8 @@ def pytorch_adam_with_cosine_annealing( return run.Config( PytorchOptimizerModule, optim_cls=Adam, - config=dict( + config=run.Config( + dict, lr=max_lr, weight_decay=0.1, betas=(0.9, 0.95), @@ -109,7 +110,8 @@ def pytorch_adam_with_flat_lr( return run.Config( PytorchOptimizerModule, optim_cls=Adam, - config=dict( + config=run.Config( + dict, lr=max_lr, weight_decay=0.1, betas=(0.9, 0.95), diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index 2e32b1f0b73e0..a089ef8dff8fb 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -19,6 +19,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple import pytorch_lightning as pl +import torch import torch.nn as nn from lightning_fabric.utilities.types import _PATH from pytorch_lightning.plugins.io.wrapper import _WrappingCheckpointIO @@ -100,6 +101,9 @@ def __call__(self, model: nn.Module) -> nn.Module: for model_chunk in model: model_chunk.freeze() model_chunk.walk(self.transform) + elif isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel): + model.module.freeze() + model.module.walk(self.transform) else: model.freeze() model.walk(self.transform) @@ -118,8 +122,12 @@ def freeze_model(self, model: nn.Module) -> None: Returns: nn.Module: The transformed model with PEFT applied. 
""" - model.freeze() - model.train(mode=True) + if isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel): + model.module.freeze() + model.module.walk(self.transform) + else: + model.freeze() + model.train(mode=True) def setup(self, trainer: pl.Trainer, pl_module: pl.LightningModule, stage: str) -> None: from nemo.lightning.pytorch.strategies.utils import create_checkpoint_io @@ -128,20 +136,26 @@ def setup(self, trainer: pl.Trainer, pl_module: pl.LightningModule, stage: str) trainer.strategy.trainer = trainer wrapped_io = partial(WrappedAdapterIO, peft=self) + ckpt_io_kwargs = {} + + ckpt_io_kwarg_names = [ + "save_ckpt_format", + "async_save", + "torch_dist_multiproc", + "assume_constant_structure", + "parallel_save", + "parallel_save_within_dp", + "parallel_load", + "load_directly_on_device", + ] ckpt_io_kwargs = { - "save_ckpt_format": trainer.strategy.save_ckpt_format, - "async_save": trainer.strategy.async_save, - "torch_dist_multiproc": trainer.strategy.torch_dist_multiproc, - "assume_constant_structure": trainer.strategy.assume_constant_structure, - "parallel_save": trainer.strategy.parallel_save, - "parallel_save_within_dp": trainer.strategy.parallel_save_within_dp, - "parallel_load": trainer.strategy.parallel_load, - "load_directly_on_device": trainer.strategy.load_directly_on_device, + arg: getattr(trainer.strategy, arg) + for arg in filter(lambda x: hasattr(trainer.strategy, x), ckpt_io_kwarg_names) } trainer.strategy._checkpoint_io = create_checkpoint_io(wrapping_ckpt_io=wrapped_io, **ckpt_io_kwargs) self.wrapped_io = ( trainer.strategy._checkpoint_io._checkpoint_io - if trainer.strategy.async_save + if getattr(trainer.strategy, 'async_save', False) else trainer.strategy._checkpoint_io ) trainer.strategy._init_model_parallel = False From 2b83eae8c5f4535557a8cc640ce3050cf1b104be Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Mon, 4 Nov 2024 09:21:55 -0800 Subject: [PATCH 063/125] remove deprecated test (#11134) --- .github/workflows/cicd-main.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 2bdbe673d19be..2d909838af571 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -3649,32 +3649,6 @@ jobs: # #- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" # # if: "failure()" - L2_TTS_Fast_dev_runs_1_Mixer-TTS: - needs: [cicd-test-container-setup] - uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Mixer-TTS') || needs.cicd-test-container-setup.outputs.all == 'true' - with: - RUNNER: self-hosted-azure - SCRIPT: | - python examples/tts/mixer_tts.py \ - train_dataset=/home/TestData/an4_dataset/an4_train.json \ - validation_datasets=/home/TestData/an4_dataset/an4_val.json \ - sup_data_path=/home/TestData/an4_dataset/sup_data \ - trainer.devices="[0]" \ - +trainer.limit_train_batches=1 \ - +trainer.limit_val_batches=1 \ - trainer.max_epochs=1 \ - trainer.strategy=auto \ - model.pitch_mean=212.35873413085938 \ - model.pitch_std=68.52806091308594 \ - model.train_ds.dataloader_params.batch_size=4 \ - model.train_ds.dataloader_params.num_workers=0 \ - model.validation_ds.dataloader_params.batch_size=4 \ - model.validation_ds.dataloader_params.num_workers=0 \ - ~trainer.check_val_every_n_epoch \ - ~model.text_normalizer \ - ~model.text_normalizer_call_kwargs - L2_TTS_Fast_dev_runs_1_Hifigan: needs: [cicd-test-container-setup] uses: 
./.github/workflows/_test_template.yml @@ -4474,7 +4448,6 @@ jobs: - L2_TTS_Fast_dev_runs_1_WaveGlow - L2_TTS_Fast_dev_runs_1_FastPitch #- OPTIONAL_L2_TTS_Fast_dev_runs_1_RADTTS - - L2_TTS_Fast_dev_runs_1_Mixer-TTS - L2_TTS_Fast_dev_runs_1_Hifigan - Speech_Checkpoints_tests - L2_Stable_Diffusion_Training From 2f22ea7f7e0dba1fe00fe61c407e70ee536d1f98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 4 Nov 2024 18:30:35 +0100 Subject: [PATCH 064/125] ci: Pin release freeze (#11143) * ci: Pin release freeze Signed-off-by: Oliver Koenig * fix Signed-off-by: Oliver Koenig --------- Signed-off-by: Oliver Koenig --- .github/workflows/release-freeze.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml index 0097f0aa2f9f3..4732207a2a5c2 100644 --- a/.github/workflows/release-freeze.yml +++ b/.github/workflows/release-freeze.yml @@ -17,7 +17,7 @@ on: jobs: code-freeze: - uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.1.0 with: name_of_library: NeMo-Toolkit type_of_release: ${{ inputs.type_of_release }} From 5ac9e119889b2f4841856bfb93e3d71895f602d3 Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Mon, 4 Nov 2024 09:58:49 -0800 Subject: [PATCH 065/125] Autoconfigurator test nonoptional --- .github/workflows/cicd-main.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 2d909838af571..b9865549e771b 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -2526,10 +2526,10 @@ jobs: rm -rf examples/nlp/language_modeling/gpt_index_mappings IS_OPTIONAL: true - OPTIONAL_L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124: + L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124: needs: [cicd-test-container-setup] uses: ./.github/workflows/_test_template.yml - if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124') || needs.cicd-test-container-setup.outputs.all == 'true' + if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124') || needs.cicd-test-container-setup.outputs.all == 'true' with: RUNNER: self-hosted-azure-gpus-1 SCRIPT: | @@ -2552,7 +2552,6 @@ jobs: --get_results AFTER_SCRIPT: | rm -rf examples/llm/auto_configurator/auto_conf_logs - IS_OPTIONAL: true L2_Megatron_GPT_Finetuning_PP2: needs: [cicd-test-container-setup] @@ -4422,7 +4421,7 @@ jobs: - L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2 - L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2 # - Optional_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2 - #- OPTIONAL_L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124 + - L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124 - L2_Megatron_GPT_Finetuning_PP2 - L2_Megatron_GPT_Finetuning_StarCoder_PP1 - L2_Megatron_GPT_Embedding From d19e9d38ec09b0a42310de2c54c4884da2607728 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Mon, 4 Nov 2024 23:09:02 +0400 Subject: [PATCH 066/125] Fix RNN-T loss memory usage (#11144) * Fix RNN-T memory usage Signed-off-by: artbataev --------- Signed-off-by: Vladimir Bataev --- .../asr/parts/numba/rnnt_loss/rnnt_pytorch.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py 
b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py index 5960d5ab6b189..01f78c0675cd8 100644 --- a/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py +++ b/nemo/collections/asr/parts/numba/rnnt_loss/rnnt_pytorch.py @@ -80,15 +80,16 @@ def forward(ctx, acts, labels, act_lens, label_lens, blank, reduction, fastemit_ if grads is not None: grads /= minibatch_size - ctx.grads = grads + ctx.save_for_backward(grads) return costs @staticmethod def backward(ctx, grad_output): - if grad_output is not None and ctx.grads is not None: - grad_output = grad_output.view(-1, 1, 1, 1).to(ctx.grads) - return ctx.grads.mul_(grad_output), None, None, None, None, None, None, None + (grads,) = ctx.saved_tensors + if grad_output is not None and grads is not None: + grad_output = grad_output.view(-1, 1, 1, 1).to(grads) + return grads.mul_(grad_output), None, None, None, None, None, None, None class _TDTNumba(Function): @@ -170,18 +171,18 @@ def forward( label_grads /= minibatch_size duration_grads /= minibatch_size - ctx.label_grads = label_grads - ctx.duration_grads = duration_grads + ctx.save_for_backward(label_grads, duration_grads) return costs @staticmethod def backward(ctx, grad_output): - if grad_output is not None and ctx.label_grads is not None: - grad_output = grad_output.view(-1, 1, 1, 1).to(ctx.label_grads) + label_grads, duration_grads = ctx.saved_tensors + if grad_output is not None and label_grads is not None: + grad_output = grad_output.view(-1, 1, 1, 1).to(label_grads) return ( - ctx.label_grads.mul_(grad_output), - ctx.duration_grads.mul_(grad_output), + label_grads.mul_(grad_output), + duration_grads.mul_(grad_output), None, None, None, @@ -251,15 +252,16 @@ def forward( if grads is not None: grads /= minibatch_size - ctx.grads = grads + ctx.save_for_backward(grads) return costs @staticmethod def backward(ctx, grad_output): - if grad_output is not None and ctx.grads is not None: - grad_output = grad_output.view(-1, 1, 1, 1).to(ctx.grads) - return ctx.grads.mul_(grad_output), None, None, None, None, None, None, None, None, None, None + (grads,) = ctx.saved_tensors + if grad_output is not None and grads is not None: + grad_output = grad_output.view(-1, 1, 1, 1).to(grads) + return grads.mul_(grad_output), None, None, None, None, None, None, None, None, None, None def rnnt_loss( From 92b4415eedeaee75bfc1d411fcb4da9712232ae0 Mon Sep 17 00:00:00 2001 From: HuiyingLi Date: Mon, 4 Nov 2024 11:17:45 -0800 Subject: [PATCH 067/125] initial draft for eval api Signed-off-by: HuiyingLi --- nemo/collections/llm/api.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py index 4f47f5c4bc734..a272a17d7ba77 100644 --- a/nemo/collections/llm/api.py +++ b/nemo/collections/llm/api.py @@ -586,6 +586,37 @@ def generate( return [r.generated_text if text_only else r for r in results] +from megatron.core.inference.common_inference_params import CommonInferenceParams +@run.cli.entrypoint(name="eval", namespace="llm") +def eval( + trainer: nl.Trainer, + ckpt_path: Union[Path, str], + input_datamodule: pl.LightningDataModule, + output_path: Union[Path, str], + inference_params: CommonInferenceParams = None, +) -> None: + + from nemo.utils.get_rank import is_global_rank_zero + input_path = input_datamodule.test_path + with open(input_path) as f: + dataset = [json.loads(sample) for sample in f.readlines()] + inputs = [sample["input"] for sample in dataset] + + results = generate(ckpt_path, + trainer=trainer, + 
prompts=inputs, + inference_params=inference_params, + text_only=True) + assert len(results) == len(dataset) + if is_global_rank_zero(): + with open(output_path, "w") as f: + for sample, pred in zip(dataset, results): + line = json.dumps({"input":sample["input"], "label":sample["output"], "prediction":pred}) + f.writelines(line+"\n") + + logging.info(f"Evaluation results written to {output_path}") + + def _use_tokenizer(model: pl.LightningModule, data: pl.LightningDataModule, tokenizer: TokenizerType) -> None: if tokenizer == "data": _set_with_io(model, "tokenizer", data.tokenizer) From c22871bd5e7f1219e575e7df1f194c5472a84715 Mon Sep 17 00:00:00 2001 From: Ao Tang Date: Mon, 4 Nov 2024 16:06:57 -0500 Subject: [PATCH 068/125] Recipe Fix for NeMo CI (#11127) * rename starcoder to starcoder_15b * change sc1 TP2 -> PP2 * change sc1 to PP8 * change nemotron4 340b to TP16 * Apply isort and black reformatting Signed-off-by: suiyoubi * nemotron340b and gemma 7b fix * change to PP18 * revert TPPPCP settings for 340b * enable sp * enable sp --------- Signed-off-by: suiyoubi Signed-off-by: Ao Tang Co-authored-by: suiyoubi --- nemo/collections/llm/recipes/__init__.py | 4 ++-- nemo/collections/llm/recipes/gemma_7b.py | 5 ++++- nemo/collections/llm/recipes/nemotron4_340b.py | 4 ++-- nemo/collections/llm/recipes/starcoder2_15b.py | 2 +- .../llm/recipes/{starcoder.py => starcoder_15b.py} | 6 +++--- 5 files changed, 12 insertions(+), 9 deletions(-) rename nemo/collections/llm/recipes/{starcoder.py => starcoder_15b.py} (98%) diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py index a37d75d4dee88..9f53ec88bdc84 100644 --- a/nemo/collections/llm/recipes/__init__.py +++ b/nemo/collections/llm/recipes/__init__.py @@ -59,11 +59,11 @@ qwen2_7b, qwen2_72b, qwen2_500m, - starcoder, starcoder2, starcoder2_3b, starcoder2_7b, starcoder2_15b, + starcoder_15b, t5_3b, t5_11b, t5_220m, @@ -110,7 +110,7 @@ "t5_220m", "t5_3b", "t5_11b", - "starcoder", + "starcoder_15b", "starcoder2", "starcoder2_3b", "starcoder2_7b", diff --git a/nemo/collections/llm/recipes/gemma_7b.py b/nemo/collections/llm/recipes/gemma_7b.py index 836f0404c0212..44efb3fe56b85 100644 --- a/nemo/collections/llm/recipes/gemma_7b.py +++ b/nemo/collections/llm/recipes/gemma_7b.py @@ -55,7 +55,7 @@ def model() -> run.Config[pl.LightningModule]: def trainer( - tensor_parallelism: int = 1, + tensor_parallelism: int = 2, pipeline_parallelism: int = 1, pipeline_parallelism_type: Optional[torch.dtype] = None, virtual_pipeline_parallelism: Optional[int] = None, @@ -171,6 +171,9 @@ def pretrain_recipe( For more details on pre-training LLMs with NeMo, see the pre-training guide in the `examples/llm/pretrain/` directory. 
""" + # Disable cuDNN attention since TE 1.8 does not support head dim > 128 + os.environ['NVTE_FUSED_ATTN'] = "0" + return run.Partial( fn, model=model(), diff --git a/nemo/collections/llm/recipes/nemotron4_340b.py b/nemo/collections/llm/recipes/nemotron4_340b.py index 2b2bb201ecb7b..c02950109669f 100644 --- a/nemo/collections/llm/recipes/nemotron4_340b.py +++ b/nemo/collections/llm/recipes/nemotron4_340b.py @@ -62,8 +62,8 @@ def pretrain_recipe( pipeline_parallelism: int = 12, pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, virtual_pipeline_parallelism: Optional[int] = 8, - context_parallelism: int = 1, - sequence_parallelism: bool = False, + context_parallelism: int = 2, + sequence_parallelism: bool = True, num_nodes: int = 768, num_gpus_per_node: int = 8, max_steps: int = 100000, diff --git a/nemo/collections/llm/recipes/starcoder2_15b.py b/nemo/collections/llm/recipes/starcoder2_15b.py index 9ca06ce4691a0..a59ec272c8656 100644 --- a/nemo/collections/llm/recipes/starcoder2_15b.py +++ b/nemo/collections/llm/recipes/starcoder2_15b.py @@ -58,7 +58,7 @@ def pretrain_recipe( # Trainer tensor_parallelism: int = 4, pipeline_parallelism: int = 2, - pipeline_parallelism_type: Optional[torch.dtype] = None, + pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, virtual_pipeline_parallelism: Optional[int] = None, context_parallelism: int = 1, sequence_parallelism: bool = False, diff --git a/nemo/collections/llm/recipes/starcoder.py b/nemo/collections/llm/recipes/starcoder_15b.py similarity index 98% rename from nemo/collections/llm/recipes/starcoder.py rename to nemo/collections/llm/recipes/starcoder_15b.py index b90cec0fbd7eb..d4e76abe897e2 100644 --- a/nemo/collections/llm/recipes/starcoder.py +++ b/nemo/collections/llm/recipes/starcoder_15b.py @@ -143,9 +143,9 @@ def pretrain_recipe( dir: Optional[str] = None, name: str = "default", # Trainer - tensor_parallelism: int = 2, - pipeline_parallelism: int = 1, - pipeline_parallelism_type: Optional[torch.dtype] = None, + tensor_parallelism: int = 1, + pipeline_parallelism: int = 8, + pipeline_parallelism_type: Optional[torch.dtype] = torch.bfloat16, virtual_pipeline_parallelism: Optional[int] = None, context_parallelism: int = 1, sequence_parallelism: bool = False, From 65415edd84bf0c07c995d32fd8d79611af3943ad Mon Sep 17 00:00:00 2001 From: Onur Yilmaz <35306097+oyilmaz-nvidia@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:35:18 -0500 Subject: [PATCH 069/125] vLLM Hugging Face exporter (#11124) * fix minor import bug Signed-off-by: Onur Yilmaz * Add save pretrained to HF model Signed-off-by: Onur Yilmaz * vllm hf exporter added Signed-off-by: Onur Yilmaz * pytriton function added Signed-off-by: Onur Yilmaz * update test Signed-off-by: Onur Yilmaz * Example fixed Signed-off-by: Onur Yilmaz * Apply isort and black reformatting Signed-off-by: oyilmaz-nvidia * Remove unused import --------- Signed-off-by: Onur Yilmaz Signed-off-by: oyilmaz-nvidia Co-authored-by: oyilmaz-nvidia --- examples/llm/sft/hf.py | 11 +- examples/llm/sft/hf_vllm.py | 62 ++++++++++ .../tokenizers/huggingface/auto_tokenizer.py | 4 + .../gpt/model/hf_auto_model_for_causal_lm.py | 9 ++ nemo/export/vllm_hf_exporter.py | 115 ++++++++++++++++++ 5 files changed, 199 insertions(+), 2 deletions(-) create mode 100755 examples/llm/sft/hf_vllm.py create mode 100755 nemo/export/vllm_hf_exporter.py diff --git a/examples/llm/sft/hf.py b/examples/llm/sft/hf.py index f7b50298ea149..3d5daddc7a793 100644 --- a/examples/llm/sft/hf.py +++ b/examples/llm/sft/hf.py @@ 
-55,6 +55,7 @@ def squad(tokenizer) -> pl.LightningDataModule: parser.add_argument('--accelerator', default='gpu', choices=['gpu']) parser.add_argument('--max-steps', type=int, default=100) parser.add_argument('--wandb-project', type=str, default=None) + parser.add_argument('--model-save-path', type=str, default=None) args = parser.parse_args() wandb = None @@ -70,9 +71,12 @@ def squad(tokenizer) -> pl.LightningDataModule: grad_clip = None use_dist_samp = False + model = llm.HfAutoModelForCausalLM(args.model) + tokenizer = model.tokenizer + llm.api.finetune( - model=llm.HfAutoModelForCausalLM(args.model), - data=squad(llm.HfAutoModelForCausalLM.configure_tokenizer(args.model)), + model=model, + data=squad(tokenizer), trainer=nl.Trainer( devices=args.devices, max_steps=args.max_steps, @@ -89,3 +93,6 @@ def squad(tokenizer) -> pl.LightningDataModule: optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(max_lr=1e-5, clip_grad=0.5)), log=None, ) + + if args.model_save_path is not None: + model.save_pretrained(args.model_save_path) diff --git a/examples/llm/sft/hf_vllm.py b/examples/llm/sft/hf_vllm.py new file mode 100755 index 0000000000000..8110c0fafc4fd --- /dev/null +++ b/examples/llm/sft/hf_vllm.py @@ -0,0 +1,62 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
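The two examples in this patch can be chained; a rough sketch of that flow, using a placeholder checkpoint path and the exporter class added further down in this patch:

    >>> # 1) fine-tune with examples/llm/sft/hf.py, passing --model-save-path /tmp/sft_ckpt (placeholder path)
    >>> # 2) load the saved HF checkpoint into vLLM for inference
    >>> from nemo.export.vllm_hf_exporter import vLLMHFExporter
    >>> exporter = vLLMHFExporter()
    >>> exporter.export(model="/tmp/sft_ckpt")
    >>> exporter.forward(["How are you doing?"], max_output_len=128)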
+ + +from nemo.deploy import DeployPyTriton +from nemo.deploy.nlp import NemoQueryLLM + +try: + from nemo.export.vllm_hf_exporter import vLLMHFExporter +except Exception: + raise Exception( + "vLLM should be installed in the environment or import " + "the vLLM environment in the NeMo FW container using " + "source /opt/venv/bin/activate command" + ) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('--model', required=True, type=str, help="Local path or model name on Hugging Face") + parser.add_argument('--triton-model-name', required=True, type=str, help="Name for the service") + args = parser.parse_args() + + exporter = vLLMHFExporter() + exporter.export(model=args.model) + + nm = DeployPyTriton( + model=exporter, + triton_model_name=args.triton_model_name, + triton_model_version=1, + max_batch_size=64, + port=8000, + address="0.0.0.0", + ) + + nm.deploy() + nm.run() + + nq = NemoQueryLLM(url="localhost:8000", model_name=args.triton_model_name) + output_deployed = nq.query_llm( + prompts=["How are you doing?"], + max_output_len=128, + top_k=1, + top_p=0.2, + temperature=1.0, + ) + + print("------------- Output: ", output_deployed) + nm.stop() diff --git a/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py b/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py index 43d377b73f347..14da2d13a0303 100644 --- a/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py +++ b/nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py @@ -298,3 +298,7 @@ def name(self): def save_vocabulary(self, save_directory: str, filename_prefix: str = None): """Saves tokenizer's vocabulary and other artifacts to the specified directory""" return self.tokenizer.save_vocabulary(save_directory=save_directory, filename_prefix=filename_prefix) + + def save_pretrained(self, save_directory: str): + """Saves tokenizer's vocabulary and other artifacts to the specified directory""" + return self.tokenizer.save_pretrained(save_directory) diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py index 4396b82e0e5c8..eada3f4c3eb81 100644 --- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py +++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py @@ -20,6 +20,7 @@ from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer from nemo.collections.llm import fn from nemo.lightning import io +from nemo.utils import logging def masked_cross_entropy(logits, targets, mask=None): @@ -111,3 +112,11 @@ def validation_step(self, batch, batch_idx): loss = output.loss self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True) + + def save_pretrained(self, path): + assert self.model is not None, "Model has to be created first." + self.model.save_pretrained(path) + if self._tokenizer is not None: + self._tokenizer.save_pretrained(path) + else: + logging.warning("A tokenizer wasn't created before to save.") diff --git a/nemo/export/vllm_hf_exporter.py b/nemo/export/vllm_hf_exporter.py new file mode 100755 index 0000000000000..2d9754e08767c --- /dev/null +++ b/nemo/export/vllm_hf_exporter.py @@ -0,0 +1,115 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import List + +import numpy as np +from pytriton.decorators import batch +from pytriton.model_config import Tensor +from vllm import LLM, SamplingParams + +from nemo.deploy import ITritonDeployable +from nemo.deploy.utils import cast_output, str_ndarray2list + + +class vLLMHFExporter(ITritonDeployable): + """ + The Exporter class uses vLLM APIs to convert a HF model to vLLM and makes the class, + deployable with Triton server. + + Example: + from nemo.export import vLLMHFExporter + from nemo.deploy import DeployPyTriton + + exporter = vLLMHFExporter() + exporter.export(model="/path/to/model/") + + server = DeployPyTriton( + model=exporter, + triton_model_name='model' + ) + + server.deploy() + server.serve() + server.stop() + """ + + def __init__(self): + self.model = None + + def export(self, model): + """ + Exports the HF checkpoint to vLLM and initializes the engine. + Args: + model (str): model name or the path + """ + self.model = LLM(model=model) + + @property + def get_triton_input(self): + inputs = ( + Tensor(name="prompts", shape=(-1,), dtype=bytes), + Tensor(name="max_output_len", shape=(-1,), dtype=np.int_, optional=True), + Tensor(name="top_k", shape=(-1,), dtype=np.int_, optional=True), + Tensor(name="top_p", shape=(-1,), dtype=np.single, optional=True), + Tensor(name="temperature", shape=(-1,), dtype=np.single, optional=True), + ) + return inputs + + @property + def get_triton_output(self): + outputs = (Tensor(name="outputs", shape=(-1,), dtype=bytes),) + return outputs + + @batch + def triton_infer_fn(self, **inputs: np.ndarray): + try: + infer_input = {"input_texts": str_ndarray2list(inputs.pop("prompts"))} + if "max_output_len" in inputs: + infer_input["max_output_len"] = inputs.pop("max_output_len")[0][0] + if "top_k" in inputs: + infer_input["top_k"] = inputs.pop("top_k")[0][0] + if "top_p" in inputs: + infer_input["top_p"] = inputs.pop("top_p")[0][0] + if "temperature" in inputs: + infer_input["temperature"] = inputs.pop("temperature")[0][0] + + output_texts = self.forward(**infer_input) + output = cast_output(output_texts, np.bytes_) + except Exception as error: + err_msg = "An error occurred: {0}".format(str(error)) + output = cast_output([err_msg], np.bytes_) + + return {"outputs": output} + + def forward( + self, + input_texts: List[str], + max_output_len: int = 64, + top_k: int = 1, + top_p: float = 0.0, + temperature: float = 1.0, + ): + assert self.model is not None, "Model is not initialized." 
+ + sampling_params = SamplingParams( + max_tokens=max_output_len, temperature=temperature, top_k=int(top_k), top_p=top_p + ) + request_output = self.model.generate(input_texts, sampling_params) + output = [] + for o in request_output: + output.append(o.outputs[0].text) + + return output From 82ae2f2fae311f691404c54eb0dbec3a8a355aa0 Mon Sep 17 00:00:00 2001 From: Huiying Date: Mon, 4 Nov 2024 18:19:11 -0800 Subject: [PATCH 070/125] update nemo args for mcore flash decode arg change (#11138) * update mcore transformer layer, block, attention forward args for flash decode Signed-off-by: Huiying Li * Apply isort and black reformatting Signed-off-by: HuiyingLi * update mcore tag in Dockerfile.ci Signed-off-by: HuiyingLi --------- Signed-off-by: Huiying Li Signed-off-by: HuiyingLi Signed-off-by: HuiyingLi Co-authored-by: HuiyingLi --- Dockerfile.ci | 2 +- .../megatron/bert/bert_model.py | 2 ++ .../megatron/falcon/falcon_decoder_layer.py | 4 +++- .../gpt_full_te_layer_autocast_spec.py | 2 ++ .../megatron/griffin/griffin_model.py | 4 +++- .../common/megatron/adapters/mcore_mixins.py | 18 +++++++++++++++--- 6 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 10ea68f2c2474..d51f79e1b3af3 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea ARG MODELOPT_VERSION=0.19.0 -ARG MCORE_TAG=213c8a23fa9fe95d19eff0932a1e6e71767f0962 +ARG MCORE_TAG=441cb9250101cf2cc406f0439b802f34f923f251 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c RUN \ diff --git a/nemo/collections/nlp/models/language_modeling/megatron/bert/bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron/bert/bert_model.py index 67a4802d83f6c..0d75ab7cc7069 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/bert/bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/bert/bert_model.py @@ -206,6 +206,8 @@ def forward( context=None, context_mask=None, rotary_pos_emb=None, + rotary_pos_cos=None, + rotary_pos_sin=None, inference_params=None, packed_seq_params=None, ): diff --git a/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py b/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py index 1783d5f5f3fdb..131f154d67096 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/falcon/falcon_decoder_layer.py @@ -55,7 +55,7 @@ class FalconTransformerLayer(TransformerLayer): Transformer layer takes input with size [s, b, h] and returns an output of the same size. 
- + """ def __init__( @@ -106,6 +106,8 @@ def forward( context=None, context_mask=None, rotary_pos_emb=None, + rotary_pos_cos=None, + rotary_pos_sin=None, inference_params=None, packed_seq_params=None, ): diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py index e9fb1833fc08f..d1945139dee98 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py @@ -250,6 +250,8 @@ def forward( context=None, context_mask=None, rotary_pos_emb=None, + rotary_pos_cos=None, + rotary_pos_sin=None, inference_params=None, packed_seq_params=None, # TODO: handle this ): diff --git a/nemo/collections/nlp/models/language_modeling/megatron/griffin/griffin_model.py b/nemo/collections/nlp/models/language_modeling/megatron/griffin/griffin_model.py index 7a327a3a35cbf..e0e3a2339ca15 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/griffin/griffin_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/griffin/griffin_model.py @@ -160,7 +160,9 @@ def forward( rotary_pos_emb = None self.decoder.input_tensor = None if self.position_embedding_type == 'rope': - rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(None, self.decoder, hidden_states, self.config) + rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len( + None, self.decoder, hidden_states, self.config, None + ) rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len) hidden_states = self.decoder(hidden_states, attention_mask=attention_mask, rotary_pos_emb=rotary_pos_emb) diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py b/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py index 5128b4ca6b16d..da9c98fd94eaa 100644 --- a/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py +++ b/nemo/collections/nlp/modules/common/megatron/adapters/mcore_mixins.py @@ -80,11 +80,21 @@ def forward( context: Tensor = None, context_mask: Tensor = None, rotary_pos_emb: Tensor = None, + rotary_pos_cos: Tensor = None, + rotary_pos_sin: Tensor = None, inference_params: InferenceParams = None, packed_seq_params: PackedSeqParams = None, ): hidden_states = super().forward( - hidden_states, attention_mask, context, context_mask, rotary_pos_emb, inference_params, packed_seq_params + hidden_states, + attention_mask, + context, + context_mask, + rotary_pos_emb, + rotary_pos_cos, + rotary_pos_sin, + inference_params, + packed_seq_params, ) mlp_head_adapter = self.get_adapter_module(AdapterName.MLP_HEAD_ADAPTER) @@ -220,6 +230,8 @@ def forward( inference_params=None, rotary_pos_emb=None, packed_seq_params=None, + rotary_pos_cos=None, + rotary_pos_sin=None, ): # hidden_states: [sq, b, h] @@ -237,8 +249,8 @@ def forward( # =================================================== # Adjust key, value, and rotary_pos_emb for inference # =================================================== - key, value, rotary_pos_emb, attn_mask_type = self._adjust_key_value_for_inference( - inference_params, key, value, rotary_pos_emb + query, key, value, rotary_pos_emb, attn_mask_type = self._adjust_key_value_for_inference( + inference_params, query, key, value, rotary_pos_emb ) if packed_seq_params is not None: From 5bf10e411ad83f7c55d1edfe8e499ff9c58f70de Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Mon, 4 Nov 2024 21:41:56 -0500 Subject: 
[PATCH 071/125] Fix freeze_model call in peft (#11146) * fix freeze_model call in peft Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx --------- Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: cuichenx --- nemo/lightning/pytorch/callbacks/peft.py | 14 ++++++-------- tests/lightning/pytorch/callbacks/test_peft.py | 8 ++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index a089ef8dff8fb..906cbd6e450e8 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -94,18 +94,15 @@ def __call__(self, model: nn.Module) -> nn.Module: Returns: nn.Module: The transformed model with PEFT applied. """ + self.freeze_model(model) - # If using megatron virtual pipeline parallelism, model is a list of - # model chunks so iterate over model + # apply walk to model(s) if isinstance(model, MegatronParallel) and len(model) > 1: for model_chunk in model: - model_chunk.freeze() model_chunk.walk(self.transform) elif isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel): - model.module.freeze() model.module.walk(self.transform) else: - model.freeze() model.walk(self.transform) return model @@ -122,12 +119,14 @@ def freeze_model(self, model: nn.Module) -> None: Returns: nn.Module: The transformed model with PEFT applied. """ + if isinstance(model, MegatronParallel) and len(model) > 1: + for model_chunk in model: + model_chunk.freeze() if isinstance(model, torch.nn.parallel.distributed.DistributedDataParallel): model.module.freeze() - model.module.walk(self.transform) else: model.freeze() - model.train(mode=True) + model.train(mode=True) def setup(self, trainer: pl.Trainer, pl_module: pl.LightningModule, stage: str) -> None: from nemo.lightning.pytorch.strategies.utils import create_checkpoint_io @@ -136,7 +135,6 @@ def setup(self, trainer: pl.Trainer, pl_module: pl.LightningModule, stage: str) trainer.strategy.trainer = trainer wrapped_io = partial(WrappedAdapterIO, peft=self) - ckpt_io_kwargs = {} ckpt_io_kwarg_names = [ "save_ckpt_format", diff --git a/tests/lightning/pytorch/callbacks/test_peft.py b/tests/lightning/pytorch/callbacks/test_peft.py index 95caca4d27848..49a6aa0784aa7 100644 --- a/tests/lightning/pytorch/callbacks/test_peft.py +++ b/tests/lightning/pytorch/callbacks/test_peft.py @@ -26,6 +26,11 @@ class DummyPEFT(PEFT): def transform(self, module, name=None, prefix=None): return module # No-op transform for testing + def freeze_model(self, module): + super().freeze_model(module) + self.is_called = True + return module + class DummyModel(nn.Module, fn.FNMixin): def __init__(self): super().__init__() @@ -38,6 +43,9 @@ def test_peft_call(self): transformed_model = peft(model) + assert ( + hasattr(peft, "is_called") and peft.is_called == True + ), "peft methods may subclass `freeze_model()`, so it must be called" assert transformed_model.linear.weight.requires_grad == False assert transformed_model.conv.weight.requires_grad == False From f5955b2fa220c51bc61d7436ef985498726ed8ff Mon Sep 17 00:00:00 2001 From: HuiyingLi Date: Tue, 5 Nov 2024 00:29:49 -0800 Subject: [PATCH 072/125] add dp to generate Signed-off-by: HuiyingLi --- nemo/collections/llm/api.py | 50 +++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py index a272a17d7ba77..8076a3fdd980b 100644 --- 
a/nemo/collections/llm/api.py +++ b/nemo/collections/llm/api.py @@ -29,6 +29,8 @@ from nemo.lightning.base import NEMO_MODELS_CACHE from nemo.lightning.pytorch.callbacks import PEFT, ModelTransform from nemo.utils import logging +from torch.distributed import all_gather_object +from megatron.core import parallel_state if TYPE_CHECKING: from megatron.core.inference.common_inference_params import CommonInferenceParams @@ -572,41 +574,67 @@ def generate( params_dtype=params_dtype, inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, ) - results = inference.generate( + + dp_size = trainer.strategy.distributed_sampler_kwargs['num_replicas'] + dp_rank = trainer.strategy.distributed_sampler_kwargs['rank'] + chunk_size = (len(prompts) + dp_size - 1) // dp_size + start_idx = dp_rank * chunk_size + end_idx = min(start_idx + chunk_size, len(prompts)) + prompts_on_this_dp_rank = prompts[start_idx:end_idx] + + results_on_this_dp_rank = inference.generate( model=inference_wrapped_model, tokenizer=mcore_tokenizer, - prompts=prompts, + prompts=prompts_on_this_dp_rank, encoder_prompts=encoder_prompts, add_BOS=add_BOS, max_batch_size=max_batch_size, random_seed=random_seed, inference_params=inference_params, ) + gathered_results = [None] * dp_size + + all_gather_object(gathered_results, + [ + r.generated_text if text_only else r for r in results_on_this_dp_rank + ], + group=parallel_state.get_data_parallel_group()) + gathered_results = [result for sublist in gathered_results for result in sublist] - return [r.generated_text if text_only else r for r in results] - + return gathered_results -from megatron.core.inference.common_inference_params import CommonInferenceParams @run.cli.entrypoint(name="eval", namespace="llm") def eval( trainer: nl.Trainer, ckpt_path: Union[Path, str], - input_datamodule: pl.LightningDataModule, + input_dataset: pl.LightningDataModule, output_path: Union[Path, str], - inference_params: CommonInferenceParams = None, + encoder_prompts: Optional[list[str]] = None, + params_dtype: torch.dtype = torch.bfloat16, + add_BOS: bool = False, + max_batch_size: int = 4, + random_seed: Optional[int] = None, + inference_batch_times_seqlen_threshold: int = 1000, + inference_params: Optional["CommonInferenceParams"] = None, ) -> None: from nemo.utils.get_rank import is_global_rank_zero - input_path = input_datamodule.test_path - with open(input_path) as f: - dataset = [json.loads(sample) for sample in f.readlines()] + with open(input_dataset.test_path) as f: + dataset = [json.loads(sample) for sample in f.readlines()][:68] inputs = [sample["input"] for sample in dataset] results = generate(ckpt_path, trainer=trainer, prompts=inputs, + encoder_prompts=encoder_prompts, + params_dtype=params_dtype, + add_BOS=add_BOS, + max_batch_size=max_batch_size, + random_seed=random_seed, + inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, inference_params=inference_params, text_only=True) + assert len(results) == len(dataset) if is_global_rank_zero(): with open(output_path, "w") as f: @@ -614,7 +642,7 @@ def eval( line = json.dumps({"input":sample["input"], "label":sample["output"], "prediction":pred}) f.writelines(line+"\n") - logging.info(f"Evaluation results written to {output_path}") + logging.info(f"Predictions written to {output_path}") def _use_tokenizer(model: pl.LightningModule, data: pl.LightningDataModule, tokenizer: TokenizerType) -> None: From ad2bc2b9f6305ed2253333677d54b22e00fbb42e Mon Sep 17 00:00:00 2001 From: HuiyingLi Date: Tue, 5 Nov 2024 
08:34:31 +0000 Subject: [PATCH 073/125] Apply isort and black reformatting Signed-off-by: HuiyingLi --- nemo/collections/llm/api.py | 52 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py index 8076a3fdd980b..b7780967e889e 100644 --- a/nemo/collections/llm/api.py +++ b/nemo/collections/llm/api.py @@ -21,7 +21,9 @@ import nemo_run as run import pytorch_lightning as pl import torch +from megatron.core import parallel_state from rich.console import Console +from torch.distributed import all_gather_object from typing_extensions import Annotated import nemo.lightning as nl @@ -29,8 +31,6 @@ from nemo.lightning.base import NEMO_MODELS_CACHE from nemo.lightning.pytorch.callbacks import PEFT, ModelTransform from nemo.utils import logging -from torch.distributed import all_gather_object -from megatron.core import parallel_state if TYPE_CHECKING: from megatron.core.inference.common_inference_params import CommonInferenceParams @@ -593,16 +593,17 @@ def generate( inference_params=inference_params, ) gathered_results = [None] * dp_size - - all_gather_object(gathered_results, - [ - r.generated_text if text_only else r for r in results_on_this_dp_rank - ], - group=parallel_state.get_data_parallel_group()) + + all_gather_object( + gathered_results, + [r.generated_text if text_only else r for r in results_on_this_dp_rank], + group=parallel_state.get_data_parallel_group(), + ) gathered_results = [result for sublist in gathered_results for result in sublist] return gathered_results + @run.cli.entrypoint(name="eval", namespace="llm") def eval( trainer: nl.Trainer, @@ -617,31 +618,34 @@ def eval( inference_batch_times_seqlen_threshold: int = 1000, inference_params: Optional["CommonInferenceParams"] = None, ) -> None: - + from nemo.utils.get_rank import is_global_rank_zero + with open(input_dataset.test_path) as f: dataset = [json.loads(sample) for sample in f.readlines()][:68] inputs = [sample["input"] for sample in dataset] - results = generate(ckpt_path, - trainer=trainer, - prompts=inputs, - encoder_prompts=encoder_prompts, - params_dtype=params_dtype, - add_BOS=add_BOS, - max_batch_size=max_batch_size, - random_seed=random_seed, - inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, - inference_params=inference_params, - text_only=True) - + results = generate( + ckpt_path, + trainer=trainer, + prompts=inputs, + encoder_prompts=encoder_prompts, + params_dtype=params_dtype, + add_BOS=add_BOS, + max_batch_size=max_batch_size, + random_seed=random_seed, + inference_batch_times_seqlen_threshold=inference_batch_times_seqlen_threshold, + inference_params=inference_params, + text_only=True, + ) + assert len(results) == len(dataset) if is_global_rank_zero(): with open(output_path, "w") as f: for sample, pred in zip(dataset, results): - line = json.dumps({"input":sample["input"], "label":sample["output"], "prediction":pred}) - f.writelines(line+"\n") - + line = json.dumps({"input": sample["input"], "label": sample["output"], "prediction": pred}) + f.writelines(line + "\n") + logging.info(f"Predictions written to {output_path}") From 6419367452d8a07cc980919e6b038d5f2a500768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 5 Nov 2024 13:33:53 +0100 Subject: [PATCH 074/125] =?UTF-8?q?[=F0=9F=A4=A0]:=20Howdy=20folks,=20let'?= =?UTF-8?q?s=20bump=20`Dockerfile.ci`=20to=203d27a9d=20!=20(#11159)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- Dockerfile.ci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index d51f79e1b3af3..80b4155648e40 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T # Install NeMo requirements ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea ARG MODELOPT_VERSION=0.19.0 -ARG MCORE_TAG=441cb9250101cf2cc406f0439b802f34f923f251 +ARG MCORE_TAG=3d27a9de61534a0af248b7cf5af6013d93bd52db ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c RUN \ From a294601ef81c2b70ebda08f0b29d0e73bc5fe5f2 Mon Sep 17 00:00:00 2001 From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com> Date: Tue, 5 Nov 2024 05:41:57 -0800 Subject: [PATCH 075/125] NeMo-UX: Add sgd optim (#11157) * fix Signed-off-by: Alexandros Koumparoulis * add sgd Signed-off-by: Alexandros Koumparoulis * remove stale args Signed-off-by: Alexandros Koumparoulis * remove clip grad arg Signed-off-by: Alexandros Koumparoulis * Pass optimizer as a run.Partial instead Signed-off-by: Alexandros Koumparoulis * move param extraction out of PytorchOptimizerModule Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * Add ParamsT import Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * fix Signed-off-by: Alexandros Koumparoulis * Apply isort and black reformatting Signed-off-by: akoumpa --------- Signed-off-by: Alexandros Koumparoulis Signed-off-by: akoumpa Co-authored-by: akoumpa --- examples/llm/peft/hf.py | 2 +- examples/llm/sft/hf.py | 2 +- nemo/collections/llm/recipes/optim/adam.py | 34 +++------ nemo/collections/llm/recipes/optim/sgd.py | 62 ++++++++++++++++ nemo/lightning/pytorch/optim/pytorch.py | 84 +++++++++++----------- 5 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 nemo/collections/llm/recipes/optim/sgd.py diff --git a/examples/llm/peft/hf.py b/examples/llm/peft/hf.py index c6dbbf90bf293..97f21d6c253ed 100644 --- a/examples/llm/peft/hf.py +++ b/examples/llm/peft/hf.py @@ -96,7 +96,7 @@ def formatting_prompts_func(examples): use_distributed_sampler=use_dist_samp, logger=wandb, ), - optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(max_lr=1e-5, clip_grad=0.5)), + optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(lr=1e-5)), log=None, peft=llm.peft.LoRA( target_modules=['*_proj'], diff --git a/examples/llm/sft/hf.py b/examples/llm/sft/hf.py index 3d5daddc7a793..39efe87de368d 100644 --- a/examples/llm/sft/hf.py +++ b/examples/llm/sft/hf.py @@ -90,7 +90,7 @@ def squad(tokenizer) -> pl.LightningDataModule: use_distributed_sampler=use_dist_samp, logger=wandb, ), - optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(max_lr=1e-5, clip_grad=0.5)), + optim=fdl.build(llm.adam.pytorch_adam_with_flat_lr(lr=1e-5)), log=None, ) diff --git a/nemo/collections/llm/recipes/optim/adam.py b/nemo/collections/llm/recipes/optim/adam.py index 8aa0e7ebf613f..b5a60b6f8b3f8 100644 --- a/nemo/collections/llm/recipes/optim/adam.py +++ b/nemo/collections/llm/recipes/optim/adam.py @@ -17,12 +17,7 @@ import nemo_run as run from megatron.core.optimizer import OptimizerConfig -from nemo.lightning.pytorch.optim import ( - CosineAnnealingScheduler, - MegatronOptimizerModule, - OptimizerModule, - PytorchOptimizerModule, -) +from nemo.lightning.pytorch.optim import 
CosineAnnealingScheduler, MegatronOptimizerModule, PytorchOptimizerModule @run.cli.factory @@ -35,7 +30,7 @@ def distributed_fused_adam_with_cosine_annealing( max_lr: float = 1e-4, min_lr: Optional[float] = None, clip_grad: float = 1.0, -) -> run.Config[OptimizerModule]: +) -> run.Config[PytorchOptimizerModule]: opt_cfg = run.Config( OptimizerConfig, @@ -68,20 +63,17 @@ def distributed_fused_adam_with_cosine_annealing( @run.cli.factory def pytorch_adam_with_cosine_annealing( - precision: str = "bf16-mixed", # or "16-mixed" warmup_steps: int = 2000, constant_steps: int = 0, max_lr: float = 1e-5, min_lr: Optional[float] = None, - clip_grad: float = 1.0, -) -> run.Config[OptimizerModule]: +) -> run.Config[PytorchOptimizerModule]: from torch.optim import Adam return run.Config( PytorchOptimizerModule, - optim_cls=Adam, - config=run.Config( - dict, + optimizer_fn=run.Partial( + Adam, lr=max_lr, weight_decay=0.1, betas=(0.9, 0.95), @@ -98,21 +90,15 @@ def pytorch_adam_with_cosine_annealing( @run.cli.factory def pytorch_adam_with_flat_lr( - precision: str = "bf16-mixed", # or "16-mixed" - warmup_steps: int = 2000, - constant_steps: int = 0, - max_lr: float = 1e-5, - min_lr: Optional[float] = None, - clip_grad: float = 1.0, -) -> run.Config[OptimizerModule]: + lr: float = 1e-5, +) -> run.Config[PytorchOptimizerModule]: from torch.optim import Adam return run.Config( PytorchOptimizerModule, - optim_cls=Adam, - config=run.Config( - dict, - lr=max_lr, + optimizer_fn=run.Partial( + Adam, + lr=lr, weight_decay=0.1, betas=(0.9, 0.95), eps=1e-8, diff --git a/nemo/collections/llm/recipes/optim/sgd.py b/nemo/collections/llm/recipes/optim/sgd.py new file mode 100644 index 0000000000000..7c55c6915ee1f --- /dev/null +++ b/nemo/collections/llm/recipes/optim/sgd.py @@ -0,0 +1,62 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
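# --- Editor's sketch (annotation, not part of this patch): the SGD factories added in this
# file, like the reworked adam.py factories above, now hand PytorchOptimizerModule a
# pre-bound optimizer constructor (run.Partial) instead of an optim_cls/config pair.
# Below is a minimal, framework-free sketch of that idea using only functools.partial and
# torch.optim.SGD; all names here are illustrative and are not NeMo API.

import functools

import torch.nn as nn
from torch.optim import SGD


def param_groups(model: nn.Module, weight_decay: float):
    # Same split as _extract_model_params_for_optim later in this patch:
    # bias parameters are placed in a group with weight_decay=0.
    with_wd, without_wd = [], []
    for name, p in model.named_parameters():
        (without_wd if 'bias' in name else with_wd).append(p)
    return [
        {'params': with_wd, 'weight_decay': weight_decay},
        {'params': without_wd, 'weight_decay': 0.0},
    ]


# functools.partial stands in for run.Partial(SGD, lr=..., weight_decay=...).
optimizer_fn = functools.partial(SGD, lr=1e-5, weight_decay=1e-4)
model = nn.Linear(8, 8)
wd = optimizer_fn.keywords.get('weight_decay', 0.0)
optimizer = optimizer_fn(param_groups(model, wd))
# --- end editor's sketch ---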
+ +from typing import Optional + +import nemo_run as run + +from nemo.lightning.pytorch.optim import CosineAnnealingScheduler, PytorchOptimizerModule + + +@run.cli.factory +def pytorch_sgd_with_cosine_annealing( + warmup_steps: int = 2000, + constant_steps: int = 0, + max_lr: float = 1e-5, + min_lr: Optional[float] = None, + wd: float = 1e-4, +) -> run.Config[PytorchOptimizerModule]: + from torch.optim import SGD + + return run.Config( + PytorchOptimizerModule, + optimizer_fn=run.Partial( + SGD, + lr=max_lr, + weight_decay=wd, + ), + lr_scheduler=run.Config( + CosineAnnealingScheduler, + warmup_steps=warmup_steps, + constant_steps=constant_steps, + min_lr=min_lr or (0.1 * max_lr), + ), + ) + + +@run.cli.factory +def pytorch_sgd_with_flat_lr( + lr: float = 1e-5, + wd: float = 1e-4, +) -> run.Config[PytorchOptimizerModule]: + from torch.optim import SGD + + return run.Config( + PytorchOptimizerModule, + optimizer_fn=run.Partial( + SGD, + lr=lr, + weight_decay=wd, + ), + ) diff --git a/nemo/lightning/pytorch/optim/pytorch.py b/nemo/lightning/pytorch/optim/pytorch.py index 6600fc0cf0a47..9d773917e4f4e 100644 --- a/nemo/lightning/pytorch/optim/pytorch.py +++ b/nemo/lightning/pytorch/optim/pytorch.py @@ -15,7 +15,9 @@ from typing import Callable, List, Optional import pytorch_lightning as pl +import pytorch_lightning as L from torch.optim import Optimizer +from torch.optim.optimizer import ParamsT from nemo.lightning.megatron_parallel import MegatronParallel from nemo.lightning.pytorch.optim.base import LRSchedulerModule, OptimizerModule @@ -25,20 +27,43 @@ def _param_does_not_have_wd(param_name, param): return 'bias' in param_name +def _extract_model_params_for_optim(model, weight_decay=0, no_weight_decay_cond=None): + params_with_wd, params_without_wd = [], [] + if no_weight_decay_cond is not None: + for name, param in model.named_parameters(): + if no_weight_decay_cond(name, param): + params_without_wd.append(param) + else: + params_with_wd.append(param) + else: + params_with_wd = model.parameters() + + assert max(map(len, (params_with_wd, params_without_wd))) > 0, "Expected at least one optimizer with params" + + return [ + {'params': params, 'weight_decay': wd} + for params, wd in zip((params_with_wd, params_without_wd), (weight_decay, 0)) + ] + + class PytorchOptimizerModule(OptimizerModule): """A OptimizerModule for pytorch optimizers. Attributes: - config (OptimizerConfig): Configuration for the optimizer. + optimizer_fn (Callable[[ParamsT], Optimizer]): Configuration for the optimizer. no_weight_decay_cond (Optional[Callable]): Condition for no weight decay. scale_lr_cond (Optional[Callable]): Condition for scaling learning rate. lr_mult (float): Learning rate multiplier. Example:: - config = OptimizerConfig(...) + optimizer_fn = run.Partial( + SGD, + lr=lr, + weight_decay=wd, + ) lr_scheduler = MyLRSchedulerModule(...) - optimizer_module = PytorchOptimizerModule(config, lr_scheduler) + optimizer_module = PytorchOptimizerModule(optimizer_fn, lr_scheduler) Methods: setup(model): Sets up the optimizer. @@ -47,8 +72,7 @@ class PytorchOptimizerModule(OptimizerModule): def __init__( self, - optim_cls, - config: dict = {'lr': 3e-4}, + optimizer_fn: Callable[[ParamsT], Optimizer], lr_scheduler: Optional[LRSchedulerModule] = None, no_weight_decay_cond: Optional[Callable] = _param_does_not_have_wd, scale_lr_cond: Optional[Callable] = None, @@ -57,7 +81,7 @@ def __init__( """Initializes the PytorchOptimizerModule. Args: - config (OptimizerConfig): Configuration for the optimizer. 
+ optimizer_fn (Callable[[ParamsT], Optimizer]): Configuration for the optimizer. lr_scheduler (Optional[LRSchedulerModule]): The learning rate scheduler module. no_weight_decay_cond (Optional[Callable]): Condition for no weight decay. scale_lr_cond (Optional[Callable]): Condition for scaling learning rate. @@ -65,12 +89,10 @@ def __init__( """ super().__init__(lr_scheduler=lr_scheduler) - self.optim_cls = optim_cls - self.config = config + self.optimizer_fn = optimizer_fn self.no_weight_decay_cond = no_weight_decay_cond self.scale_lr_cond = scale_lr_cond self.lr_mult = lr_mult - self.optim_cls = optim_cls def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): # Noop @@ -92,41 +114,17 @@ def optimizers(self, model) -> List[Optimizer]: if isinstance(model, MegatronParallel): raise ValueError("Model cannot be an instance of MegatronParallel") - params_with_wd, params_without_wd = [], [] - if self.no_weight_decay_cond is not None: - for name, param in model.named_parameters(): - if self.no_weight_decay_cond(name, param): - params_without_wd.append(param) - else: - params_with_wd.append(param) - else: - params_with_wd = model.parameters() - - optimizers = [] - if len(params_with_wd) > 0: - optimizers.append( - self.optim_cls( - params_with_wd, - **self.config, - ) - ) - - if len(params_without_wd) > 0: - wd = self.config.get('weight_decay', None) - kwargs['weight_decay'] = 0 - optimizers.append( - self.optim_cls( - params_without_wd, - **kwargs, - ) - ) - # restore value - if wd is not None: - kwargs['weight_decay'] = wd - - assert len(optimizers) > 0, "Expected at least one optimizer with params" - return optimizers + wd = self.optimizer_fn.keywords.get('weight_decay', 0) + return self.optimizer_fn(_extract_model_params_for_optim(model, wd, self.no_weight_decay_cond)) def finalize_model_grads(self, *args, **kwargs): # Noop pass + + def connect(self, model: L.LightningModule) -> None: + """Connects the optimizer module to the model and trainer. + + Args: + model (L.LightningModule): The model to which the optimizer module is being connected. 
+ """ + model.configure_optimizers = lambda: self.optimizers(model) From ee7f47bfd6fdcadf9386d9298b838c33933cb454 Mon Sep 17 00:00:00 2001 From: Ssofja <78349198+Ssofja@users.noreply.github.com> Date: Tue, 5 Nov 2024 20:13:22 +0400 Subject: [PATCH 076/125] Added deprecation notice (#11133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added deprecation notice Signed-off-by: Ssofja * Apply isort and black reformatting Signed-off-by: Ssofja * Updated text in deprecation notice Co-authored-by: Nithin Rao Signed-off-by: Ssofja <78349198+Ssofja@users.noreply.github.com> * Updated text in deprecation notice Co-authored-by: Nithin Rao Signed-off-by: Ssofja <78349198+Ssofja@users.noreply.github.com> * Apply isort and black reformatting Signed-off-by: Ssofja * Update version Co-authored-by: Somshubra Majumdar Signed-off-by: Ssofja <78349198+Ssofja@users.noreply.github.com> * Update nemo/collections/asr/data/audio_to_text.py Co-authored-by: oliver könig Signed-off-by: Ssofja <78349198+Ssofja@users.noreply.github.com> --------- Signed-off-by: Ssofja Signed-off-by: Ssofja Signed-off-by: Ssofja <78349198+Ssofja@users.noreply.github.com> Co-authored-by: Ssofja Co-authored-by: Nithin Rao Co-authored-by: Somshubra Majumdar Co-authored-by: oliver könig --- nemo/collections/asr/data/audio_to_text.py | 4 ++++ nemo/collections/asr/models/aed_multitask_models.py | 5 +++++ nemo/collections/asr/models/classification_models.py | 2 ++ nemo/collections/asr/models/confidence_ensemble.py | 2 ++ nemo/collections/asr/models/ctc_models.py | 4 ++++ nemo/collections/asr/models/rnnt_models.py | 4 ++++ 6 files changed, 21 insertions(+) diff --git a/nemo/collections/asr/data/audio_to_text.py b/nemo/collections/asr/data/audio_to_text.py index d5ece6202da71..542f0fe70eca2 100644 --- a/nemo/collections/asr/data/audio_to_text.py +++ b/nemo/collections/asr/data/audio_to_text.py @@ -42,6 +42,7 @@ is_datastore_path, is_tarred_path, ) +from nemo.utils.decorators import deprecated from nemo.utils.distributed import webdataset_split_by_workers from nemo.utils.get_rank import is_global_rank_zero @@ -730,6 +731,9 @@ def __call__(self, *args): ) +@deprecated( + explanation='Webdataset support will be removed in v2.1.0 versions, please use LhotseSpeechToTextBpeDataset class instead' +) class _TarredAudioToTextDataset(IterableDataset): """ A similar Dataset to the AudioToCharDataset/AudioToBPEDataset, but which loads tarred audio files. diff --git a/nemo/collections/asr/models/aed_multitask_models.py b/nemo/collections/asr/models/aed_multitask_models.py index fc3662b04bc89..268438c2e09d8 100644 --- a/nemo/collections/asr/models/aed_multitask_models.py +++ b/nemo/collections/asr/models/aed_multitask_models.py @@ -61,6 +61,8 @@ SpectrogramType, ) from nemo.utils import logging, model_utils +from nemo.utils.decorators import deprecated + __all__ = ['EncDecMultiTaskModel'] @@ -886,6 +888,9 @@ def _transcribe_forward( decoder_input_ids=decoder_input_ids, ) + @deprecated( + explanation='The return type of args will be updated in the upcoming release to ensure a consistent output format across all decoder types, such that a Hypothesis object is always returned.' + ) def _transcribe_output_processing(self, outputs, trcfg: MultiTaskTranscriptionConfig) -> GenericTranscriptionType: """ Internal function to process the model's outputs to return the results to the user. 
This function is called by diff --git a/nemo/collections/asr/models/classification_models.py b/nemo/collections/asr/models/classification_models.py index 7b226f59e364b..b49ef50583a7d 100644 --- a/nemo/collections/asr/models/classification_models.py +++ b/nemo/collections/asr/models/classification_models.py @@ -39,6 +39,7 @@ from nemo.core.neural_types import * from nemo.utils import logging, model_utils from nemo.utils.cast_utils import cast_all +from nemo.utils.decorators import deprecated __all__ = ['EncDecClassificationModel', 'EncDecRegressionModel'] @@ -483,6 +484,7 @@ def get_transcribe_config(cls) -> ClassificationInferConfig: return ClassificationInferConfig() +@deprecated(explanation='EncDecClassificationModel will be merged with EncDecSpeakerLabelModel class.') class EncDecClassificationModel(_EncDecBaseModel): """Encoder decoder Classification models.""" diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index 9ae3bc3fbb5d4..c6b2846085af8 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -33,6 +33,7 @@ from nemo.collections.asr.parts.utils.rnnt_utils import Hypothesis from nemo.core.classes import ModelPT from nemo.utils import model_utils +from nemo.utils.decorators import deprecated # frozen is required to allow hashing of this class and use it @@ -151,6 +152,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) return conf_value +@deprecated(version='v2.1.0') class ConfidenceEnsembleModel(ModelPT): """Implementation of the confidence ensemble model. diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index 4976c09b44c1c..edf4f84a9f9bf 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -43,6 +43,7 @@ from nemo.core.classes.mixins import AccessMixin from nemo.core.neural_types import AudioSignal, LabelsType, LengthsType, LogprobsType, NeuralType, SpectrogramType from nemo.utils import logging +from nemo.utils.decorators import deprecated __all__ = ['EncDecCTCModel'] @@ -668,6 +669,9 @@ def _transcribe_forward(self, batch: Any, trcfg: TranscribeConfig): del greedy_predictions return output + @deprecated( + explanation='The return type of args will be updated in the upcoming release to ensure a consistent output format across all decoder types, such that a Hypothesis object is always returned.' 
+ ) def _transcribe_output_processing(self, outputs, trcfg: TranscribeConfig) -> GenericTranscriptionType: logits = outputs.pop('logits') logits_len = outputs.pop('logits_len') diff --git a/nemo/collections/asr/models/rnnt_models.py b/nemo/collections/asr/models/rnnt_models.py index 0d1fbe651dc39..2b319a3c7dece 100644 --- a/nemo/collections/asr/models/rnnt_models.py +++ b/nemo/collections/asr/models/rnnt_models.py @@ -46,6 +46,7 @@ from nemo.core.classes.mixins import AccessMixin from nemo.core.neural_types import AcousticEncodedRepresentation, AudioSignal, LengthsType, NeuralType, SpectrogramType from nemo.utils import logging +from nemo.utils.decorators import deprecated class EncDecRNNTModel(ASRModel, ASRModuleMixin, ExportableEncDecModel, ASRTranscriptionMixin): @@ -899,6 +900,9 @@ def _transcribe_forward(self, batch: Any, trcfg: TranscribeConfig): output = dict(encoded=encoded, encoded_len=encoded_len) return output + @deprecated( + explanation='The return type of args will be updated in the upcoming release to ensure a consistent output format across all decoder types, such that a "Hypothesis" object is always returned.' + ) def _transcribe_output_processing( self, outputs, trcfg: TranscribeConfig ) -> Tuple[List['Hypothesis'], List['Hypothesis']]: From 3a1a34d6d65f108ac36529d6c9e56733fd39fd8c Mon Sep 17 00:00:00 2001 From: Pablo Garay Date: Tue, 5 Nov 2024 08:47:56 -0800 Subject: [PATCH 077/125] Update copyright check (#11168) * Make/move copyright check template * Adjustment for standard format --- .github/workflows/copyright-check.yml | 43 ++------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/.github/workflows/copyright-check.yml b/.github/workflows/copyright-check.yml index 724f3afb61770..ebd35c51dc447 100644 --- a/.github/workflows/copyright-check.yml +++ b/.github/workflows/copyright-check.yml @@ -11,49 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + name: Copyright check on: pull_request: jobs: - main: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - path: ${{ github.run_id }} - fetch-depth: 0 - - - name: Check files have copyright notice - run: | - cd ${{ github.run_id }} - - # Files ending with .py should have Copyright notice in the first 10 lines - find_files_with_missing_copyright() { - find ./ -type f -name '*.py' -not -path "./.git/*" -not -path "./*__init__.py" | while read path; do - echo -en $path"\t" - head -n 10 $path | tr '\n' '\t' | sed 's/\t$/\n/' - done \ - | egrep -iv 'Copyright.*NVIDIA CORPORATION.*' \ - | egrep -iv '*MIT.*Licen.e.*' \ - | egrep -iv '*Copyright.*Apache.*' \ - | egrep -iv '*Apache.*License.*' \ - | while read line; do - echo $line | cut -d' ' -f1 - done - } - - - declare RESULT=($(find_files_with_missing_copyright)) # (..) 
= array - - if [ "${#RESULT[@]}" -gt 0 ]; then - echo "Error: Found files with missing copyright:" - for (( i=0; i<"${#RESULT[@]}"; i++ )); do - echo "path= ${RESULT[$i]}" - done - exit 1; - else - echo "Ok: All (Python) files start with copyright notice" - fi + copyright-check: + uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_copyright_check.yml@v0.2.0 \ No newline at end of file From fb00406638facbcd0f7e1ba9aaea304e9d713a5f Mon Sep 17 00:00:00 2001 From: Anna Shors <71393111+ashors1@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:01:49 -0800 Subject: [PATCH 078/125] Call `ckpt_to_weights_subdir` from `MegatronCheckpointIO` (#10897) * locate weights path within MegatronCheckpointIO Signed-off-by: ashors1 * small refactor Signed-off-by: ashors1 * remove another instance of ckpt_to_weights_subdir Signed-off-by: ashors1 * move ckpt_to_weights_subdir Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: artbataev * add weights path in save_checkpoint Signed-off-by: ashors1 * fix circular import Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * handle saving in ckpt_to_weights_subdir Signed-off-by: ashors1 * fix minor typo Signed-off-by: ashors1 * bug fixes Signed-off-by: ashors1 * fix undefined variable Signed-off-by: ashors1 * move function Signed-off-by: ashors1 * Apply isort and black reformatting Signed-off-by: ashors1 * fix adapter meta file path Signed-off-by: Chen Cui * Apply isort and black reformatting Signed-off-by: cuichenx * fix mixtral test Signed-off-by: ashors1 * fix mixtral test Signed-off-by: ashors1 * use function for weights subdir Signed-off-by: Chen Cui * address comments Signed-off-by: ashors1 * move asserts Signed-off-by: ashors1 * fix undefined vars Signed-off-by: ashors1 * bug fix Signed-off-by: ashors1 --------- Signed-off-by: ashors1 Signed-off-by: ashors1 Signed-off-by: artbataev Signed-off-by: Chen Cui Signed-off-by: cuichenx Co-authored-by: ashors1 Co-authored-by: artbataev Co-authored-by: Chen Cui Co-authored-by: cuichenx --- nemo/lightning/ckpt_utils.py | 6 ---- nemo/lightning/fabric/fabric.py | 4 +-- nemo/lightning/io/connector.py | 4 +-- nemo/lightning/io/pl.py | 30 +++++++++++++++++-- .../pytorch/callbacks/model_checkpoint.py | 22 +++++++------- nemo/lightning/pytorch/callbacks/peft.py | 4 +-- .../pytorch/strategies/megatron_strategy.py | 9 +----- tests/collections/llm/bitexact/mixtral/run.sh | 2 +- .../llm/megatron_mixtral_pretraining.py | 2 +- 9 files changed, 47 insertions(+), 36 deletions(-) diff --git a/nemo/lightning/ckpt_utils.py b/nemo/lightning/ckpt_utils.py index ae1fe520a1195..fa588092497ac 100644 --- a/nemo/lightning/ckpt_utils.py +++ b/nemo/lightning/ckpt_utils.py @@ -33,12 +33,6 @@ def idempotent_path_append(base_dir: Union[str, Path], suffix) -> Path: return base_dir -def ckpt_to_weights_subdir(filepath: Union[str, Path]) -> Path: - """Given an input checkpoint filepath, clean it using `ckpt_to_dir` and then return the weights subdirectory.""" - base_dir = ckpt_to_dir(filepath=filepath) - return idempotent_path_append(base_dir, WEIGHTS_PATH) - - def ckpt_to_context_subdir(filepath: Union[str, Path]) -> Path: """Given an input checkpoint filepath, clean it using `ckpt_to_dir` and then return the context subdirectory.""" base_dir = ckpt_to_dir(filepath=filepath) diff --git a/nemo/lightning/fabric/fabric.py b/nemo/lightning/fabric/fabric.py index b1ca867cab83f..60eb518a1e424 100644 --- a/nemo/lightning/fabric/fabric.py +++ 
b/nemo/lightning/fabric/fabric.py @@ -22,7 +22,7 @@ from torch import nn from typing_extensions import Self, override -from nemo.lightning.ckpt_utils import ckpt_to_context_subdir, ckpt_to_weights_subdir +from nemo.lightning.ckpt_utils import ckpt_to_context_subdir from nemo.lightning.io.mixin import IOMixin, serialization, track_io if TYPE_CHECKING: @@ -83,7 +83,7 @@ def load_model( model = context.model dist_model = self.setup_module(model) - self.load(ckpt_to_weights_subdir(path), {"state_dict": dist_model}) + self.load(path, {"state_dict": dist_model}) return dist_model diff --git a/nemo/lightning/io/connector.py b/nemo/lightning/io/connector.py index fd7b814fe7307..2ccb9bb1b1fe5 100644 --- a/nemo/lightning/io/connector.py +++ b/nemo/lightning/io/connector.py @@ -22,7 +22,7 @@ from filelock import FileLock, Timeout from pytorch_lightning.trainer.states import TrainerFn -from nemo.lightning.ckpt_utils import ckpt_to_context_subdir, ckpt_to_weights_subdir +from nemo.lightning.ckpt_utils import ckpt_to_context_subdir # Dynamically inherit from the correct Path subclass based on the operating system. if os.name == 'nt': @@ -198,7 +198,7 @@ def nemo_save(self, output_path: Path, trainer: pl.Trainer, dump_io: bool = True trainer.strategy.setup(trainer) output_path = Path(output_path) output_path.mkdir(parents=True, exist_ok=True) - trainer.save_checkpoint(ckpt_to_weights_subdir(output_path)) + trainer.save_checkpoint(output_path) if getattr(trainer.strategy, "async_save", False): trainer.strategy.checkpoint_io.maybe_finalize_save_checkpoint(blocking=True) diff --git a/nemo/lightning/io/pl.py b/nemo/lightning/io/pl.py index 1a7880e384927..10ed52b136c28 100644 --- a/nemo/lightning/io/pl.py +++ b/nemo/lightning/io/pl.py @@ -37,7 +37,7 @@ from torch import nn from typing_extensions import Self, override -from nemo.lightning.ckpt_utils import ckpt_to_dir +from nemo.lightning.ckpt_utils import WEIGHTS_PATH, ckpt_to_dir from nemo.lightning.io.capture import IOProtocol from nemo.lightning.io.mixin import IOMixin @@ -78,6 +78,26 @@ def construct_extra(cls, trainer: pl.Trainer) -> Dict[str, Any]: return extra +def ckpt_to_weights_subdir(filepath: Union[str, Path], is_saving) -> Path: + """Given an input checkpoint filepath, clean it using `ckpt_to_dir` and then return the weights subdirectory, if it exists.""" + filepath = ckpt_to_dir(filepath=filepath) + base_dir = filepath + assert isinstance(base_dir, Path) + if base_dir.parts[-1] != WEIGHTS_PATH: + maybe_base_dir = base_dir / WEIGHTS_PATH + if maybe_base_dir.is_dir() or is_saving: + base_dir = maybe_base_dir + ## handle adapter paths + if hasattr(base_dir, "base_model_path") and base_dir.base_model_path.parts[-1] != WEIGHTS_PATH: + maybe_base_model_path = base_dir.base_model_path / WEIGHTS_PATH + if maybe_base_model_path.is_dir() or is_saving: + base_dir.base_model_path = base_dir.base_model_path / WEIGHTS_PATH + if is_saving: + assert base_dir.parts[-1] == WEIGHTS_PATH + assert base_dir.parent == Path(filepath) + return base_dir + + class MegatronCheckpointIO(AsyncCompatibleCheckpointIO, IOMixin): """CheckpointIO that utilizes :func:`torch.save` and :func:`torch.load` to save and load checkpoints respectively, common for most use cases. @@ -132,7 +152,8 @@ def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_optio f" storage_options, but {storage_options=} was provided." 
f" Ignoring given storage_options" ) - checkpoint_dir = ckpt_to_dir(path) + checkpoint_dir = ckpt_to_weights_subdir(path, is_saving=True) + fs = get_filesystem(checkpoint_dir) if fs.isdir(checkpoint_dir) and dist_checkpointing.check_is_distributed_checkpoint(checkpoint_dir): logging.info(f'Distributed checkpoint at path {checkpoint_dir} already exists, skipping saving') @@ -180,6 +201,11 @@ def load_checkpoint( if not fs.isdir(path): raise ValueError(f"Distributed checkpoints should be a directory. Found: {path}.") + # Load from ckpt_path/weights (new format) if it exists + path = ckpt_to_weights_subdir(path, is_saving=False) + if hasattr(path, "base_model_path") and not path.base_model_path.exists(): + path.base_model_path = path.base_model_path.parent + if self.save_ckpt_format == 'zarr' and self.load_directly_on_device: from megatron.core.dist_checkpointing.strategies.tensorstore import TensorStoreLoadShardedStrategy diff --git a/nemo/lightning/pytorch/callbacks/model_checkpoint.py b/nemo/lightning/pytorch/callbacks/model_checkpoint.py index cffa8b9275ffa..b384976d82bdc 100644 --- a/nemo/lightning/pytorch/callbacks/model_checkpoint.py +++ b/nemo/lightning/pytorch/callbacks/model_checkpoint.py @@ -58,7 +58,6 @@ class ModelCheckpoint(PTLModelCheckpoint): """ UNFINISHED_CHECKPOINT_SUFFIX = "-unfinished" - WEIGHTS_PATH = "weights" def __init__( self, @@ -438,7 +437,6 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str) # barrier_after=True, so all ranks continue after the unfinished checkpoint marker is placed. # if anything goes wrong during checkpointing, we should be able to detect that data is incomplete. - ckpt_filepath = ckpt_to_dir(filepath) / ModelCheckpoint.WEIGHTS_PATH self.set_checkpoint_unfinished_marker(filepath, barrier_after=True) ema_callback = self._ema_callback(trainer) @@ -455,15 +453,15 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str) if self.async_save: raise ValueError('async_save with EMA not supported') with ema_callback.save_original_optimizer_state(trainer): - super()._save_checkpoint(trainer, ckpt_filepath) + super()._save_checkpoint(trainer, filepath) # save EMA copy of the model as well. 
with ema_callback.save_ema_model(trainer): - rank_zero_info(f"Saving EMA weights to separate checkpoint {ckpt_filepath}") - ckpt_filepath = self._ema_format_filepath(ckpt_filepath) + rank_zero_info(f"Saving EMA weights to separate checkpoint {filepath}") + filepath = self._ema_format_filepath(filepath) if self.verbose: - rank_zero_info(f"Saving EMA weights to separate checkpoint {ckpt_filepath}") - super()._save_checkpoint(trainer, ckpt_filepath) + rank_zero_info(f"Saving EMA weights to separate checkpoint {filepath}") + super()._save_checkpoint(trainer, filepath) self.remove_checkpoint_unfinished_marker(filepath, barrier_before=True) else: ## Determine whether to include optimizer states in the checkpoint @@ -489,7 +487,7 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str) self.deferred_ckpts_to_remove.append([]) else: storage_options = None - trainer.save_checkpoint(ckpt_filepath, save_weights_only, storage_options=storage_options) + trainer.save_checkpoint(filepath, save_weights_only, storage_options=storage_options) if self.always_save_context and is_global_rank_zero(): TrainerContext.from_trainer(trainer).io_dump(ckpt_to_dir(filepath) / "context", yaml_attrs=["model"]) @@ -598,11 +596,11 @@ def _remove_unfinished_checkpoints(checkpoint_dir: Union[Path, str]) -> None: } checkpoint_filepaths = {f.resolve() for f in checkpoint_dir.rglob("*.ckpt")} - for ckpt_filepath in checkpoint_filepaths: - possible_marker_path = ModelCheckpoint.format_checkpoint_unfinished_marker_path(ckpt_filepath) + for filepath in checkpoint_filepaths: + possible_marker_path = ModelCheckpoint.format_checkpoint_unfinished_marker_path(filepath) if possible_marker_path in existing_marker_filepaths: - logging.warning(f'Removing unfinished checkpoint: {ckpt_filepath}') - os.remove(ckpt_filepath) + logging.warning(f'Removing unfinished checkpoint: {filepath}') + os.remove(filepath) # some directories might be distributed checkpoints, we remove these if they have a unfinished marker all_dirpaths = {d.resolve() for d in checkpoint_dir.glob("*") if d.is_dir()} diff --git a/nemo/lightning/pytorch/callbacks/peft.py b/nemo/lightning/pytorch/callbacks/peft.py index 906cbd6e450e8..5336615a4a38b 100644 --- a/nemo/lightning/pytorch/callbacks/peft.py +++ b/nemo/lightning/pytorch/callbacks/peft.py @@ -28,7 +28,7 @@ from nemo.lightning.ckpt_utils import ADAPTER_META_FILENAME from nemo.lightning.io.mixin import IOMixin -from nemo.lightning.io.pl import ckpt_to_dir +from nemo.lightning.io.pl import ckpt_to_dir, ckpt_to_weights_subdir from nemo.lightning.megatron_parallel import MegatronParallel from nemo.lightning.pytorch.callbacks.model_transform import ModelTransform from nemo.lightning.pytorch.optim.megatron import MegatronOptimizerModule @@ -346,7 +346,7 @@ def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_optio if is_global_rank_zero(): metadata = {"model_ckpt_path": str(self.model_ckpt_path)} - base_dir = ckpt_to_dir(path) + base_dir = ckpt_to_weights_subdir(path, is_saving=True) base_dir.mkdir(parents=True, exist_ok=True) adapter_meta_path = base_dir / ADAPTER_META_FILENAME with open(adapter_meta_path, "w") as f: diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py index e99be666ec04e..8a0147a4613a6 100644 --- a/nemo/lightning/pytorch/strategies/megatron_strategy.py +++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py @@ -57,7 +57,6 @@ from nemo.core.optim.mcore_optim import 
McoreDistributedOptimizer from nemo.lightning import _strategy_lib, io -from nemo.lightning.ckpt_utils import ckpt_to_weights_subdir from nemo.lightning.megatron_parallel import ( CallbackConnector, MegatronParallel, @@ -703,13 +702,7 @@ def load_checkpoint(self, checkpoint_path: Union[str, Path], selective_restore: if self.lightning_module.optimizers(use_pl_optimizer=False): sharded_state_dict["optimizer"] = [self.optimizer_sharded_state_dict(is_loading=True)] - # Load from ckpt_path/weights (new format) if it exists, otherwise load from ckpt_path (legacy format) - load_dir = ckpt_to_weights_subdir(checkpoint_path) - if not load_dir.exists(): - load_dir = checkpoint_path - if isinstance(load_dir, AdapterPath) and not load_dir.base_model_path.exists(): - load_dir.base_model_path = load_dir.base_model_path.parent - checkpoint = self.checkpoint_io.load_checkpoint(load_dir, sharded_state_dict=sharded_state_dict) + checkpoint = self.checkpoint_io.load_checkpoint(checkpoint_path, sharded_state_dict=sharded_state_dict) return checkpoint diff --git a/tests/collections/llm/bitexact/mixtral/run.sh b/tests/collections/llm/bitexact/mixtral/run.sh index c32dbbc95b981..0fe9e331b18a3 100644 --- a/tests/collections/llm/bitexact/mixtral/run.sh +++ b/tests/collections/llm/bitexact/mixtral/run.sh @@ -43,4 +43,4 @@ python3 /workspace/tests/collections/llm/bitexact/mixtral/pretrain_mini_mixtral. # Compare outputs python3 /workspace/tests/collections/llm/bitexact/mixtral/compare_ckpts.py \ - "$NEMO_OUTPUT_PATH/checkpoints/--None=0.0000-epoch=0/" "$MCORE_OUTPUT_PATH/iter_0000010/" + "$NEMO_OUTPUT_PATH/checkpoints/--None=0.0000-epoch=0/weights" "$MCORE_OUTPUT_PATH/iter_0000010/" diff --git a/tests/collections/llm/megatron_mixtral_pretraining.py b/tests/collections/llm/megatron_mixtral_pretraining.py index 82188f75351ee..b4c5b960e0a7e 100644 --- a/tests/collections/llm/megatron_mixtral_pretraining.py +++ b/tests/collections/llm/megatron_mixtral_pretraining.py @@ -158,7 +158,7 @@ def main(args): ) # Confirm checkpoint directory structure - output_path = Path(args.experiment_dir) / "checkpoints/--None=0.0000-epoch=0/" + output_path = Path(args.experiment_dir) / "checkpoints/--None=0.0000-epoch=0/weights" assert output_path.exists(), f"Expected {output_path} to exist" assert output_path.is_dir(), f"Expected {output_path} to be a directory" output_files = ['__0_0.distcp', '__0_1.distcp', 'common.pt', 'metadata.json', '.metadata'] From a08754d26e04ae1a765274cad70312e1e7caae3c Mon Sep 17 00:00:00 2001 From: Sam O Date: Tue, 5 Nov 2024 12:56:09 -0600 Subject: [PATCH 079/125] Fix export of configuration parameters to Weights and Biases (#10995) * Fix export of configuration parameters to weights and biases Add test to ensure json serializability of hparams_init after restore Signed-off-by: Sam Oluwalana * Function was moved in recent versions Signed-off-by: Sam Oluwalana * Apply isort and black reformatting Signed-off-by: soluwalana --------- Signed-off-by: Sam Oluwalana Signed-off-by: soluwalana Co-authored-by: soluwalana --- nemo/core/classes/modelPT.py | 9 +++++++++ tests/core/test_save_restore.py | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/nemo/core/classes/modelPT.py b/nemo/core/classes/modelPT.py index 5b8d414ac85b3..a15f769e9d88c 100644 --- a/nemo/core/classes/modelPT.py +++ b/nemo/core/classes/modelPT.py @@ -1027,6 +1027,7 @@ def on_validation_epoch_end(self) -> Optional[Dict[str, Dict[str, torch.Tensor]] if 'log' in output_dict: self.log_dict(output_dict.pop('log'), 
on_epoch=True) + # return everything else return output_dict @@ -1646,6 +1647,14 @@ def hparams(self): self._cfg (e.g., in self.setup_optimization()) that was not done via `self.cfg = new_cfg`. """ self._set_hparams(OmegaConf.create({'cfg': self._cfg})) + + if ( + hasattr(self, '_hparams_initial') + and 'cfg' in self._hparams_initial + and isinstance(self._hparams_initial['cfg'], DictConfig) + ): + self._hparams_initial['cfg'] = OmegaConf.to_object(self._hparams_initial['cfg']) + return super().hparams @property diff --git a/tests/core/test_save_restore.py b/tests/core/test_save_restore.py index 394ced55a4527..8ac9dfeca1ae2 100644 --- a/tests/core/test_save_restore.py +++ b/tests/core/test_save_restore.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. import filecmp +import json import os import shutil import tempfile -from typing import Callable, Dict, Optional, Set, Union +from typing import Any, Callable, Dict, Optional, Set, Union import pytest import torch @@ -59,6 +60,18 @@ def getattr2(object, attr): return getattr2(getattr(object, arr[0]), '.'.join(arr[1:])) +def _is_json_serializable(value: Any) -> bool: + """Test whether a variable can be encoded as json.""" + if value is None or isinstance(value, (bool, int, float, str, list, dict)): # fast path + return True + try: + json.dumps(value) + return True + except (TypeError, OverflowError): + # OverflowError is raised if number is too large to encode + return False + + class MockModel(ModelPT): def __init__(self, cfg, trainer=None): super(MockModel, self).__init__(cfg=cfg, trainer=trainer) @@ -1193,6 +1206,8 @@ def test_mock_model_nested_child_from_pretrained(self): parent = self.__test_restore_elsewhere(parent, map_location='cpu') assert isinstance(parent.ctc_model, EncDecCTCModel) + assert _is_json_serializable(parent.ctc_model.hparams_initial) + @pytest.mark.unit def test_mock_model_nested_custom_config_field(self): """ From f14a622b7667beebdf310faafeddb793e9bc7775 Mon Sep 17 00:00:00 2001 From: Ali Taghibakhshi <71892896+JRD971000@users.noreply.github.com> Date: Tue, 5 Nov 2024 22:08:55 +0300 Subject: [PATCH 080/125] add lora recipt for 405b (#10991) * add lora recipt for 405b * Apply isort and black reformatting Signed-off-by: JRD971000 * remove pdb :D * Apply isort and black reformatting Signed-off-by: JRD971000 * Update llama31_405b.py Signed-off-by: Ali Taghibakhshi <71892896+JRD971000@users.noreply.github.com> * fix typo Signed-off-by: Ali Taghibakhshi <71892896+JRD971000@users.noreply.github.com> --------- Signed-off-by: JRD971000 Signed-off-by: Ali Taghibakhshi <71892896+JRD971000@users.noreply.github.com> Co-authored-by: JRD971000 --- nemo/collections/llm/recipes/llama31_405b.py | 63 +++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/nemo/collections/llm/recipes/llama31_405b.py b/nemo/collections/llm/recipes/llama31_405b.py index ce0d0cdc63ca0..e753c48387c04 100644 --- a/nemo/collections/llm/recipes/llama31_405b.py +++ b/nemo/collections/llm/recipes/llama31_405b.py @@ -22,9 +22,11 @@ from pytorch_lightning.callbacks.callback import Callback from nemo import lightning as nl -from nemo.collections.llm.api import pretrain +from nemo.collections.llm.api import finetune, pretrain from nemo.collections.llm.gpt.data.mock import MockDataModule from nemo.collections.llm.gpt.model.llama import Llama31Config405B, LlamaModel +from nemo.collections.llm.peft.lora import LoRA +from nemo.collections.llm.recipes.finetune_default 
import default_finetune_recipe from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger from nemo.collections.llm.recipes.optim.adam import distributed_fused_adam_with_cosine_annealing from nemo.collections.llm.recipes.precision.mixed_precision import bf16_mixed @@ -237,3 +239,62 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial: ) return recipe + + +@run.cli.factory(target=finetune, name=NAME) +def finetune_recipe( + dir: Optional[str] = None, + name: str = "default", + num_nodes: int = 3, + num_gpus_per_node: int = 8, + peft_scheme: Optional[str] = 'lora', +) -> run.Partial: + """ + Create a fine-tuning recipe for Llama3.1 405B model. + + This function sets up a complete configuration for fine-tuning, including + model, trainer, data, logging, optimization, and resumption settings. + The recipe uses LoRA (Low-Rank Adaptation) for efficient fine-tuning, unless peft_scheme is set to None. + + Args: + dir (Optional[str]): Directory for saving logs and checkpoints. + name (str): Name of the fine-tuning run. + num_nodes (int): Number of compute nodes to use. + num_gpus_per_node (int): Number of GPUs per node. + peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None. + + Returns: + run.Partial: Partial configuration for fine-tuning. + + Examples: + CLI usage: + $ nemo llm finetune --factory llama31_405b + $ nemo llm finetune --factory "llama31_405b(num_nodes=3, name='my_llama31_405b_finetune')" + + Python API usage: + >>> recipe = finetune_recipe(name="llama31_405b_finetune", num_nodes=3) + >>> print(recipe) + + Note: + This recipe uses the SQuAD dataset for fine-tuning. Be aware that fine-tuning a 405B model + requires substantial computational resources. 
+ """ + recipe = default_finetune_recipe( + model(), "meta-llama/Meta-Llama-3.1-405B", dir, name, num_nodes, num_gpus_per_node + ) + + if peft_scheme is None or peft_scheme.lower() == 'none': + assert num_nodes >= 4 + recipe.trainer.strategy.tensor_model_parallel_size = 8 + recipe.trainer.strategy.pipeline_model_parallel_size = 4 + recipe.optim.config.lr = 5e-6 + elif peft_scheme.lower() == 'lora': + recipe.peft = run.Config(LoRA) + recipe.trainer.strategy.tensor_model_parallel_size = 4 + recipe.trainer.strategy.pipeline_model_parallel_size = 6 + recipe.trainer.strategy.virtual_pipeline_parallelism = 7 + recipe.data.global_batch_size = 128 + recipe.optim.config.lr = 1e-4 + else: + raise ValueError(f"Unrecognized peft scheme: {peft_scheme}") + return recipe From a9ba593028b42aebb2e369eb2a35f6eb23376551 Mon Sep 17 00:00:00 2001 From: meatybobby Date: Tue, 5 Nov 2024 17:22:19 -0800 Subject: [PATCH 081/125] Change activation parsing in TRTLLM (#11173) * Fix squared relu * Fix openai-gelu --- nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py index 08629483e0061..d261874db7572 100644 --- a/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py +++ b/nemo/export/trt_llm/nemo_ckpt_loader/nemo_file.py @@ -395,7 +395,7 @@ def load_nemo_model(nemo_ckpt: Union[str, Path], nemo_export_dir: Union[str, Pat nemo_model_config[k] = v elif k == "activation_func": if isinstance(v, torch.jit.ScriptFunction): - nemo_model_config["activation"] = v.name.replace("_", "-") + nemo_model_config["activation"] = v.name else: nemo_model_config["activation"] = v.__name__ @@ -405,7 +405,9 @@ def load_nemo_model(nemo_ckpt: Union[str, Path], nemo_export_dir: Union[str, Pat if nemo_model_config["activation"] == "silu": nemo_model_config["activation"] = "fast-swiglu" elif nemo_model_config["activation"] == "openai_gelu": - nemo_model_config["activation"] = "geglu" + nemo_model_config["activation"] = "openai-gelu" + elif nemo_model_config["activation"] == "squared_relu": + nemo_model_config["activation"] = "squared-relu" nemo_model_config["mcore_gpt"] = True nemo_model_config["max_position_embeddings"] = nemo_model_config.get("seq_length", 4096) From 30235e07d7ff8897cedaa43bf6030060941e4953 Mon Sep 17 00:00:00 2001 From: Zeeshan Patel Date: Wed, 6 Nov 2024 00:26:48 -0800 Subject: [PATCH 082/125] added dit training diagrams (#10873) Signed-off-by: Zeeshan Patel --- .../diffusion/assets/mixed_training.png | Bin 0 -> 917676 bytes .../diffusion/assets/pipeline_conditioning.png | Bin 0 -> 161908 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 nemo/collections/diffusion/assets/mixed_training.png create mode 100644 nemo/collections/diffusion/assets/pipeline_conditioning.png diff --git a/nemo/collections/diffusion/assets/mixed_training.png b/nemo/collections/diffusion/assets/mixed_training.png new file mode 100644 index 0000000000000000000000000000000000000000..2226e4c4d5e905af62451852b599bd353eb2f217 GIT binary patch literal 917676 zcmeFZXH-*L7cPv5A|hZ{K#C$D(gdVeMd=U)=}p9d2%-0qhy^TE=_R1jM4I$YP?Q!R zLV(bVG(!lH1PCD{`8MaR_q*dg=jZ)%GDgT|uf6tKd#zcXIp>qSx?`ftag^sM6B84M zzTPcUCZ;2vOiV1$BZq-AZw|~WFfkp|a?{qnqpz(kc*obv#mxiE#H9Bs#gf&^tmkZw z?b`&cM+aE)pU<$I5Id0n{Krkg5rvy41(S~-yzxCw;%dX$dlL7xEpMMLeKK}J_00nU z`_-epO;(@9y-c+*5ldmD2)`8sBb$QC@N->nJ@{loKTa<>_S3-xp}uonER?}6QDL;c 
zI@2MRcgMQ5o^|w}lF9q2hVsOZ<5tDS1r~Dy8K<5rym@AdGN*f38$kP^2W!zqt2Hf_iqx+o>|rB!;gQrp48}EmGzvbtt>#J>@}` z9Lw5~Lo>O#C(cQrxJG-Ix_?IdeJTI_qHpISA9QBt^ZUl%JM;s@MrZYKr|{C7E6=XX zO6zExEqYm%cY3=)OjA{_H|yqFw&n4X^KV2KI}V9qy3n^McYTOT8()n~^pe&1!J@U!HZ-RWkBU_(x^CVuuo%rf5afJnT^BZeO$HLyj z*6=$_l!}hhy~kWb4_gmaov@x0`=%^-`e8@dE01LFFpPWuodMnv_v2NyHI1fNLjxlv z3{TWw7vIdSVJJKk$cg$Ix2C7lBh=mY5SOd_Y2s7Uu@XH$nlp51yb4B9KWn8va%4zHX(CR+q_rV=LZ3qu0@vm{a4cJjAm?Z$#NwkI?76e)Sm0 zWvp1?#&A2gXVOr)QW3fbEy^E7StaBof?U5!Gv^MR_2ttUgkOMXA4q^YE>=`14ptEk ze?V`;ZGW>eIfx!+WsMJCxoN`m8O7nr=$KV@KfLFE<)AnHn&l;_1iyyOn@_yh59;5SjnOz*?kAIe*d>zkjwLyA>At)bmqKLOebKRl z+s|~oPiHf`ei8OQG{r=oTg4uh*3t}S)et&z^XA7R9D~|1C)mGn`rJJKQt{-I=ND!4 zPQHDbFLd+P)4rQ$1;KjCRs7>mhXtt#9o=G!%tMh=H<}V07UhX--|w8c$1Z9qYDi@fH+!<(l=*;*q5KS=*LaO{3>I;YFiapz+(FK$2R zvEmWwT)tmmb;u%i#Hs(cvSWuzrN)|&$u~D8zWQi!rStJ!jXU?kIontq;|bq*e_QPF z%)B5k%pSS-^uU+6xf}OSb)FhxLmv@-e)MbL*Nb1d8`7HC2rqAa;2eDY>oEDjNtq<2 zq=x4Wkyaoz{WU=i5zYj?%j$3WdfuGY|CvD47tRmJpUyv(Uy|=R#3~FHb`};b&?@=!GeKhN{(2BNJ)}pH~*rZY`ogDmY9~WYG8|!y!ppY`6!xE|2zE{Bfy19qB)zwqxm#-R{2Y$@H*O*6) zGAOVyjVSIJ4qr8W%&~QS>*Ch4tHvWYQp$SG;3YQ26YoOJw+eWy`)|e;D_ED^DZ7b( zG6AmD2&if*P%(t(Y3R0^(mwA>ZXMvr>^vW<7keRAn5X%=)T7NuZ?8+rAG{ut`90HC z9^|f3=yY9<=S^pU^d@a28R85h)}D-AZD5&n9h%&3gix(cXxw6)1YaX;(f;6Y`>0^&{lYI6&lH{=&=%?t)J~jN z=|D$C=&0zl>161X#KiG#@t|W`V-Izf#DLCg7P+G{Ha&K2Ld9EWzqMZ;yY-;zd;aud zb#aPYi(8^wEB2t=Ywiw-evcIEH`a*8%8&+%1j&ZfeD)!G)a|J6Ja7EY5+utcPD!4# zQLx#z!rD}p9W8rYwr*=y7ExMcV`=MHe{tfPN0o=dm|f*{D-D^G;`6Mvn_L&egUfBj6yH}r4J zobQ*p=={3#dVC1}A?{niC;ujmR22p9wW&&DXGi5pWfELbWj64AWe>>gw{< zz%oC4fLxHFPl<2y_a6^#$)sKzXg)PXn_6iS2w?o0Uri)b&MruU!cY<@`yBO!($0-8 z2rqdIIs9g!_In9BYaRZxb-Wqf^(}hA3{UFD^rfvdPm{UPQ5ii`{o&Y^U!vQpi{zZ8 zFPZ%+*mw#nC2hB!_0i#PtfgGR=O6K;^IP$b2`rwxF8AxpRz@w}08!c9GW zqwy5ti&H(t%g6Go%t}nDjS}lT>+CtwUS}%O2;ZewG!01x;vk$p*}ySlCTl+FZsMTm zeYR0{2k>E6&))FdGa0o)?$=ahnl*IAM1@yA-!V%U_UM=Ih38COtU8tQQl%)=$X>nH z@xs-aoaja4UiV(iQ%xe|qE$x^O0AG6e9U*boHK{d(iKkS z_Q1zwkq<}rtnI5as_Xl1EawlnWwFbiBo-1j8QLlKen^Q$_uu>o1f6jLC(HP)Xg+o|s)IYv@P z&e*=PWv|niPW_CZ@`{;#T!-_Lo}>pvd+V+Yq{GYPJ>}(_j|EDW+;$hPZEY4^Z>#Ep zfz03$)2X5G-Ky666%Prd2r;VXI@k29?-QSQ>+imQ`J`Q=UBIE&5fer4jOiRG%rA`V zxEO7lsh-8f=c81$@UdsG*Lk8Vm9OZ6w1!6Thu`1Y^#^;}%&hR%hPx!>?SyA>qP4NK+JX&(lafqyus0J z{XE8J?-Odo46|1OZyjiZVs*a7#$|0Ndd+4Amyi?)fhhQzYQ_55DO$MKOvG2(5bG+} zRem0|QhLX7{H_URz}cQ|4~c&uFdfzxvG-GdCP{kwJE#~Wp2MwKg7sNdp5Pq|TcSK) znU=mKtwB!NE8cJ&F~p^@GKjR2GFUi_EWT0l1!Fk#t95*1lmbC&4Yzj$DHAjh(zJJE zrxo4KF2CH9n(9zzbcBAi|1xfQ!+@=4KUBhdn4R?Q*?H<*Wt=H{agTVEgeG;{U^gFXEodHI{3Kd}v*VDr|q z_G4l?E4=?cpl^ElH*o(kw|iFpR>npu&R!5Hr-xn-z*4~w@BRBQsRgS5hY+y8lVC8! 
zRlj-9dDf@(*=x#pktHY&OSS@P(tQ_4hvDX7miRZwHlonX_0O+svHDtz@E()Q;nVK6 z#}53?S&3jT47msS#TQKl{dh3>*L;}1}K8b_{YfJj@$>#>pXy@=2i81ns0i6L`+9ehF6#NEi$e^ z^XZbh{pr(zx;owC*MRS>E>(|rs%AZdmp&a;ABmXRZ>M*{*ze1YVs@nxu%IrpTk4V; zQoCNEiU^qgYw?NU>LQhe>e06o)Z74?t>(@Qvn>meQ80NilxVp9$D`g08_U#{VJJcL z)Md4q zsu5|LqgU&E%ECRgHpR#)!{3BIw{-qf@jbOLH`QM=igLe-p}OARyNSesoTPW=>a}ew zM(G3&JB>g$(;|`;O`>(A(iV)55c)(v`0yXuR+Y)Ja)No7q@|S@`i%zGtxzj^a4vk0 z8gFf2?f7&XXij7}90vBb*3>HTP6m^{J%v$xFaFX7)sTfC7>ymUo4jJB-sz94li$bP+<$&@;HVc$H&AjI}0B?7{{hI#U%??pL80QsS|aT}!u?$-Wk zJZmvL>HJipxxf;0H;%rZxT2yI?`Mg{pBm%KrWHX{y`rs!1_{~&kj3vt_XZBv$EHvW zb-l$M!FhOik+AU#lpb1Oejc|#3GJ``gi>|mdqU%qu}>KYfydUpm1^r|Fs;=H?WqRm z0d$|EFmRjs@;(mVLD5c*#eZgRe|}CYXw*;Deo9bFo$J)1%l|23FFx0MX)8&jr1JW9 z$WvVu6m~<#&4BlKfbU0oL}c=CU(0m-Emib5Wan*)gb3---#K)l+3I8DqPWEK!CE46 zYW${kFEt|I)^j&mP*ZuH0AGgfW8?A}pWn(JQ(f4Xx22G9h)OOeAwgS`luX>Gwz&AM z=;xDcfUWj;ZGPQ^xaBk({N_ZxuK3_Um$)&ydcPqK>1E-1>Z7cWl55x_h z%<{Z22;g_y;c3~P=jIi7HyP$}T(3KF1(qWZ!uXG}URCiwwqmAV5SGpVq3i!UBX7-| zD^&8{4gh`_mYhOXZDvqxR@EtE9AJ)JB$OU}+bsX7&lLTw%Upqn52#V|g3coBtzDW@ zpDwY4{*Kxe4@IJkTse9b+sqhZd*CI}S2ASbSTGtnnuUU0PcS-q(#XemJ~lns!*Y?o zezZFtOiB)lgjlbr%5yC+KYRn$%fj8g`q%UrF^Hll&^Gpd6cLT-vp_mdkO>?e!Cz!7 zVo;(HLQo&>dJ(7N_G|0u{2;{^wS|9WD;F;x-nd5>$5pH-P^|x`ZQ1<5Ey<_Psw<*( zbgAv26k<7vXp}ZeM*2p1j0Z&`%D;h;O>Ot_0wUBfbKi}`2CmL0-NHI|tgXMEy!}6( zn;uUPv<>EWQNh)L*HNoZ>=jlWIMD@ggFaS=fKg;o?DJwz<8){$LTWhZk zILq8Ljm;c^EzG>9o_fr0kKte3t1Xf?vd=y^{F0&2E6QJkztaSV3Fs86YTWf_MG;jR zb-z-y#Ab$gLLxgAuq9vtD#W`PHP z>l`{-EAvZapA9w?;`_;gVf=7+@~BP7=~@!D0zIxcbzTB`7EYh&-M0qcn?o^8Ot6-7@AZc!^zJr~h_>Yfe*=!BK3{#x5Li#H<(M z4=jN?AB@@gPhFg@hOJ(QV!7yg+z{!bXT)$l-v22Bx-f`0lx<*sZ_4#ZR)1`s!j8SQ zeH(Hgw0fSK=%)&z90tCguToYqkW-e*<%5SVrV$(To*WsTLx^g+o1q6Ms^lPjZsjUr zO7qY86XgK>MGq-T=x+&=1=q?>9&90}VM16U}L>zL8MN=HlD5t9iR{m3bv<@U4J z{e)*R(4CTUwRLUgz?HK)?#;RwA~>hewul0M)mHEaugmkq3F{YeDt)j`T=208Biovy z3Z*S*qwaU@w&w^;p`6l)Ksc^H9^DRUQD#O&=zddhDg`A_oTvbp%0u{>C{@u^O>6gT zUO+H>-DzSeePBpZbI$y}`2WDO8-?c#<6^O~>&R-&4yC1&cfaB;NBJM$cGS^s3-FtuKzN)+;LOrzLBq<}7;9K4o7VaVA1$NjDvmb=k`grD z?(2i|Gz&wLD|enz^D&iZ9E`G?FfRX7H6fJuM#p2diPnY6hN(o%9E^P6VvOqHbY4QF zQYJlJ)q(@UTouh;p9{7>o7q`_RWtB%-x7c+cdN^5meN!GUZ?L-NG3;A?P1pAn+LB< z>(!dMLC^d|n_6OvYtOHUorsSEy2x=p42;jow+el-MArm|&Rgre?lR$o({4*$R+Za!jZ+*tWjY zF%UE{N507o;(!|aSKDg-_u)iP%WoJLoq)8{y8|DJjoKR^d(6GT?yIKb8&8jOT%`V=!X8E(|Q}PiYI_n(quKt=BGu85+xH+7YLv0f4S8DLm8eA^S7kaTg~T%=htUMIi6Uq&W{0$ha07V zKkMlVLE^pZZO4iMs7*;{AA%-;g6sNi^uu3Gp$kD@PcbJdkvbnjGJl#S*eUg*eVo@QvaF`FHymb$0GgsPhrA1F`nE|9qe>-}D(%0cwq z@uM*;{^>Bni;Hx`!R?Jo3mW;{^7Wp}T(J7;xnRd721YEUAo_nXnYjOjX)o9yEbIRV zruigyJTHn%yRPUVB z<|<|MzHysahvmdOHcQTBoEIW22)$w!=Hl@xd;y2PEy(V;fD`2XuqOLrpC5AAHK){p zlPbcFJ2y{NQCdKm@p&QoP&*u{>2OJ9x9$&b=wekWzF;c;B)Os1-*yNBlIX0I zxLFx96{bde_uWMF1~auO*?c1n5nA!xi1!XgHv9&8E_B@9uh_5NeLF_-nf9s!s$i=$ zn`7V3$MxqkNtod39YSDb#jzf=i5+4QSqF~%^Obc(NSX#k;8TdxCjf`QWDrP8oiYMv zoxGr{^|~kLYV`RUIp{rYU&DRNuf#~d7Oa0TQ_dSBQ8p)G1{)KpP)^QjDN={LVfhYt z8A_?AZpgsOjs-5IW011wT0l=XD4U2#q;Ih|EsSD~>jX^fgxfAw&iW;<(ezS_ z&DVXz7=+!ACmaI~TK4%Iv=x$BWuTPRV*Zwo`p&|Ya{v4FXBi9%#1(@Up{6O-N;3RayxH;& zM*dM$Z}|H1fgY=N>J8DlFXoCX`X`cpWLDv<=ybV=$fW#!oByB&RC`4jMNkd7Ib0t= z0SpSy3D&f9f07<_0*C;>*~s9OFc`@@jE{*11S5U@5!bJ~;t5KM^yay5f9tVwE#Dg5 zntmEu-VyJ1 z#;=-sCm~y^ZmrOJqIz+<+`Z}oxm<0&TC^t*cKdMh>*$7`_8#x-;GJ9R!t=1-dfnAT#fcr1qE{*xRG<$>R8Sg$3$ z@(l0YTMh?B$=lVTQadU_l@q&l9aVpPv*EiDpaumeF>SeiO*T>~KGnpZsZ&*0~z4;N8R!>+nrc=swLU5rgtX&L$Z$yR^V z@HhzhY*Y)4M@ZO8aHt>dMN)h-Wc!adJb$b^;BK7Ki!YutqEk;@yDg5Nr+p0hX%6-P zkRBh^Q(Jv+7`z7eI# z%bERvs6>3AFBR`f+YAlOWblCM0vgWij1UPb7w~dmfw-TP5DvOt&1Pn$a4po^=X>p2 z-X)8QhOWXNajoJ%7frz~DLdPR;>-1kSa1|}I*)qQZJ 
zx4ut!i6}-nryOq|F&K?atgmp%7Ng*x9(_}7Cb`jK3@c*P$duobrU-Ng&D*;5x}oON z!N=VLG8%@vrwL%nRxJ|W8)j_2xFBB-*C%1GE~`#?e@jhGe+n4fpIwME_*>Kw8h6^= z8AiBrdlGc@Sep9CZr2dub1VNc!6zlUKgr4JV$A!rb#rtk{&0PM^dx1!P2I*|LTMs# z^{bspX1&2#x4Hf&!~L1pofFIsZ>9#H2Lv(J2KdgXz{upl7YzTA;iIOB5xf0=a|2IA zk?iARv*hYiisb2_VSiokuM9pZ3KTx>&m$pzpb>8h)un1jkaP%OF{jCq*|=nl?2#mV zhD1-$Je%4QvXQAI#B_P$|L>~ZARJRM9IG?O7zX!0?p*D-Z$EAv{3|4mx|GyzK+#~H zA7F@%Th}`J1aq$Du}W(QH^jJ}mo<-s=6nJRe`U&JaU14~!Lw&ne$kWOrV{?bHY^Q~ zpY?nO9RjL_+kKB7c%gxwQmeY+5S))RBBwjnaNyTscSE^eK20oR_x*WzIrIH|&d2lM zAD~K?dLQn5GAa7!N3N$t1R{|J#=fr3lO}A~JMv=d8*n7EuW-N!DnHdSfXi2OF#8*6 zqY9phcvSS?^)L(Z+De4aYypzh1<@n*L^u_lK!3bO1B|Lh^11RHC++8 zQ9?2hXHUi@1WR&`?_CQAiL3YY3U}Wt^qRUiWLCGhsfV)cpKHWl$!AeD0DCu;iUUUa z+aJiyu0JSov<0Z;8zt1+xEoJsKIOJQF?sEH9s=Jw!R{3Ski7BwMn+X#5Jvq zt)HSVaqKSdPZz(S2uF#a!9vzt(E_VuZAU94*TgQ%+6Eh|H0RiKYLcJH`QUe0OKTk* z;FF;IO4XW*)0?7R?;Qm$sDJD-G`jf!FTslm=@^`S(-nU6uc;}aqS=SHVC8l$95i>g z-Hsw@mZzC#w2p#q-0rL3MpC6l?Y1B4tSmyTy$YETPm%#V9CR* zj!#)U%1$il7JN9*8LWOj@}?bHvYRG_-?F(2>2iP85n z>@REHdPQoSAfYV*on-1*b($&V8d~Ux;nWn03Kj(fZ2aI7hNO0ML{`How#U!VsS3nw zk`f`wLnsMrg&D2ut?R|y8tt61p6o8?8zK%Zw7qSI>%KW0Z703i8n=Zjn6+`AIM*a! z4#>8aT4Q(IDj7zw=g#+Ol27!(Ts-Jq=#2%Mzi^~sIPlWUP-a++m$2WuEiYeRr0M^$ zor1kYdqLtV`}$Vp-{UVT zue^{ag|p#aw7w6dWJ8?GU1=aG$~+g0jf~ynxW(R5z?OQgo||jWUsu&Of{ovh*4WQ6 zz2V}}l-{0!Q;KvHefYnWQqca5F1t_M>h;wXe1LS0s|H-lO{2#zP&4P&Kj*DraMD!B z*%;@Ij`DCiEVsA-sm-C9N;ERJq-qUk>~Dv*@;mw*^MG>-C;>IS4ENxgv1y5-y+i0kI4tf38jhdaBlR@wjjy9KEa?+i%CE?;jrB zC`~8rtlb8gjDMNrnm|$Q4{J@V`TV8@*#R2ryfeFX3F>KK5n zR?WNfs zuDeX~JK1h&$%*3&%+|Q%SM1Q(XL3OqEzhCvW~E}Y{7t4YYFPuv6Q{G1Sj{KRhev5X zXNOzLZ12ElCGXd@<`^~G){S?*cLw_Y*!w+*?ap^KSRLO{ffL?(5h!fIenfF+Dj{fV z{i9ACSv zi@wukI=r=XFAt#vgWe9woJ11c3Zw3l^Jtfb?Z=Ra?f^Kegai!SJ?xyy5fv_VVUV0h zM~X5RHTYAv^!>{BPArXeZlZb*D8lvaT)`eA(e-zOl$#zhGY8s$8*kCpXx&(Qi6t(KzRm7kQ#1FY2Wqj5w;?1FENf_VR92ccVk`eF%5kPSYF?}xk|^n^{WOrG zm}Fnc#`GrXoRb?zg4;LsT~TC#p*@>Wks{8QUcgJpbwx^g#*Le92p)h$o}zxWi(I2$ zg{w0r3<&v^Dg(T(mwc>8BtG@fI&pQJ@EQ}0!Ob5JI{&T=8VxlY<2o;E+SFEPcEvY3 zN7d~?SpWiaK$nUlj@U-L5ppN0iVzWn=9`RXxH+hmYiayLDH9uqF>&=|Y%#j9jSPQE zbwxf~NCK%Kz-5v~X9PK&5ptAl$^I)eSDf6NyT zxFBWnSY?#^*vAPd&8{Uo?{AK=Ar@dU))_2gAZ#Ewd9gAVt!F6TP@7%paijV9*G*%V zWm={m8vyN~4g4kv%c4<)o7GkJcDVJDa0|XSg+Wt0c{g)qRWaH()?nPeYI|p`kMS{$_h$)3RM_eivBr7urI< zMW48I?bY3U#>r-O-=-5-*5KZ=7;Y3+Ccv$8gR|dMsP=n-_dZ~AOg!EYJYDrJ#Voga zZ6oV>U2yAd`@@yxHtn1HLw^cp=B+UvBD&_{%A2^YwOcD*iHYynjt|*?zdncXH<9M& zr^P$h_6j57Jcw=6V~0?_*t`kCI;aAbLqF~^ncQtJ-uAEFJZ!!8OD_&iH0XHhU{{XW ztGA~5=|^lIeikWNc=13$3IhA`Ic%kfC`yn6L zw|i?h^Z#)57F3?W@3sdP#SNOyxs2@ECOAl=f^rF3`a3=9p@J#;tB zNWF7E&wH=$TkkJ8Yn^rW*?V963Z=0Gm$cE3dh3antZL867Wpyu!aALi(;A-{bW2CG z&}f3e5qG`p6Crc_@Ym-Kta)KCUiE_wmNq_Q?oC@K4<}>2jg+UQX}DSMS_b7hSfFFt zNmi5@nWCsQp}u)@uBpGm{q$j?saUi=b-*2B+Nl3`_%F%3AN)buw8WReA67VJH^2ls zxu`2IJl#H56mqcs&07MV z2TgU}gaH0)jF=nv@@exK%V|r&A?4>W90MMAXQ;=as#~v% zKJvjrCE)vd`>#mG*tNYVs<-7RS$dr(lH2vFhgKb9Gow;{*#TY=_;>Tyt%@uegi8c; z!N%9yW?d$7{Kh$9BR&tY%#NQ(<0q5(Ay=H=pCoSADu$aF+RsMHG+k%21EX6pzYDYv z%elJ$_$+*k*?r$10eg$Eizv)>=!Rlz7AZ_IlGFjiLz+f+|GJ{t=K*4IHdH=LJUu>f zTxGd(y|`3}!CTALzCWhYscLkwJY5m0=+roO#+t#QxgKWFk`o)Q8r5`%&=gjps{li5 zzY_6#=fJ%3>gD?JH+IQ|Iw2@}zPf;NZ!(pq0mGf6ZR{Wv?26#48|XXmhg{n`Ypy0D zV8*IX505hoYk5IlZNDJ+;*kJ)4uL?ZO#2|Dagpiuo}#;3n3xXFo9gX@>RIiXm7P~1 zv<07d?3(YIyuQ@HdOI$%ghj+YvZbM^agv{(@X0S*iSF@~LiYG7Y)>yx`YKiz7wymd z&r8Hs5U9on@Y-zt@2E$AQuE;LjBy75%}^!PG3zFAui0 z#l-Wz7`RG>L;!Ux-ZG&T{nR?iWYJ4r~J<9}?KffnhfJU-Kq&uJUgcMU^ zt0Wn!DTq<^@m-u6n@(6-OPsJ@99-#XARKd%eaVV~)%$kP;e4c?>%CL}*@2Rv+}90n z$xjChGZU_ERMIFsE~Yg4C^io@ZqXJv)}E7Wm5e6m>?wVy3F7U2ovuyl 
zlQmmar{C@3KV21nx4a5yKM@+QT>UcT9D<%FO%~0sTibNef4gfK0I}gp$pe@SvXZex;s+=ph z{G8z|oE2nb>UgD%iH&W~Zvwu=>=}huzr*a|WR1$S*KZloMF2fZTN2SRhV`x|XC|6T zf1zQu7s!Cm)ja!@;H9#!UJZT=RYdnUcG)|Pw)X`!r{M65JhJ=w{dt7%`FA`f1zh5s zK)N52PVS!eSxNqOJ~*|K%GhceLscBg?h4Cs49J*CAjK@Q=Cpr+c% zT4m~E4VJspb-V88?q~D%iw9+5i}7Af9c7>zkb#vKdJO*kTTy#00v4tt5OL+Az936( z>ENs(M;3{wuQDog;sN@Tadz|s1i6eN5OhyOUGTOE`jtDOdN@E2Ca99bMx!uv+K{ZQ8j4dUCsMFEZtY#n*^3|(qBEz39OA{aB)NoF{INASpLzXr-d;n zofdCso=C36s2bQLuiqwIPWx|*wRpGU;JbwD4f`WZ{CS5yMahAtb_%RY(MShd%3BDE zWAVraV^cz5V8t$&Ox|=gc#Ik(+zk%Mf`xq#$5|Isn|}M2PkMjlQPUp8xUZNP%r7m{ z`h`aTD#2<>t|QeSy30xf984h$U%se=?@YmQja@vgW+lggy-mH_Mt%pT@=$Q07$Haq-3M^>(C= zwWZOd^3SP_r(alZ2}c2rYTJ6R!VAD*klzWZd+LL+R4J%jUybCajzfGFTKunc+MU}% zNT~R+)m9b?;aC*1EOu5D>2Z-iK7nQ|45kioZY`&msf$K#Dn&aoeG zm&OSE?obXXFzxZEzI@w@u)a&+QcyjJ2f7}HxUd)1Q5DFt+2plI&4Voin%BCnTe%t~ z{^eoax$S3v#W^lSJ8bI>z9;9>c8qyH`>*F^Y?Yfa6IML6|*b^Bgt&!@peyCXMs z%hwcfipuP$vu=kG)4z4E>A61G#pKpSRzOKExt1vd5Fi$f86uBa34Y1t5&o}Vh-PXQ zjwvCd)iiWZBe-rQ#RXb3v>NkFYR4|Q;z`9SfR3z@i5D)J^mN2~rI9qjB70V_tg)_W zzeq81IA4+rD;({(zvcH(LNP;K#u4%)`z|i7EIz_SP4%5#^NjG_Wh)Et)rI;?Qawt% zyhztkm{@a;s*$umBOc*}0VI+BE+#UG5*3|Hm?2=5!Es}dG5hseaVUqs1E3Fk2El0L z(EA!Z?4E{ez9%GI`~4h1#pbKF2Wha>y8nPcg&*P=|Bv~=RO~=`M0}0QC~~LU&v&P?NpDJ;;bbHPGM(Zr3(jxFZO)E<)`_IVPq;-uhPHf9|(evo}Gf5 z#;Q}Wu)*qwlqyr*^bu1EdBt895RJAQtKDYritF035B3DBM=-Sx1_pk%!Rlvx-X(5q z2g}ka&f-JOr?QH$v@DnNpn)+>VM$L^+pjid7n7{F55~PPFjs>ya7zXBpU!11kD3Gn z*K5veDzN-dZZ-FRJ6bMlPyphnd)EJE{z;bqV0B?eJP1lTZ=ZEPn5GEe!-E=I(q}UO z7Irk8E&Om~(fHf4GOFtGTB2ZJ{^f5IuBoT*;?+_V42+Z`M4XgbG&nQb$uqaj%K%HOgl@dmM3T1l)3 z1{7R9N!KDl%I?}9orrXf-m=qXxo8bfp#a>FG*H*gzYGE^&}*gN8^p3U!RYT1hqQuZ z#)x>rpeV|O^61bo5@M`AG#n@WH}nBKd&<9iEv>Sc73+l*I!u2#nq_}=2y-Lq`nbNS z^t&@wtShW-5H=`xpOJgd1=lET?0y1!aQltGC?MY}3bXQCzY0(aw0zMH5soYXgZZI= z{t0Jx_#OP`p@^ej{Uc)ifyqF~*WnC?1E@n_b4ABDPHyx`vpM9o^oEte=dBF%^G_4g zZmK2AY@U8i@Hz{;TC=Yt)CJ6@QdqtvduVwdpoVrKGOg@pAYWtCA7v?4Rx`ogV|LS~g*B?DXB6 zI#pYJ-AZsyx%rQppWj+s&WAFO)Hk-y#NnwX4l{eIU)HW#_s%wv9k!;`VcdMyTEenr z@cZkt^bJ;Qa=@TWDxX=a+$spn{*o1G<0(RU3O%j6X_*}&NYfu`MW)#LQU;vn ze{Zv;A%O>Ei^Wu9F4K6*IsPJowHDoF({pKJSw6oYI~p=kDD!b~FshuaELKQU zOK?CJ`C&aWev+XK--z9@rni1>)4IAglJWt}^y+4?C z$${2y+XHN4#i_e6u{bX z&A$B)>y^2>&3*9EFY)N;Gs%OieY?w!Wl{%yTjQJK1yTmuBV3c6OebOi&)bFu=io~) zmLg!mw-1VuiEhO1Kt(Wz4PmoYTy2Yd!&V)oSDtL%wAnsxP0WPn^{4wjlJ-9G7V`X0 zRe1J*|G$O$+{$!L``l04h%eD%El;a~1l1<0?ohZoJ8;lj!+hW9C)O1h4`#ct4 zxP2m$v$Ni5j4UVzg5Y71%$P3WE5hqFF@$%HN4(?E$BmNmOH$$s<{KWBRcfW{i(RTe z79j;>GMm~oezOLYkXK>DWWFCr`QrbE^F}%jCeOwQ*2BlMM=A<(;g#=8P_rx)JMPCP zRxgv^_m4%mcCb7>3_GV74)h6O@^nk~zVPKQ!ljEInhk=;gJZsa5C!BZ}08njQrqPCU+~D}h=l zSUtHsZTWRr8c96@y(XrcW36vM(6ExVAsfTFk6tR;KMWQ8L z`;eFNkP0y~%gfolbP=k`hD-sglbQPF<;wcAzgJYP^m|N*L+U61LoZL8!!3iGd3$fH zx9Wz4zlL?6v%ce;X%4=5ZrN6l9fj2ZR@d(&)d5@_5A(9-TGWH|hh76JGRM zf@BB`aTH^GeZkzcsoy8&WGcg9c<+rTK4pALrOhqfKH97)`Lg6RYKmh~)snqmc>~Hw{-1j4CL{hCG5@Z^@JaGj-qRBqTWe~Tg?jZRK(QS?T<65G&AL!g? 
zM3Zb`yb9q0WPr2o_*qG%gGg-8xLV01Js3lg%7A~duMf=3tq$v%2FFEF0l~Hz7Whdmj@^%Aw`r!& z2JHn;a6uf=987*B34V_CDq)az=t=w?@ag zQb#e>oiXz+(epRxCjypd4&11!^$s;{)vJpedhtXB?P}9oCV~ES2`Mh` z;QjLcxzj)CfEqpuYbqVB?1KLEb2>5_JwM6-Qj;Sq_8j z+Oqc=844b%;fmsTuGGdVl0|C$k(5+>a)@LwwxFDRUTaRxwm+JSd%pUWD+RLif57hY z=6x(6sJ-FOn+_wxSpnvl)O^dpAx{x#7|8Dfw^aGn%-IMT0Swt4OImsf7+CG>3$8b= z{=mSL6T9BYWrq~AWo?fY*eZAa@D?%uE4gK%YQhlRwhB+d06yJzT zqMDEaWx4)iikJqRsQTZ zuSBiWedf7N!LWAs?^WCo6%WCJcXj+-P3me92T0SG_5%E-ZcRc;>nvd)iLrPSl9^Bo z9uD6E7}w!-6^D%6>kkoQaK1QX{I+5)Zd)V>dPRxy8AEH=w8aeAAmhF?{Py$aJK^Nm z-k~~JSoK{pT>`ZofifaYl+Pbkr>nob#5s=VQk=xVWRqd%%qfwf1f z+GP3I3}v+-oZ>sm>k@N_ruxrQOSg`;4QkQO(cU#kB^Vhg&or^&GxTw3wCiUjnZAy< zUVM5gRpb@97z~J_lc7M?tR{`ISv`aIJ1%b{;rC0QWkVIy0;FyxR)kY#@t4~wWLh?&eJSWIUcsRyl3ol+MF2X`1PEkr) ziAuzdwW|QWv0%UyZ%2HqD{!|&K`Mx+b6Zjf5MoZwH(hRTTL%Rk-nfe|4?@9iA|hQe zALJ|P*q^p6|PXNtJ1dt+=Y*$9?t$M)g0r!oI z^A+_a?fcO^wQWzQarw7LCp@GSrd|D*xa`?kW`6jV>;3ofORMe8ODL{`e#9y!8zrf( zPL4cyot@27DtJ8~0sl(ptCn2DO?S|KqHTXx))Sb^=RH&LZu4HN)X2q`1$F*i^(}?; zAb=QeZ*oTq7!-B4J}(Cf9(iGp$2s<3iNp&8`Uq1oQlIgjEyAl`yVA%zo!c(B{PH^& z$qDK)2vc=(MZEmIQ6`f@2gi1W1b%)XN)F~C78osNBn9HMT5>kIjo>OT=5c2 z#Lp};^6=_?Q3Z#hpwQ>UT?+@X1K<%08z`M72m9|vU*zWo;w2#$@xWSR0&7D`xJZUq zR~ZrCcp+3;XGE-;HiM}rZb-3DQ+Nr zu$q^vAD03qzAh>oS|_lB%v@$eKlHIPURQFNEl_<;v7+XnVh0Uqyi`Oh2lm+4>A86L zl&=mXA5tQ$)Zet&KcAUCFqw+a7i4CxTUv%N5%H^L*Eu>{osxB*hq~V(_Viv0?^3Nv zcU>c-e9q7PbKlgPp*?*d^)S*-l)b?;B6FrUBD*+G#RYlbqND0qqOr^hafX(1*E$d> z(Zx#$?i}S25ffx_9)06I8E9o59f%7Rp}&+B)-a395iJK@4@=via?sHKWO`l4y{Q<< z^qOmJz}Cniqh0TqK78jq+Ler6K#(fvI%S zy%B3SC)v{5-!W*|LziIj-(eQ9fucI0jbcw_u}Lqw8(TCF7M=dY0ZXHlIfi;=$F-N* zm2^^h9Mm4p-wfh*o)_^ZI#>Zg)|*RIGHpE!qTR!$i`tW{zQ0k@8(OF3=orS~Q%%_6 zSw)Wr1Qd4~lxhlj_j_J_#BWzh14tT3p@3OZX*6%!=1p18t-z}PdI3C;oK`5kqIx^w zO4Juw`qGk-7HE+O%8|rTGM-PfXi7W8`En8r=lF*mFHfQP28naye5ENTPYp%gfft-| zg+|w?d**M0C)8FF27`mhma37+!X|MO1TB-FF83T{v#AmBvVLf8r$Qo%e8Z`q_3@Rc zVgRd6j7Z+LfyPP?ttT5D*Nj65O$x6xg$2oTO#x3o?GfeQ%v65x=@3C)Ea9wEwBBu#nwfr4{{l_Iv_b7A=qT9Z6I&+kR?1H_J1A-NL`7c8rz4F^Mhn?FKzhpm8Psyr9`Aso1G>u7ut7bX^%C zls4G7&zeBOmSjINvx;v_)J=+eSX)g4Qx?9L{Mo?c>PjReZrn^373-%bsq-T(PNrnX1(_nu)eNk)k`5W z!A`xfOb@D~*`V4={#g6byDTS2T`O1^&t3Gi;~PA460=Er*IPew&;)nVr%ViZ2vwd9&*pW3K+teIu&FmtL4H zW^X6r0Z%adBNnjdE+R#+ePnv?fB!&cg8Z7bcQXz3*3Jb-L%^4TVSG!cu0Fm+LFjlu zOHsi2afRoXaA_Wyo40HWbg($JVja@5qt1X+EcY)#Tq_ z#e1{$+G}ke&$Ea^0|TFv4{^_or!yi*B$KspWng-{oT||UwbE6b(CY-zc)K`8+mK0i zriKyM9`lu~uCeM3+gRo9X5qW4>%H;UbFC-zcYhA`zN`Kf*dMicrmGxHY#q5Ga`>}F z$ENbo&)Z+(K>(b|+1ht#wLT_~ihrY=nVM)aeooZ%QXvpToAQ zsjPn8=iKUrJmJ4Wyb?c6^>yt5Z2ElO?=cT!7|JO3LA@wJw+`S$A53MrvD3t|KGA;LG~_nBxyJdaIGS^ z+A|v!mg(nagsq<;*7i1y3kP|zsD(PS9vMt8b`7L`5P07TC0p3D@AH2g zJ8ODO(j<*C5D^rNKn;w3Lk1kJsI?h7{pxd)lD3@Ye~hCqdbuA7fy6K4<0EZ$&jKb@ zxbqc*lk07BxyJR-Bc@+2)(g?(>?DRVlTC;5hC1o*o8_w-u!S){sT&CP|9JMHnL)7- zwi?$bzP3T1x2%Us(wwUNs@C}+!VzPdJkLz)O==GlS!U_}-WjN@!%sxy$c)&E8VL@; zp<=E#`yn5Y?~L{x@Zd9W*0;yWDx?x9Tct_bz~4~No5(KDn&tT;YVNbuimn9(8WUxu zW8Qlr%UCK)GktZf%_Th%xr{Ajp7FZvnKPcVVY`Aq=@2~17EIC0nasLOqQkQ?dYv9} zvxxUytxqaax|pW2Z~!&hGT}LSM1!s)8A7NI;#bnu`>IM&NhIMQARCSCIvC4f(+wDm zeGGvhXO+J}8#BAXDy^)|oE(+|;Pbj! 
z3S`|LNIdLXJ@{6raGWhkUh%6}#Cx(dt?fLvw@DIl29Y2{x z51N=jD>q42O&8X}1dFV1O7?jAZ6+?lQ$2rFa-4_^MdjGCX=qFGFGF}JYM6bT-`j0$ zbA2v;J@{8``kWK@Fe=O;n?!kIr~JyRaof~n=NE^dBzLs2%V4+CHIF{tP6S8YErs;l zEtmRrslTjj6|1&WfPpi^oPeHMHpkny&#VTL49?=slb-k`xjwcGxiR{sLYA^b<`caA(Y z9S7X<7W1Fc6*T#-qiD2)vB{XBxTh?70e$|eYy}JBfz6Lm{r@x*kC$c*ULF!~a~NZ( zh7!eR|I5gIIJo`O{T$x&zi|D3s`QDDF%JfQygv)qYCAk^jYgWe6icq3Wjd_=$^81v z+41~HZ626M5E|kUdS)5&!Gr*Zo0*6UZ(YBOg@{ee_C_c$m7QuiljdKS($9_7PiONctx>a%1l4_?R0fOR8Vj=CvPN)KHy*;-_%b+R#sH^WT+4F<0B* zFsf6}i2B8$P^JzSlu>eKF@zVqv+D%)dC(bcyn_d$$A8-M!f{NYtlQn2Ak~5%)X|G? zb&S;Xp2jyj5hFFKK}ArU{T^GR&={j0?T1fWad2|Fsr`yOeGx;Sd@++pEgz?@5vtZk zdHJ-H{p&L7FNnU|rg5WF*fuJYf1Q&%R0yKEwxO2@V0AMxTRf+O6p5nVzOlrI6zpN% zExk+Ts8YdqU3sNh(d6ZWKe#$~DzWtO2!_5JD5CD4ZH|6qWMD~KA(^N-YZ;zSvo zIr(tj>sR5B^`&*I;KT*EcQtXsjbVgjL|~Ltvy3h`Wk@^!7?~X=AtmKnW_BIX_FR7Z z-1?!#J?INW1#C}~Sr6_wW%i~0BDTGGe}6JNVHWxG9OF{Fcr+eQ=*v#)&26U#=TbRW z!MU?{?}8^$3xA>lyOj z9A96MJ)7shNb?U;@rKe52L){Njy@Z4ik(sA$T$;w4wtavmK6nA4E>I#*Q-GHeuo09 zPmFfB9(Lh0o~K5Y*600)nd>Ltt7H6Pa+PqE;Dc&u-RlT-`8Db2uX{vS{`Ba8Puq5I zi6`-|)@AS_3sOW3k0Rke zed@K=rPjdz)2G&1P)G#)cXXeGl6GqKNBAkDC>r4qL^fw)t5Yv((ymJN0@c-igf=mj z71c~FxG{P#+>lkK!~NcB+3dE|E#~a;9P{Ml?yUBrd6DMCHSAr|1z()}@Q*cxqe0nr zw1>2=^H%r0UEI_^>1B0O?w;-|{+`!)s}LA?3xb~kGp+M{DMqSarJtpRJ4^U%%UVRU=#KAhC zc$GQz59KZjkz|mDRlu?=?Lq&`Ja~4*{8uCNR~_bWiJT+%kpeBm+Y)iCMMY<{HoiF0 z^f7ML_TcHTD59YcGM`-Owg|6k*tL(3kBj_u7AtOe*V`0HeIHK`xa{oq?5pZpz*Za( z>_TKB$Zy8fcU1yQrU8%HO#!2QL;t8rOPe;W0phI5D_4!3ynLdDQ$OX|KrOHSG^SXF zhG&HG|->hDdeR3+(aT+Bkf8?DtreEd?#z^)xLJ*q{HK_FUmChj!@ zRHXI?QU&oOt3=6ev7JtQ3SCk5e&WFMWp^nF%tufDn-&EaP#V6`-;(qB_-Vj!osqnn z5DigmM{|%~f8O$f#vRRZ-C~KTs`@;mIg6Lm$ZA|pv9HsCfK2$(J|`%5beN1WRrz=Yjl)XXZs#hmt&*W;Ju@0s(L#S0b1 z$RQKVCLYDi3fd3}o_9^|8)r?j6p?n8-XZ?hI7_us+o+0zDo9(U{i*6zc;LXXEb(w=xW-V%8BO=R@c_YTfkO&)Nf?`MkL6aQG% zC9}c8J?$$&aXXEr=`TETT@qQ9n`8Pln5?t-`&Qru*FkUN3;%ncby@X$Pd4cbgB=tW z#+m5751h0N)sNnm<;(JH;bmt5M!lS7RO@M|2QET8raKl9sKTERaqgl;t`TP`xr4S`dJgQrY>3+frdJ zv%KAm4MSj^g*njVkQ~aEhDAurNvuGmCZy|CKDy&^si_~W0+qRK%UqXSy#zd@ejiqU z(a$uPsH+o|fev&MOQ+LJc`- z6T*A_gnBCXROW8ut2(x>gLyYL5KeJxmRr7Y`HTrW3@$1<^l5VjOBR>706OUm{Q953 z`2*RDG6+TSJa6co)BKiqlQRym$CicR=Y5{-YD|LvdP*a+h6Agc`r9i}C^sS^%{cpQ zTUmRb_y#+usPUGBL_qMHjmv-NN1`JCLnq8`%TfG)>4Y*^4`9b9_xl~wCvB^rpQ~4U zcBX$y?`8oPQc4j}69@(O0kc~Qu6+vniduIhQv?}k9n|b0XcCfrlQc7PJ0{RQfH>It4^j(!hnLomU^!{gOfB)zt#{75s# zH^R}WcQ1a4y5UZttIt=sXkqbfP=&%|JdJt=CdOi~?DN{+rYs855?+u@-QTIbUX&jW zgyWFb|Jt?DKhN*k?z1c=7oPenXg;G6in5I+UMbN6PH;1}m8$ssn7TeOq|6cg26t7( zXtDMULC)vkF7_7{9t7Q3{>dNIYV`Cs`(R-sG=#3a5^)^PNiz&~1?2faLX4lQlWxhM zn-;&W?ig(We1jV53xm<3t?Z%vq|a;J0drl5N}qEjV}}(px2YRE zdL(2jlzGtj;siBz{1St&Ak8#c6*GW5u6^&m#CvlIwK!4?$f&N(Fo`?mqu*Mnr0w*qZ(3kbvEXViKe4atFc=ONk5d zBK1i~xjD2HCq@6d{@H}tQTu(N*3C{Gj14k{tFU}BKAx8L+pZEA@toW=c}Yjzhh^vP zS)@%$!oBhSoLxan{GOybzar5>%@YrIuY~wKc@}p`<#7D{Mt;WF!z4;&(M5ZSsUpEN zd*lz&juGo(Ec;w~)yt2R{sqCG;0=jUJfvx)`fD#R-fsezppxgYE^a2lhTV-?4@1&@ zujJ5)5ma|?i;meHDlUA>YV!L+=GdFYZH@-)EYW=A9l{6QCeYTWVHgu9!qI1dCXb^y z3B{n84*M-&&vK6KJd3V%ZHM#6_B-KXM=`lbi`;733B8A|^r>611u7hX315|4r33QJ zMgMQ^H>tqBsTS$h;=)iT5VVln|Kf36#pkF}Ue5%U&NAv+ZjX9+UQLpyw=DTpovq+O zf8mdj{tImls9pv=L#KFdlhikki32QbN_Iz2u$Nf#&BI}}J1M_R-%hocABH4Wd}~>& zQt+jX^A^X+`r~F%3qg>w#D9wRVF=m8)@h%(o@a=B7yB0-4}H@Ras1;R24Lc_6Q$U~ zi3U?;8VFh!i;RYX15%nrDleR_wUMd7 zD*7Ucu5B?)KsG+%(7s?Rw@1@|sLLGAT^bB%BZ~)_W{dSG24?6)UN8N2Na94Hu6dz# zi8Un+uMkfd<3bTNbz*4&f`8coj{I+AQah5q#ZZSyf6mH(QLil@s>HdKccxbZPjmdT zx<4Qb>UgW!tufKPu)+RRHqZX+vnQJb;b8oFZ7(@3IvYhJkWieX0KR9;<^Yko0r7JY zv<#sd?R2vrI&nR|vwEK#Oxp&~Q1utR#nx z<^qu-IGaBd)&zy$D-11p(kMvOHOlv_{zlxPmr3~q8LlThC)F~Y*@)czkUCR9=zdwM 
z_Vv(P9(3|fX+}il()5qDd{QrVg(m%}AX^j0@x{+H6zwm5_bMM)gpzgsL~JjmBcbj} zWc&IxKVs;N4aMH^q>j0FRA30(DsVJ+W?XYf-@K(a1R>|6^q^T2bYv9aI8^U>u}as) zZZ|F5KY{?}nynq|FcQh1CHUl6Kj<6gExG!#Yz7ARu7TWI7ltk6dS^llf_=Ln7^()5>vG%H}=?n^|dat;gL$c*^jxWe2vpHhNp-tq>aBY+(qGjNzqG=hVp!(b7}u_ z1t+AaoB&PY4cNAg2>M2)aM_GvszSYt>QoT)&%~4oK$omW`S|X68Zqx5AMv4l52bH; zNDYg=Wj*hN+iiQrJ#f`r*F~-kkY++a^p1sp>lLzF%>BTyS<;Nttft%2N3!KLic}?= ze$X2ah2Q#LEoiNbe^ieJ@HZhe&8014`Jja#P#?+cZF29KB<6)K$j*bv*&=Rc5cMD+ zAKd8s%=45}%77=KNERxu(H-z3i$qPInsZc-wabRGFKJt}+$Gx9^pumh&7JK1N2Nr7Yn^nHaKqI(U8k ziR1vSFXVA*f2~_dRi`LT+BKJD2QB2Nsz+$*Z^?w>Rxu>+#;I=zZjTJj%+|Y$Z^4se zuiP7DbK~h3u(NM}DFWaEop!^JC-7dp9U8~yqe<<^{tsX8o888#pHDq#n!EpTVh2)P ze0gsA_~`lDFGnGVE`{G7NeX{mxpYUqJ1;W)a3AgoZ#2A~HoBvzoAr)(Y5*KoSJdnM z*)6&KwabCG)Tf<84fA3tYP}kby9tufsa?f>z40B*Q132l{8KNmflp`i*Ze! zYar*D2?a8@wc6Q!!2bF5aCQ|zR~$zL%i9jJYf_{L-l&Ky3YFLK59P|n@nYo7An@nU z_${J!Z4%u@`NbmYe{naX^u`o#yU(g$qloy?k@6A^2$%5GZgIZzn}y@NOm2o2=fXQs zqCd*?3i44!6VpV|Vgd-0P!HW}>Y2!8NdsFe``kG*2vc&);%PW1&EA+g-3ijs!InqZuoMTq z`ki%txV?w4ru>skf`>3W)6aB&y1yhzbT&t}!;*ZlK>0ejB5alTQx}S6*b~8_&o}~E0 zENnx@5FE^td=>l^b)`?OfQja6-+xyQxI}m1VvnoRTTT{d8J~hvQ ze7>~Jg;xwfnuN%~QGNc!Z|cJG@-1*k04bpN>}4w{F>B0gLNau$%Z&N&&rLkJ7w}Ez z_PZy7ay}qlR(-mA@5i7W%|uD_4VMtG{a%5CeJ$OvGsUm@yCGtMM@7nMm&8t7m$A5DZ^tE{`+!qFf)%O%hKHV8P-DpaAF1KP4?&;{j<&2a_TV_cQPz(UZE?4Yda+wp%GcWeiH$#+zX zxxxz|mv7U!hBt?a(K4moSq_)n=>|TaPdHsRMz+RIT33H@ zBura8gd+MrSj00S%%e&hxVs&<(tr#Wz^r6`GT9S z+XaSQ*dI&+`aT7$3|p+)!Gxm7=g^^)yKIBF%Wn@7pua`$koW1VJRsC-SUiS}@vV*bLs@i}0 z>?fNaj&-}HpRV!!Cq5@-{x8=dKI^5pf-{37z3|$gPqkk4g}!i~Ov?>x1yC1fgqP~; zrAXZP*=7J~ZWS2)QiW=Zsc`JiWvgVW6pG+&1WfL&#J?mS^*@{GO2tyEl%i$uJfJ*c zB|=p{u=W!>h}<103T~FhV-i5g)mh~_GC!{`aEjm-pw0@?rr=>_E=0!nKtwI;GL9Z7b+l)EZ z!j#Ci_VVeotgw~al&7%~JU#T1jY)ome~K-An3_B_H^zwpX&0^^$)svWbUA(MhbuDPx)N(tm@tNS>zqbz3_`k{)O`h>x9Z?r*iC4 zt8ZZBo@Z$CI<_*K$cVeD_?>tVsO)P?sEdvVFo~q$c4qA~aP?_lZ#bxt`Un|(f^u)f z_!SsYSGg?Za9EppFGVC_Zq0O0un|utj0K+POSl+<4IRq}FNPd3oP7~{{`n`Ts}G-^ z*uMOSF&EH5X!h{cL-_x~!~eUj{Ew{GS|7E0N1g5YSao`4DXnadn zsMoDS13Yw`)03XipA`Leyf*e~{4hEiv#^dqiO(B4pRQq0NNOdhzq zp|JH2Emf>B(KVinth);U{+N^<;T;TVpHrvC$G}1cr4c%oC{X^AQrcGeV&^{2Ajq?tt-c$GLJTU z@B?z*3DQ$ZVPy*T?+T0wJ{0{Q!rm&XjW_BWZPDT_mQvi^TY^)dK(Qjlp*RJK26u|P zyBCMz9$bS!-!`^A{xRb=`iqux zv=$RlzP^L(OaDu=Pi(ZOR*iSh4!3A=zJM4Feex}4|6}pp8e;)%3WH0B# z1f~Mh$$(f#?J#++)jwF_q&C4CD~s_kERpk3Fy*C}G`>b+k>WBD; zJo<#(kw@FRF_wZ0&rkwOo1~g^)8+N+$-25iFtTRkNU9JF3P-#&|6GpyHw+XIg+lrm zg3AbF)l3|g6T2aOCb|wiS`Scpt(8+pk2$d6(bzZ4@4XzsqZ`s4tq!+E_Pa|zAOxso z7O>6f)GYO|j#9Y3u)EFrkBU|JghpEq-n?U~HU9O__VW8*qT!~0BnO|V*FbW)C+|gN zkR>Nf0>V+W7MVr#dBK7O7C0KVX0_@bk}7{B$naelWFxvDEDx6?&Mq6mC0;M$=;lOI z>i=fkJ)Y*}T*>ezFbVc-8sqwOBawq_Ehv3&7Zr>atPjz%R=F?AYiF4p^g zs9)1dK;6mbT7zv=J-%9liDucm&EVr}&UWqgDNgdunwX8$_2^6B4R@#QJ3?+@P?=G? 
zuFp6J6(d6+!FjRn9J_%~T~yNwMG6Q|QlDM3qpVe-^GL@}3}|+#90`1)(*DTylHy=N zgFUsTh`hJxom%e<$q$L~cV4u@=%U!UDT*pvogdn^(87#8Y$?j`Z&Luv>{1tB0?jHn z`qFml;21x$?TE?=#dr(N#uQT$%D<$h6SF5vk6EJ`%R?``v4;<3nBEIJRQ%==4UGemC&*h7qqwkf#|uuL1rhH;rnHCW~G9h+(W}daaXUuybZ)5OV~_HJPN!Hm*)4S_A}pLV)_`)*5if(B}xH8Ko8R*CX@XxUL9k= zkaxz_tm98^mU9xnYzJ4o7)p&5HmW&=@iE$1>Oe&J*UVI1D*~>)J-8?(Ecf>aFA?L3 zk;X{otkm-_N&$a&aA+a*Tf~O#1C?Q_?VRQYOn(L{PMGq#zyW-M(~mE|?l@Hc+VbSd z3H9ksa?#%0lIG`KHqLl=+A8*>0Q!B&0VNJoyNqvT#JZUhdl6c!!f4$hQHaH?8SeSZ zm!pe{s-|D~Aq5ggzqBX$pQP>CgGr!;xR}-dhfYCBvQsg(qbYg53xv4OI+9Ji*8gXz znH?6gMIW?TjC90U@Aj_he%!Y^YVZC+%C~r|0}9FOBD$n)tLwh`DrV6zr#f~Z7%zjw za}v6Bs#&*}0^Kzo#l>|VU6!V-RbBS8P;T;8w>{)FwNCKUW@H|$kBs*Z7C*EGy8ODF zAyfi|#0~2ULG`Fc7v{KX`16jjGB@UGpLU6Oyrs8hJ)e94M zUXNw?#AHB4(}9gIKbYOWA4f0%-sfWJ z8vyLLM&vljfX(fNa=}g7%S`0JdXdC~P%#gadcwhNA>;^^U}3Vfb7Xl&H^$fYJ2|;> zRhHakRf7%roO=Grhph>;pEnOMn~mVW{Bo{CqOpIt$?35b&WJe;XV;S&oa50d^YFJ7 zya)DdLDjWpm_gT688Uu|nrJ}C6GK9?<}e3{e6U1uHBAPCKPe(k=cXgcu{-$_ltWnX zkNh3kLw}0*s$V&Q0a5c+VY5awr78W|SN_xqVj?4U#!OcGvU?71J=%fM9X18>n!$mM zDRJ#9ukYA~`R`yLcA~v#Q{!Ip-ca-Tp#-<{pC5v8mCk~b-JqRjNKVeeV)c8nIFT#>FdC#LQpc<9P( z(S_ik#zn00H#vHDOF!Zqf3a7pe5DnEzs_(9JO|wc_I!z^RQi3!DTde$KD4Qn%xNVm z`1KF7)xQu;nfQ6tl1#hz_JH%&PTp`oL6_q*Ox07}ZpHex&(4$?J$jqhG)YSN^rS#_ z%UgDC$NRMtCENdDKv57^(*HnQ-A!UK9`5(E#Zx0+-x;&AJQ&9@`U93vFz0)SGOqaZ znpSK0-w{?`nGe4n_?@7m%l^TAvTUjR%~{*+!hoXIZnkYw@tHryG%}W_9#-PS@_Cl( zx=#(;1c^!0gA5Om`GWJXaOH?x2Q8g=1|Y_p*@9n6Y$X59!2g7jDqBJ|_8MSOb2A)G`;V-fZ$1@ZdRN>91JwQ}GVNy+Zt)jvM0 z2`>B|%uN;utDVfB*>aE=*0-U|d=ba_-#)|hc&hP(lH5j~EO7&)PC~=Q6^w?*s0-Sn zv6Wz=AfE{*b>4xwAiZ$6Vd8uzQ=;vtw{?xLgiKdttbfrp8ff;1mG>nJ{^Kn0hL9LX zM#qw{t@r>E&l*eYpVg#Fv;z<+35a4YTv;=j{ba4!$6G<#0t(Xg3Rg^tXjxu?A@a=N z(ok1vk=Cl*0jxT3noPzOl|D?28{6OfFccRZRBXQ$CwyDyIaI}&^V`5)`pmZ;6})( zw>3fTHb)iE^am~vJzWi3dl*f?c9CH9d-8ua?qR+g@7_9(M3`pmI2Cd@-H@1Xmayg{ zz;gQ|ca!|(tj3`4Lyt~W=b~ySY|*%Gb2i&a^j!975HCLoz8xI3`HhdUvv@o%LU!t?&QdJ5iykm$Z^=Ic*lK7PwL*vdiDp4hCA@`t^EjO1`vc+i^kTn$y2VRnBtd=c1l?ef)X=7O~-&Fem35o=CmXG z#p0<2)n9ctO;6u)R*{ueB__&!5&DtlX<|EBG}H$?t4eH)XOJZY3UgSJV8&A)`*!-J z%>a;{D_58EODY#J*ZN{@iSzs!CfGh7#wm11BaQUsB7+IGryAzsMK}=1!5EK|h>@cZ z--til3U=!BJf@|_w;7+tcxdp@;@^2?7dAJbOr>_pFy_@0$HsQTRr(07KOX%3L7}pS zb$0RT(AzN>AIOgDxGuygO%;xn*WILlx0lj+)GOkjr+TupeE2QotRhn#f%^^m?kyzA zC8WGZsEFDw>^r9tC8xo{dyqw%s>2^3p>924lfgKp0-Ja4>~@+Fx$b*XTWRZ+k%fDW z*!p9|Dm49*Q@c&@pg$3m?G#hG}dBqqhh{FD{6=W+ekw17Ynt z-qUGn8-2W7K^(D%`-ydUk@E9joVc5|4>+x|l&dJTk{d^Z3u9XJSQDvB2wu^+zTw5O?)+s4o(`Zx^AvMaSC2F zx_&Yf*wG7O-GJi8k8FagWC^la@9)w2)x1jl(i@sAlHDA{B#A^An3X91SToOgh+w@4 zYh0a(Yjq8G!O{uRf`Pqj(&FM;V+Mxv$$>@Se!+N@fQ(2_f5mh&#iv>ai+Wcs;FK6a zoThaxR$L%aIwQ81flGxgB7k@uqZrYAQ|!?=-M~)SjBl2DsO5l#$H z+}CgwHjKsMyk9|j3%D&^+V{Ad2hz4k-Ce#@Q|BHsHc;c!GQofjAP1FKFGyHVjD+L@ zqAGW|Ohn6^YE%WS4ho_r$9>soIIN*zJQquxgC5v!%f9s3oij4eju;o9Y5_MBM{18# zPuX3+Xa1M{4EXb(Seo{}>WTjZ>0|PHVM$-K0;FZWKGr98qU$66WsMWL#Jc=kx9<@o zIot+3!o?e-Q!=e0DvM~Po$~7QteJ27xcL(l)GbVRk5hmo^LJ(yEY}r|_CWA9yN*hJ zMf!`-3dR8a!4IB=0^K${1i!pd;FOGL8D|v=!4v~p@Jdfg`-rS34mzo9>X%SjRr<|* zUuYmPTVEfJM^=fbvT93TGYJ07Z5CCN55*jy-SJCMgsxmvm3#fGam;peard4WG!mw+ zK^TW8>0GA(9&Tz1qUm{D!k%89IJ-)s1k}7!VrQfo@d}8B38uZ%y@H!k#Z_{Dq^Evz z$s8z72aP)X8)KU=e%^FbR7h(hg?PyxzzemZM$lx%`Az;!b)q^gj(1QKB`Y(-Rxls2 zW^@16O2b{=kz~^QnyTfldPx&|LfIyIJ&CUR#*YS*As{}(d0*r$H4!_b_sPurk)*DY z5(4rM&X`X01O$We58pniyD5mhv8s3cr)*6c@KM<6B@PeQ(y&8J{}M@Z$3I!O`qIZJ zqGY*SukhFgEk!Jc?VzF3$qy`$v)~coJdj0kKvv)m3^r~b-W8Cr`A>Z2=Y7c&2flXl z4|Mucp*X2Na=!!M0J7b4&RtpdX>?Nf4*LrqM(b}?1I4+rjGcZkolp;Yj`tEf(D`d^ zgr{DMWqJcX#Hg3c(QSVFzU1wEW#F&m+CYrtbq~>%jmq2V;3;hOZX%QP2JfHmV0(zH 
z9oiSF`)=t)r=qk;todxApu$Oo0utj&i#y%zvUyJ`ukg^h+c*|;8t6m(O^s(B-9=`I zluc?7GW@PdawMDUhW{$@FGH(eUd{#~k5!05Zj_&Bh-=zoezLCB7L|y9#f|(;iAW+5 zmBg2oV%}UXNBm8dD@C-K`2Vb=*5$S3$)Em@J?(#2v7`oC??X;+2yD3S&({jEVxE^) zvTJe?25(Pa#MqAuM0JTlZB2U4zxh6fbvlHnMWn=ho;)mQZdD=J~IB$ICY5IujsqAv*?Q;5e6&V|THAh2oX< zz(pD}cX;TOT|y1i3a7m}=tyP_d>^vJ5+OHboEy@I0sTbb7$II6bV7% zu4XW?5G;Y=aJEIHhW-i)cwZwTGB5o=Qa*+*HJ)Z@#EzxMWQEB?2D}z9!6YP5)C&X@ z>YkG8vJzDRO(BF7S>33C64e5k`87 zioPCM74QRrijJQb>3W8AG-4Q5Ak;8-RuOUpDITT45~|u0f0#oTFd^7u!eX5 zJ4SwiH%;Pj9ZbIZTSWzZUi?Iz;?A{fIs90g&QNH`V7I8ug1vb81B^i#@XAhlT%K>a zl2S~U4I|ZFTl0ss>zBd}tF>3OfiMYCZ1LuiX1RM01>^AIp+eekWJJ|&gCTy)@^rI< zu)nm((ZaWr&w1dOJjN;P7i}*9U8ah!+cQAzgkd-uBeC1m!O zWA}yJ=-HU!og`#Hk3Ck;=>~8RY&cz^8Pk0wO#70GW-HUkD(IBuoF)(zWk!G z@~ozYQ=o{hlHE%S2}wNCJ)mSY;&7&NrT)R>IaCT8e!MUfE1D{B{59X!(g9lg+R3(i zR0wVe8F;M&bUZb3Ix~V|JN)X=V*0E&(PIFKbMk{r-!%bI@i8!J2Hy0)7U}gYjnV6p z3<`4ITggXcB8#;n8tO}l3?}?%et-1yc|#hH-`7p~bG81Y zrR%&KLVML~*K--m1uEn~2IQXgf2|>{1kNP4jCH~#sq=j- zqIl02uP%*UBiM`*8}w$N=p7txe2xentl!NB8n~EWvIYipMhc(;1@Ti`-0*ykw5@H& zUqnx^?=#5!NJwxrF45QI$37D)W%fQ55zEAT?YUqA*{_G6L_6B4V4jJ&vxcw>!@5F} zu-x&_$avv2=nwN* zci_}~0VU2Z(wgVWwL^gTPYWM&?{y3-FIi2JX{X(8XdL`#)`PakU-x71Tnj2z*>`w% zyXT&X)V2LGV3;3yd45@FDoWTsk%@lqD4#R&hYe>pq3d*mfg!WrsN>NRQ$B!88JT36 zwDG*$tJBX{$+5ULUuPvT1STEO_nZhu6HUaPH5;3??eaPj(RMDrtPQI2bGyIIEvrrw zX!LkCC;IvoyBe>n*Ir+e^3}`%AEIWoxj4Rjy-$aVYv+hf)BHfsXwtp{@Aj2`DHnM; zY5$NVa=&(6^qu%)3E%GMry%lG+`CvNiye6`FdBQ{)6^TK`k;n#XhNz{+(DA9iJB$z zvm}v%J`sh{0{_i{qYog9?s1I84gcp2r=_`hCA&Lo(j(USdYj?Qvprp}nC1Yzsv55% zP4&~C---na;4xCd17B8vrNiWeU>a1d8r&+=6Q8zn)to0}BT2GS17>dfzaLUC)J@w& zEq8F-)yhR5{I~9jn})lIH&M46CO|)=-qa7-x78 zyonf=6;{B%pSXErmS`R*VE%({%~Kj zuzXq5eYs)TOYK@c^lrp8w_#iWy7tWkRg(InLq@Fao0~oZ9hXi!MI8$C+kdYuK4{&o z?B{t@Imp1H;|3-?{4C~a4jZK(sYX8ABQLHlG{j)wG$I<5jRq^7dOovJV6fN2B4O&i zb>pYLk%fh<^3KR1wC_k+`T^AdK*xNe$Wd?_9T1h}8sE+SqEZ3!{sliBFl7`>+dO)@cMA z@AhOPmTiNsm{0#t7C?c27xWPtS70yYdu_Xnk0V#jIwOnCHY@DnqycohqIv$I^$O5U@D*DeRm} zjYlS|T!uv^_hPeD3*5A5BU@3d#?hV1sP(bp`Yns;7nOKeIPMp5cxjh9pRAcICLw_D z$OO%T`WK-?K|7OTXG}5{UqhF4?l|`MvhMM$J$-!H(`Ml_F6TdkjCG<1nN$pr_8_$% zI~R^%=QLXJG6j}0?5;CHVVR(yGVv`<@@sC{v+sqbsh>xjBsW#?c!#Qn)H(e{$`oUy zIx{#NOy6Nc2-1XF>*^a(xLA#Yee0KMdwbW)=7xR{dp@2-F+l~mA>F6+=A)g_StRYZ zq_B}vd`>}DJ?O8q;YS3&=kgUo!`BV;J&j^OZxz)axZL3*L{ov6>XuMhee^0xH(tG! 
z4NVX_r?SS%27Mfg;gRk^USJjA>Xg1d;!-GI0-DWb{1w`qO9can#7x4M1`8jNLb0DGD|WfYLr4TY@Yi zZ93FYz2Ipva}Y`fU$Y9g(;P+?=M85!(+7*sYs7o(IOd!m;7ziD%b49#@v=Xk@(stodzgW$)lYOl zqgT#Rtm(mHOPq>`ZM8djIE4G_;{+RZwcNi ztBz}`9-n^c{wiBS7o^*+oZrW0;v{jhk-)~FybaO)s}V%$r0-+cV#KfqvV|i(hss6) zpvrO@p@e@!YMR#@0POYq@Cd@+#hhV7!F=;VuT>ajv8fw*R)mP;N>hkjD^m8qHlxbZ z(brSm#Crqz30AaRw)BIBl21m=*z+DbE&`1^ByEp`tnpUXY>Hl6BhJeP5r)lsay=r1 z-oChXu3U`RZ%Y~3)(AiDk2-ns^P2D;6p2HKHe55W$4Eq8_YsJ0$Kb2Zn3Udis#t`U zO6>PjkX8}>6b%|oP#@~pSP(}=5HERdv^CR?u?LYtst`LknlEc zhNu^nw4&KRl>GJX2?LmqW11R}I$+_C?d2S0MtSaOSFJ04WS@eGv>dmwh-#ry9vhz_ zud%mKO!Y1Lx#&9jEg|}-GE`(pK>{|}#4Iy%W2p@)PHm~NUQ=XR(!HA;{E#S4|GCL0 z3oGCI=9Kg2bISY_O2Uw`{TYe{FP0r|bX}=!N@4lCH)ANZ$c19P*UM?i4O?rmP`&r1 z+6Z3o24CeH?b4{fLW8US5aQvikhpSY@i11Ldw9lwHq3j4x)hg;;nKqsdXVF}d;rMtx5$}th`#}^nZaL%VNXgm zOWi#^c2QGN74kV(qLt?uhDpKxm&>I;avhySyM-l02@f_KRXTLWLupv)H}UH(^m>SgBeq1$yQaq1^|G@msp2_0}4H6-`N##_&3)E~(%itkKtdbN`vpsETck?u^ZOvbm!W7LJ%G zt4_SVQKUi1{s^(_`Dz1gzQ}50U^3li1-CggVs&{pELs$8d=o8U;J?+;PZBnBqA>}n_{_2P0f}WR=FxeVsgMu9tAPkb zBht5CJzq-x6nLGcGU%?NTkxcbfya}>XV^>hF3<=(Z}tFjeWl%@5bdq+v+&@Im?Bsc z=@}s5xe?NefH|Klebyy(^wgI~@-#)+;2xW&e(oj9_2@T!h_v4plmEb`?4JP^BSf@` z-)8S=<`)QoZZ0>3LwFvU0UMU@L{*E_w^}QDOAeiOsGueX)E~D;L0}{(zrOqOvz~wS z2CFX%5FaUC^yAa#oq>l0H1)G66w0VjKrH?QYG#MyVVi%vvd9P9J$sS~1u^R*pR4K` z)V{>ba|Xl?6xy@!8g996LcIDczJ4jeVN+A*&{p>}6>vT{TmEd`gLkhACcM`vGpe|8)E<#OlD)wb+_uOmZ*D+=7AvWks z{x>mFLhO!g&Hs~abIQp6JnhZnQSW2H?}YY#o2N8S^;_WjU_7ms)!%G`ZuC8#0_T=2 zHMNh%U)DbK&2h3$ds<$k_VmWO*p0`7M_e*$brIZLk652(lAC4-nD>I5x5$PX;vTLW zIUxkD-)RjH|0x`i_k=Y-abno7aQ#yvg zZm%egTF!Sl&3QXr!G$75<8pMAi^Jvk?~L(de?+Bd|8D=lZ~`7>q#Ko8yXi$^3G5nC z$oq;vk6_NW3S$tY$Y>2V$)A)uW`dN;z0reN_hJ6{t?F${sn!V#95NXG824i0GZIGG zL+Vb`xClu>CIP&+Z0SP?W9_M+kpk|M{-&xD92tcB=!H^Nx{_aN&wuVZ7eA6yw@vtube!=Rn_1n^pMA)arN*N&U# zW@NR`eFg9_;7CbSoeA*>kaL+G&o`uGA4flF^rA`ZF?91)cS#eDUVM(GM3E^Kp-u&S zTauQYiPid3u^8_ebH*+DB1{ya@6(vy&erLS+KLVDr*9%WeksC!OKod|!P15fapDnQ zKyc!LOIx^pL)SLDRSmPx(3r(pFz<=jE6NjU0KdDB(nMo+1 zmHa;tS7mE?-F)wH<_6PeK1%oX&GXx)j3kjBLx;%)f6*CgQC+Lr>< zb*OcK-{K9CX9r~k941!p%l?3{8r-L|1+@})!+$=LpNS8bZ-;nQMemK`HLHX4@ z9{?#UJtzb>5mHHBr?hvo&Scw&dzf29dnCE%^7U)fV8^u1lLgJSG^7ZWR`yLM+oGfvJt+R&%$~g6Kw9Ay9b)2G-gX`vD z+#bPw0tT+Yxl|<^S5^~PP$R8S!9$I6=!Wh~&0{!T4KlYr>R6!%m>1)ET)2E4WCKC3D z0Ui<9&M9@dR_tU3LhU;XGe0c;k33D1aQuP55!*n@y#l!<->Q$ME#%sJDfNQ*6Fb3@ zdMBRnE)%WQ^rEoQ`!`+a1r`NeK(X?4pV5_y(e86C?jOs8A7g|G7C%xEiG9Q>bf;8y zW-65s=T?Rls~d;(^cj~_PiJh*D5m7T2HXe?>k!`G6bP*$a5fldGMgd4K2I}o(B)x#CWd13SE9T13Pr@k~^)O=nZ5k`Y z^qlXT+P5igKaD67^G_Wg!6#Yp6Nt$o2LF6@t35-c&^ z`+-t;N^n3D82&(%JEIZUgXQO(NtX-S$ubopH`b}Shzh%RSq(Y>Rp}-d1 zWVg=_w$*)y$1-~^IV*m?Q@&s;?It|y*8hP9C%bA~(mBFoMtB@=Q_k3uKEmn#koWQF zuzM-NLD8cC02EZ4_CBfcK1})$V+o{>YR1%PX^}eDrEQpU8pTr=_^S(CVO6r4$Hh98&Ejox6Jn(S3tR6L!jSBvf3Ge2p4ORxgS{LiRqys z>0el2!}o&8aB22U%#PtiQ(lSiKrB!xW#S@EZM-)!eyx9n7S&hAGrn`Z8T^eSh6Hpr zd9?Ox4#h&!*hq8D-Qo4rjFg`HPIB~pIByRb1zF*|;!m2;r6mWc$1z$b)Bt)GmRn%M z+lH`1A(3ND*GGFQNuOKeyEEMI7;oWn0n?=cs>iUcr>Q&7%CYn7wc# z|G4}v_5N8Yv<-UhE9PJq<71i}I0WKW(9TXG!xdeYwe#47=@4?hG5(CSS5Rcg2 z%F%&>oMnB#Oma-4iVd|DI`<<6_kn$FYSn?S99@p~wHoDrJsL-2xJnBk#1f%Kr;YhW ziR$z9WZ4F&fxX*aIp|!)R7G(qGm0DX=Fip`;4JRmS2s1SzMeG>hd(|vE-fQpUG%ms zO{>?wPd%D=x;1pLuoCPtNdKjie$SraEKL$?AKgBbbQsu=6Eif)NNH!yi3)$$x%=Q7 zH03Zk+?eEV2a)sGLYt-s*vd(6P=FY8W8qaIh-}^)_TLl%+)k%%>6r0Ug~0`Pv$z*q zBol%s*>NT$@=NcFPBDXEFP+%0!1U(c?VN_Z_w26%>=HlnC|gwz0QCx2Lg^*%-C{@Iu@b@<~UY`9%sN5(@6fY zYx1`pW@kH5hNye3nklKH6!~rms3Xyu_gp|%_u6IrUV-}ZqP#b}y#?w3wb1N<4S$v5 zC?y5%I=z=HJ}GoZXg*keBk8_xW$-Q!t)xxB7R@3Besa+o@5hW*vc^8zgar6{-856z z6|}+$NI@u-eOOpYoJ8sXYAVt8a-<_C}{X?{%y5kfRmg%?uw7q@L7AiKBjm 
zeF3DI?8I0*ZKX!=w@=>ls;cs-pKvhq)+LElJS2{nCqapQk1H^PSWjsqgylJ~?P(Jb z5cD{iKMnfOjopxeT^0MvzyAx&`qT5iEf(S1fj7E|TwFYGX_NQh29fM%57qvx-=(ci zmO^A$0=H4B-t&{0K6p}8DJe0Y>6UZs=1S2vlk?2t$e9WujaF9$=zAWJedD})*B5}l zsf`<7m>N`%!%I33s`N9Itm|{GyYfZSxy&Zqd*P{_2G0Vpyi4+$J1}j0oYd2kr0(B; zu1=q5j2=`yJ6%Ii`sHP?jePfuQ&a|_`!tPBpsng{1^2jd`tW^rPUo}|G@^xM6S z4aXz5&?@(AyY`)!U16i{UaE)H!G(&qfD~z##b4(^2Uz7oNY)Q@H%rEJj+m|(!z|Gr z(pvd5^Q0$@?E|wBK}Za{@M~@!PT{dn9nxHQr#78$t#`=@&ffsf0Bhc0P33$@-5*ra z>5V1e1@M|0#AqZwO2$t7yGPSP+E?mm6pHZHtl$3d?#Xu_;GM9TWJy?VzbVN}(~dG4i0LoVaeC1B?@ zMJ>yGgbG8=BNuuPZ@_{o`lEUw^_zC8;xbAiEcN^GZ-Nb_mfd^jReNsJ5ep-+r8U|=?G!JS)s#T z^f%yOBK>v}xgSW3)uwBV*9hF+Fv&)|@j$2=ea4&?wl;tqDzs(NUT{l620Xrp)^q<( z|8K#{vIL>n+K6hI42ZsxDSJ>U#($?#Kb``aQK}rfx}HRWZe3%4#++`o{Ij&P6j{=@ z_4G3=iDGV_$|O82hlih7C>NXF#gmx@xyPxE&JYqUP6qt>G(!ZA@7r9Ew>!eHisXK^2Fvm$Xy1I0y09{7)+GJQON~ z!oa({A06%chw6bW)!(bU>Rs&SjRP-Kg$`$+A*e`m@@Psbo;hRxhqTMBvBAsZZcp>S zg+kn=pFcZWgKrvXo(HcfWe+^+ZojLI}-xyfSfcTEF{xH{ZGb z?LcfaaEB)vPin1IsK5@I<2&k+Af`}Uuh`Muu-ba9<%j2eQcuzjK{POS?m8W=F4>tp zUA8Ur-goydRNn9u3 zu;hy>tE7~s->re6?;-{xs6@_p4wUuJI z@4=@e$^bUr?~*3LStA}vqFCYSXjB&}p&0>n6rDwWy(y}bZ@;Opm_ufyID|dV2?2hm zHTbRd`$(9b*ik7^{i{zvOZR2Ns>f6JLu*^Ntl_%N(_w*M&BF#%4SI_p^tvsIy`p0b zzpM(>V;qyARB5lEhwGB4huK6 zroL@cq;AN>SmzC0bD-#MC!cohJ%|4nuS%}>*?6BFd$c-FkH1DBjY!q;=-JV)?0H5l zu6APD2(67a;h!GI>csD57R38H*y*d$##|kZoJXyZO3q3LlPRH-il;vzKc7#m28s#h z_70S|9z5Dc{pD&OLYP;Zx#|^+H=pI(H4X{|P3{nOuvwH~M$rO9kyS}q2UABCy zv_(y)ZX@Dqfn2(n-?iZ9X!%*o&(L{uNZaZ~i;IRivT=Xdw0-?|aj^3b(2O&{S%Ub; z;bmm@+rjg|!}ltIrj@qSj}e7EI~*FJa7^if(m8ou)DXpC?lm@>GVdBhWB0;2aXv#o zN1Kb#l$v5O}6VtbttVGQkVIT85+{s!!h zzbHk+j~%-#j?j-RM!Nd8x~_Jy&@BR&sGTB9DC(;C1DKcFq7`@ge4mfdZ3v9OBjTzLCz0i|Fx2iA>&+nH!)HR;WE zLTGG17*A7bWmHQ3tGwZ(9b4QdmdT8zcG=e>mnyt&rI$p;0*Mos>9b^vhrsVhY~h+)$g-UG=i|GT~QO#(pq+@+V=fM zu*e>tI|*Nl(JPUS4g@b`w7&pVO{!wVXf`1`@^yl4YxtW*s`4o8I@6|yfp| zifD%lB!(t|I1=;p?Qc$(v|-*~)24`Ba_4`t&Uxkz2E| z@2w;7FDZ33pxk--z;pitOY^)6FRSM!ULaDyOY_HNoQv*UOD%^J$GZ#5ZawsP&29kU z!|EGrX5?!RhbA24z4j*5$)W-<`^>7SsAn_v%3LtkezQ z=}Z#@rwHO^Oeel}RyM5!BR6Z;t@i%kV)wsQZ$p6#l~ii%kAQ5?GVpfafAhV!q~#=f z9jbsd4=4Vf(~?!vw9(Sg^0&3M0i#|7Zf7a9bpJDxk^GGgC(lg$VwdarL*^I_$-8Zz z?BsPhkpHCcj=BaGrmN9JCfsht zGTyT!S`dpQ@D`kY7wz00m*FzL5tkITVDMM8Sz)xpe-yXOAmT#dL%e_)#KTFz1G6@(w@kIX$IZQuaPj z9&OITTkr0vg_Jy(k5<(z#L-Jpw0*Txiv!Pp4y8mQl(28g{ocLCwR(hnB-Z}OzGdcV zoVV>ns%yqa81Gls&;rvJCvxI1uS;ql2AqdyNynWCAaNAJ+RK@VwOFux4uWFvM==lx8Y0Upye9^}@sVdPB@1koHy!gLsAkEFdA#augY&r?YJ& zEZ{E}Lt-2-&g0aIb8IoI*I;K9#&*qtAULmS^688s{+Nzit=U2zc`(JOZ9#AlEZB?@ z#F8^7HdE84ySDPp(*)M;MMn1W#pI&pW4~uS)afzBsX4|Xm1XgJErjSMSQCRG5EjaM zlM2j&OX21FD5eH_SM>%b^p-y-&xUcx5$t1Fj$x}9BwjEP3_!5^OfUEgvq~lX8&lm# zf#8feEZbCFN~~zP?K4bLZ`brew(n*Xo7luFS{uO;zfjR29C(jt8^$i9Xvq$P*8ABc zf72jJuRTDlBe6f;zAVKl5QCf5=SxGQ`N&$Qz=_`E!cf!xP7Pw=ab}_xMF!fIigz%} zN*i)NSbr9nFsF+;!JMN9n}$D?a|NMm_QBfdI2_LEDW{!1dmpD-u9^L67-K)tulYmp zu;P-k?p3lUBs8F0a5?esru<{gTESlpq51L)9o-p%8Xj8bT$4G4v%_$f8C7=lB`oA` z*7c$U6Ufg*1uQuYSqK3aK_zN#>vxzu`+B9iCFSaB*YBS5zrIH#)=te(Rs-n!!b9_J zi8h01N--)~7^>BjCk1M)z2A(U=uGII4v<5}m6zqy4%RE#m0=_y*qselg}a0|WxP?B zK2B)v06jKKAi$|`iW7P+$J~?C>E%wFrd4^VXtWWmd5`*9c#_s-mf#vrm56{Z6YIo`K*8_JeRQ54No-*^M;D-B*KxcgHM%}6H)lFrR> z|A!i3tE0sO0x$5MKHqYU5G;MaSUvFN-7n*#M!7Ggk1X(SMNAy~;yicQvXR~=w}jM> z`MYA~v#KZ7Ye4c8_D4d4VfbaAD+LLMCkeq{PQIcAo&aT>-C`W_#^D= zlQiC>E7XRodI3+Pe)dE?dpWc+zHaf&>pdOVhlvd?U|ei3Ko=zfGW~1-+DlMLORBMJ0@Qqk4if(2gOKG6+EhdWXXb;ppM0(VF{*jSS?zdn6J4-VU zMiBh?-}`J)KJ5WgUG;b_d&V;gZvn!OEuycVc@+7g=M8Xq_(|`~bg8D9KU4htrZOGs zET@w+g;><`3F5xd>k)=+1#VFz;bTpX(U{_R-o$M$nQJRs2!xxWX#>Jx;m(?Fx!$Xa 
zolW^n-ngD;->Xhnq?}AJt)tyV_br0e0s?==VNBEJxd&O05?+ZC<1N7L+a`JhsRQF&qkuWi(`Hp6(d#7;p#v zDsc!ypVg}NT*}(t@%cJv=9ABN7N424aD8AKME}I*i?cv1(<}J(agtIjFG#K9Z$b9P z`P=+Z7_gazo{T`FtkT7kkgfJdOP3di7NTwIENPx-g+c8(;W=dMBW0JO?=|{-VA+|n z*208dP*pFw^=m3c>VOTxSQ{*fIjPQOFn>u)7ccW^Qc@qPz%hRQI}yzDe-O?8fi@Fl z&^R2s?#GqwwHhji8>mktymH6xhsozL8pTG?LZh)%Du4~Fb+BO$p{;keV}$JG#-}as zQf9Qt4b;@~>#volqNxNuE{+e^Aj^X+l)jut{lyVS{e##?&~|G%0Bzk%4_l~ z)l{y1luS;EQtusqmvqlNA^Mr5si)f*6B|R%t7YE=T8;9<9(f}k4)FD}9Ilt`Y~91d zr8=KcZM>MXG(>b^F5HBA_$(YzMp(26D{ITyM#&a`xQ?@I;f!=s6*wDBB1ux;9fP__ zCMbz${b=Rl&_gxnpNY%tqe6_Xf&O%wak3 z1;7%KDi8uGC*4Dh<&9&}bUW4-bFGUx!$j>=Ubt;Y2GtWE)WCM8uo=0c%y!+?@H>Cd zV2lM%6n8C_;KeBvDPEwsJ4K2+1%eeT?(V^g7I&B6?oNxl zyIb&+f31D?bDndT+~wj+X677Yyo3F%?7ZPt6YR^TXZE`ZhT$Pid}Q;omcGac#w38W z&6+UzKmi!+J!WXEM#;0a#6C$pD+|TZM$e73yG9ge39Jg5DZ-ShOVtLl)024TFpu4^ z7-GZcTtl7ZtU5$+u#WY}dw271JH|^q`rL|>#94iVI4W71n+g8u7~}dKns|MOIj5hA zYRhvk-~Z5AsV0wC`ci_rDHVHFY{ zrq=@>!KbUTV%wD;p=CEVZRenxlK*O*1ztp%Y~U~`j~TNU-?T#-ZgdEj;;FIN?;JzoooNP%$%VDSTT} z0QaG2_vhS6sC)Ik2-eaP)qgmxu_Dt6n$(`)h@H5<`*)9LX}i|B zQSCNnAL_!&skhbux6h_`F1~BJfzA5KWAM7eFn?QHtbY#)+#P?387!0amDRogmuYPW zU%-Pw9+x5V%4n($(OL7yes|wHnb#e7w%sIUGbyT1$<(G?f4d2z_Q;hewyW0hD%yVJ zIbxUDcM(}q8|olet$06$B$&S6N7_5KqKOUnk!t<&2GQT`UBL8!;H>Q`GX9nw(CGO& zkfT1xk~+b{W4gitLQOp#=7s{dMKAednc}e@ReF~fkzeA}C)Y=j=%-z~reP&)V|f*X zDsOa5SEP$k9ZZ#j^NQFT^q7a)ts7!pvJAh5_^y5j_VZrR*-@Yh9~Xn;s1gP6;bf4g zT?1dWUH6c~Z!Le(Gv4B&I5rM4mZISEeeE_)9I&;au}tqi>wm+zA#L7m zZc+)3G{i8X(CdHSsX9YF;Ja+={=qkAh>{{f@0rMIm_ta}Hc^_7Z8h#XF1(buR%DvU zZ6|#wTqG&tZhL)Ik$Ch~n#|1r2qN<(&*DUea`7xxaYuin4#gKaD~o=*RMR2nj8- z9DPhs+9f{>E{Z#->S#i68m$BC4Qr!b3=E@KAG%$(PUOh!Rl1XW{=@K`xdb9OdTPHeR8wP$Ok*m#OitL&?rLpViQK@jbsu@jv~)13pnbbAF+wDUjE%b8W;w6?1Hm%3ip^gI!z#k998%IB|7bIbNkd5Hv;Jvuv% zs-k7&o^EGmcn3F5+4XhSU#-C{Jc&<1al=N6wZ~b}k_>xnH1M3`bZG9ZeuihKu<9!} zUx(PLQhQTHv!>J4HM=KeO^r4V*G6D5BM1HX zlBALh{}8!^z?C?&XAnZm%DZO4n7C8+1+GywvZ8gtfcU|uC<9BV@y$lA8mZ?~m(&;K|X{Ll%ZNP+^0 zzb!xeMk(w;|2Z8RiKTozCz++mqftK;hjtuq2o%`yfb>Z%*+ zb!pw^g|-a$V4An8JcMCsMy1xTE|;A?q+G<^io61RD_(lups;@>?X+gCC{-6b?=mOh zD;b?bsgLyyzOd1`qZPgEfqwO;OZ!~gP82E*ScX>90%3q;O?| zr>=Lz{V{#NpU&bVNP8QETt_Gk2zH~%0n1~vYJRavu6UBf(q}QAN?(uOeuo&_>8R#( z3395m-EyK`Q*$%aQ5e!LthBdFXs-lFlVpoR`@1W;G5N168+2Jb4U&)KDnv3zW& zte=lHwD)9S29-D3v4U1Q8kSva>T6mu2?V^xF3nSmQC;C1gH5%GyXNzz7tNTvr5*1E=LI&`8j>DIrtjKI z3o9^3Rez+cBf=;tKqar!=eQ!&?)rW$sE0X%k{M)`gxHT6E5|SHu?^IAC0r2u!L?OJ z2uw4{CY7>o9l9JdRg24>M;ieY0Y#jHW)o;*()kO>vs5XIgK(A4Wzv+IL$;)jf<{K` zjL6CU5crm29pV<92w%&-=j}+{*`54f;s4@mq9`G+&*AwTNp#j4$Hc+kT#0c(5E6?5 z+*DO8mQ#zMpl?R?iLx+*q zFm%FEp$y-7C}D>_({u-Th-}N_IZk)F!w%fS8EN+Iv z{!o~1vLnfE8%Ca0Ju{0#_MpRO7m=~<@~paR5IrG1?;|Z{^&GZj1Zl(VBbX$+ZYDx5 z`|daxn#g)14SgvEr5=3iKR>7n&uPg?Wg>E!Hg^@Nm@v)<=8bLo^I7*h8d6g!#j1T= zBgF{m(&WDZHtU2?(=Mj=AjrH5R@L&_WhMHM^VCaw-+ASd3b}IN=IOzQVdM<`Fi7M6 z5U-$lAneS1V6fk)b!|re)i*VecGNzOI~yq+z`H|_|8Uot;g`hrW0&4`)}H(isZ#fL zcAG@^cbW+itiY`(l<$ilD6I>p0%T?%zH3=2*KndM;wnr!?7OHYAIfiz-qO9_>f^2v zdFJ#7?6&C+eAq2I=>4_deR2Dk)F||^B^T{yY!(#B%Ao2R7}zmum3+kI`6bv@wr*Uw3CLc$2a{-0syJMpqVnUv>E ze_y1ro`_DdrM=qRpFrOSe+0=95Zn4(8aqAZHFYd@yxh4t>5ZVx@l2$aXFJ83I^L*@ z+%Z|YovwziTX^I2$7~j&QD+Ui_=1!iY~mRKfT!bD zyU5iR0x-Iel4uzvNJ%YqjFamUQ=dMY8RU>j%aVkdG1y|oX|;RILljq!z8RoBN6(dx z?El=XGUMT>u4^8~1ziy9XdyQADp_s5Hfa@t$8LS7%4xggeO>bX=W{ga-FOOnC)jy! 
zza#>iTnPQq%clwK{~k^<+n6WER8}s;R0H!=VQX2tZLU>KJB)q6XlJXIGB=L_482b8 zyq;XvT6-h|1Qcn1{-g99NSR_&Yqh_Hb=%NGbIS~f@^S3I#*S5R++1Sc_*fRX%x3F8 zSucXYuQaoIlEq>lrFeOLO|bL&fk+2C_AF26`qcI9mDd_YIZ@i21Xj126}PTA?`2*x zmP4?Mbo6b}siZtUdb8p6v*&*Q__X%gC!Ds5%h@NgtG2E&iPD||U@1=ob3>L{L%xBY z*R)H-9ATvGb7htjH{#XTaMQO#ou{hT(YLmj_O}NuoyjXGIq|g(H4DG`AWeDjUOkpa zK+RN*HzINzYGrSlm8aYs1p`BneqvvGzaTN=V2=({`OPQD6$&u!HH^(pLlQXev06?E zxZ`II{z+W9uw@<}bH|K#J>#Gs7ufwNzj>WG^cjwp)$K)?GMv_u$u8BvI>4eL^zM_^ z3aXaq@r8V+ALRSFf$h{n3bYL`#wmP~JUDJWBGj#?;DZF&Wk@h_HY0*%`ib%10_z2y zBPpqZa1>o31fK9*5oLlR5Non}*R#IiO`-+68X+dw_CkEVExtkY6$L^h9v$anrJ^NU z%jS+O_IE#CV3?*Vn$Qn}=Mf`L04|$|QK)vL6=S*4Pwfad%a=w&LJe8fDPHr4BUx@o z{X@WGTxvHTZLx%SiD~)Q9aO?t+09%*p)H_fxu#DK(HDNVx?C%*Z{ldr88Df&rhAr9fax~Gzs;qe{^`n+)*Y< z{N_~%+SHB|v9!IKb!5_B={g#Cr5h~B5jt>WQf7m#fs6K`$IKvLL!F(i)rnEO^%D_# zq`qTdhoe4=oPuylw)*nY z_i3hfIiZy|0gM^WV~u!@IJJp-VO3aWjHzs#dJBHPeVMSPWzpX4qLM!kcp%L%TTcCv zIJUyimUhNK5pw#p^Zb_4{5=amlSu3pQ=qBa)UK->GaMYBmYX;JDTnlXg+y^k#h1bQ zE>RN-b)g8sNw=Iantdyvun&#L7J1@t zdP?D-k_NU>uoh+z%a?wt40Lu*bQ_^Td`wBuRwUq_Vuxty+Q=ke9WoSM!(&72!-@}< zz+r86TM_?u_24s6tD}t2xQ3<~OIx|aHApqy4-q2x!hg$&63t#6IL$Wc$Xd+h2gwp% z%2Nv~ml78HCx%iXAlpS5iLz2WDiO_j|9oZSL7&?1Zc3}AdTD9MY8%Uf-{%HYBRe0* zBW~l|>a4H2{`;3cVUh1p+c{L{f%4XNA7ej|0p4^CVE{a`5qJvg1liA#>;Ny)8sro72n4oQj{d=s z0c$}K_kWG4yB)6gO3U=# z!cU8nCN7)#-q)>$n`NeB3)lN0raUcaD033N!tjc1iK;0SIq8ZP#LPq%OIIiP&i$Mo zHyCHb+XO`H^?DqpUu7RJ&c)uhn%KshI~}Ls3tW!`Mvm?4t`7BlIwoSqhwE)Y*)s7M zReTOggKK>?bJal;#c@V3{>ZHgB4%de3oy5It6KFrE0|MR(iUejN@Slj^t^1iYyPi% z)LG>Iyy3jfv+CtaYAxYVtVMdf!YEd&JNXXAnjK~fl02Qi9H+;(RqM++X*-=XJbHgde%s=R zP4p!y96qNBN*MtxQe~UD{mxUJc({5w`I$}Y+yPtRs`?)XNUz+CccBuI05*q6H8Q<_ zcCfgU{SLBn8MCcB_4#L>@H!53akeCl${&Ov0sDR*r5w4_UsWR$f;Hwb{;WC z)FQ%Y2BcuLwBTjU^e->RN6s3sdEtLQjgmN~xurfE?&f_ZBv1zPqIxFRR4gyWgei*u zVEz%VekjQ$C969)b4Ag4*za3DJmfh;e5i}u6)@`giHLTycq?DdY|FdxNX@vn#Slgl z-&c|>VRX2_d<5?4AU1-zR*JP$0~2(NMw{RgUun3(2tQH2SRXP-!$Ki^e<+1Md}zPs z=Bx5d(s9bWdF(eg_$b~-Dg2|3@$gxKH1J;XU(qZhfWDHpc#$&!O|>@GPM4}&3`JfdH#V3grKX_##Jc0*7xG~!E6aaR zWzvAjuKbU~r)}-{TbCGwO_KPahMrBX@L8*TB>HfekJZ@Yr0&AnRxMIXLv z7cB^6H>^f}gb9^=^t*SZazGe6J>W9~_(fy!qWI(Y))xi2cW=V9swy>#BPRCuS7jPW z&nhc?r$oHbp9|OjMOHwZZwCZe)6U%dFAJt({&mD_(d6k#j;i}sf?!{^|(^B+L*Hz~s=IccKcfmiIr^P|=E zBE*}(m{>2n-7nJXB5PPuN4#$iw@U{v4Dm=dB9pB##7b*_V|FocvlK?;Eg~=+`YjYv z-1z|$8=uqd}Vj$;d`)9U}fEC94Y^}{AQ>4!`%v%l$1IMVp`ScBHrukBlXbh%{P zchd}f<4m@k^z0(dWx(@KGY_)x|Hg&V(o^4u_B|w(P8DLMx$w&*cYYPDj`#azmcJ+Yi$mpu%OD)@wq1o0-jp{WL2-F5#A1kSQCpGu z`tG|>f{expwZc1po?1r9wai!pWh7+--g-e1Bz-4&(s6c?9a3mJ0~_DC&Q*2g?J|WK zwbD%?096PN@>uZkMkpMi%C2(*!~b>IMQ@Mdpg*n6W8$5P-MX!iGLhzPi2!~e;|`1F zY5@UN3CCKqnb$64k|W|8YfzrWr~0dQf%iUl&S*yWrI(riN*FTlwh;90ffd6qCQWT6 z>U)7UCq_f5{R1}pUW(4fFeRkKp-)q7rs@qfdE7%at+N-~t4GT-MPqnOD3Z}`Igc&4 zb1UFRPGLCH{W3&&VaCUERo4)~H!HGik@{OL<#b60Tj+(wGL35g4UJUS9U;niJ&wm2 z2UYcK!Njk%o%3hRjDEXKg}Ev2Y`dI`UH-vrq44%c_3_^tDC7TrD=jtehtzT(6T>L0 zNCdb>}QNgiJmtPO6dd}+8U}q7w^+>TMCnQrJ*!kkPEeR9># zw!17+e4rJXO54%sZx^$<;M%N*Ah7+bd_~MEhCR4tPC6m53CAe&pH)S*c-NftKY#K6 zHE3=~)BCHfKoQdqlY0&sSF}wD%TL;~!&`_`6H{L4p>}M@?_LomS^-o>egDd(6xL2N6SV{nueV z6zD>_eZ8{pQR3+)Oz=xzfBGTVLl%h9nt1GN!~kY{*ZfGu-U3Vvm`5`s>Zr{%iAaC3 z&BQy${7#4HNO5+pd54xB=%uM28EPw6+v+;u_%Wng1M(;_*Ia?2M9fU8!Fe&0L;)ke zJ2nr|Ry`!>7e=r*Pxsq$xbeHiFI0i_fXh9#10!^X`E~xytsR4^TzbIhTF)n?r!i%p zO7Dj|L zbWBl@3)`_U&Hx>QWK(m_zSZ)nMvC!=^!?hKnUN|l!;A#{JJ!x|jQ?Y{Yu3qb8h7x{ zfyoJX{~g>LC@eek66=93Jzo#y2}gC~fh?JK?Z+Ua1Z44-m#wFf`b?%fuP|)xltq2( zN*5@MvEU+f*q{aUF@rv=0=p74U(3(aA^?M|Qq{jx*%9gl6>0j%0vgQ94M~9$t^q2x z_YJ9%PtG;4kI*jrY1c`rOdy1DGWi9YwV z&^2W*Sjx9$XE^fVv|uS|M26NIoO{Lw@gqZ=J5pJ5&}r(SZ@#c#Q4mLth1^IdT-cNR 
zW(sxSyV7Qw0cz1{zXjGRXxV~Mjq){+nLUY8BAb6OuyPi*J7~dLW0^iyg9DJ~kZ%GX z$FPpOsC<{cvu?y0;r!{xT>foUcKS6j0NwDiaF$MGGk!aw%z+6M72Fz-f{ih*neplg za|jf?cfUw7A?6mtdA!0K5D?K&9a) z0><~KGU+ZC+rW@ywU8PqfQF+uuvS#I$Cn2Vm9z{q1aNCb`uVWDAFGO%hY-5~;^=SE z_n@k^!e@=cp4A8g$Y3|AbS}WqBjw|b{96+J(We-M1Z9T$c~#T3VIz9$Z)<<=ntiVc z>_Qsi0+ap%lT21i4Qd^~@|Q$^EDwg&Sng$gL|Dcj>Gr|vP})3%CYXK$#Z#66EQe0< z%jllcf%^elQZdq-mx8%U*u%~WNK3sLeOt)Yjc(cS$4M|1RQ`}Q;>Np+MI=`4;TC4B zv7#A$`tPpk)%9UR`+45y@_grurgsaVotbQ5@PQ}){`|#@NmCuJ<>lz%rNOX(i;xJ? zwtWKLCa4roD3tu7|0jJAKwZ!`Rf8>rSwuO zo~lop2?>)x*?S}$hds%dnhaz9;X`*&6b=o$v23e?$2dC&lg2YdD~k%7-3(oL5S;W) zj<@~a-rhPVVdbZBoW%qY2WVbYpLW+(szsam=lMR%Ndkj}TpWg}0Iu_}+N6FF8Uel8 zbf-Y25naC|XLxOnAQowzmXXA-WtYR*MI3hjoY3w0NpfJE9mY5i$DEUJ;{YELi}_fM zogszqAI;(0_pG{{uPs7fjDS0RhkWKrt0*{>zKmcYIuA9H(Gg;Ac!b|kzOq>`BlH|VhO6b~e&hzLs)5d2_)+fA)3Uy-8 z*oqDdOdnAZbRdykuOu`MI3^@v^9}}O{>%#A&>IrD#Q#<{9&R!be*1m5I^$8wQ<*Zu zvJQf+6Rz4xNARHIKCmn6<44N*j*5*)@IvwV=kGwbg2PZ&3d^6XSR~DHAUQ9D==jMS z>QN@8LZX$f&$-}q7uz(K!%)cRxr<(*U>>I(n9IRqfF)}uKl}(NYHfPH%uxIj*4MI$g08obiFd%aJ|3nW85PdZ89ZYX-d4;DbzWDTn002t1 zsWtLrtBQJs90@!%Bma``gK@E zGBRpa5VCx4XGLu9vxQ>%BQ(LNLKM3PxRGRIM|$$hD=StaFN@hfn(WUT?$6qnJKhdX z?=}uSfu>Hs6&*TH9Nry-=^5CvBm`g{HlRuA{mjeba}~h zUqnD#$%k%YSkp>JKL^_~pq0_oHWVevjHl6UnF-KRK$A!6i+N2eUtwJ=Qx~2)AON$8 z+ge4NrkiRvjt^p&rI)a`L;m{Q&cA*2^m6x`jrr_QqwvQFGHL*(?dsSfz8*m;Be&63 zm|hBz-rB+VwbT~BJd*b8NSk#fgu$Nj2^ zZETiMgj$f@R+OpYKtQ{EVLaE^Vu#$G&+Me6hs8?!lw}y}%gjVSaRol43lejia7%lX ztf=hR43~;D*EQs&3A#L~;&_%!qy9*C5#i$a%^W@n)|?C3 z+(x)AT#!L&Lnn_f@bXNv6(qUzXXpr(7GyGy=*d&jyGbd$g zwOojQIhr(l$P?}#<1Cq&0_K8fQaM=z8v|k9{N=S>@iMYmx6RO)FNaLEf(55pe;}JL z!2W#THoQNutVbUrTZIxyR;D{EYHS*^^*x9bZP@wbPBlc;yCT`ob@}`? z`@FuQH)VN_NzQ^%!!8d}2)inNXRN2y&exQy3)PIRpV9I2$8QDuQ zpteg;OTKt$Kw`2SEx})a#4LOwA>z%->Dq+dDh~rSR@_@)4w5}%!GWdzhXI}Mt33&* z=)iPfdaY7+Gt4Hail3JO;%vWuDLQ5MzMX8k-PREl&3yNywkf(x`fCJsR5$+EmuIkJ zXhWO0B?T$6bC5`i0E9AEIa+&c2MC|IzhIUDfU5yCgYf8_PX_<67=AhKU>q6WMDp*2 za6Yo-r_D4TrPasEeODvPUzVD4kMTX(&bEC!!00qP`CrE+EWXQ!rVs(Ru@pD{909nq zzB9k6xP2|>5UQ%TQQ|Y~xbwTd?}96>QN_8;8jWm3=}$KW{`HI*!j9j>I#(Is|C?I+ z7k8%ZLkI}kCjuRlH_kaW^^5xRm}=dF5@mW8&fv%=1@AUcJtIpI56>Xm{_xpokzDqM z5w0i2c!ebW;f{uSdnn<(bcD2Uj>%!`R~xVXqjg8fugX)`lLgA{#2)qlIZL=S`bb*t}WJ~BVvW4a!%sG~19hLHC zbM}jnmGHDj_UlEQG{o_o5u8|Q78sf{`S-nLH_1*kbDkN~)5qNuR_%PV=bDKAZZbS( zaaOxrht^3|x4H^JOde2PT+6WD&sK6%m%lhd87UB3hwh~w=Ia*cIV2vw$D6qag1xo- zOG>WFFF`!pmGu?OaN=uxmQ=sUs~$CqI--grrt+wcXf#}QuI&gf>9;9&<$7d};io@t zkyp1j6Kcez;<4r1wckIl0b{X?GKkfs0py2k!SR(^mnHUReY{6yOY=f@P}m|nxT|x- zh^(aktRiMG^Gzm92CDj?$S_7@u1SF08IP9g(w*(~Sc2k6pGVS$uCyS_gX(l)a?{ny z_r_&#C`np1s!74qvK7KD<&DbkPR{w-G(5y@9rNp-;iQhSD!nXmO~4@=_waGs;M))N zB+HsCls09=Fn0qQYq6Qx*wM+^yH8j8o#|EkEeSu>+ZLjx)_?$MQmKaUEIFFCus zdmtigiv1KnL1FmuEB(JzM`0 zY)+?_dNUl>V;`nS{_x5w@SbpJmtI&V;wLfcqf1ONfe%|w@iH$Dg$j&PNHrxV&f zCQCxISQhjuXzQ1Hul}9L%0Lu;8(Yw0d5`)$>Vmi9c2Ii?DvTjOA>U*1kHc_1?jW}! 
z{ghe0?Q*H?bWr%E#MYhiOvLV-qHTH2<08;CEWzKfx*s3eMyW<1Cjt4nnm1Y2nTRaa zMQ}KS)ev%ltUxrSXVpjP+y~u#!B>C5SzARp*8vmieR@BK)cHF8F3)&1tAER87?tyh=Ilyt5UY#8e zj%tr66DIlNxjTWFUh#4NHVREq~J+2kS z(?|k5Ff%oUYT{Mn&xAbEjPo0IrMq_T?JgeTEV-$|`aW|f(Bwow?y=Dx#3U73lEgL4 z>^rv0*DUTyys+~V9RW1?_A^|a_I3|5t6j#0`J1qd9UoL?Qh>m{WS*?78V~*Kfs*>K zns}a>=DkEW?@6l)<`{n=uMw@e22t{7&P#0Z$1)uf`t->mKiNfedXM3(Hg%!uuZOG8 zE6qQg^PZU<5R2ggmutt!cdPk(%e=ndfbFQW2lG{ybKslMzXyfxM*pG8br6^DgW5&j zj_GVAsG|0Z9MGz^MKBn~6^`S^W9FCSz&ooB)s8zXQhvx1?s3Hp#>$d@(IwrOI|mso z`+`k^%p;R9gl6guoT(!jmAV2=I4ZO3uiq4f!YAsulS zD7T&Vw0eZzMo%4b-z;MXB^PeiH$lUdEsijcxj>+DsMpn4S(kAu+cB9CN?(O%ym0FV zB*xgIx4>ab-S=X1i?<+Zcf`OV2d9C9^u-H;--b>n))Tu1gR)qBl8= z$yQsrg)D~!U>UGzwgAC>Mv&2$ z4|0H^NJ zDin9H`g>j_2sT^Hk}xdD`VT*nL7Iz@x4OjnB^$TIv@Kx6O~rna>Ys47n!*whB;|b zAARvlLn-NM=6f$|e_|SEr!)mUFjE`zo`kd>3>HN>CSUf%U@dY8Cz7_RrC#u!UqH<& zTgixV;rw4O23Ko>GV}{ng)~X~Kmyma0*>+x2R{&^`CjE0TdhK*P&AY@H+s@zI*}Xw zi*Jz1|IjIJCRI39x(#kDJ00L08H}lw;>McdukU-ek-oyyu#HS1^_(-xQ^T2$sJPI^ zVN(D{x5_vm&ZP-rL&v;%hckca7$C2_>rzo0XHPJOo?pYw5Kxcbd=sGD<3Na4W|qkxDBRy0Sgfgf2=Lv_=W0+He-~ zcLtkt(s%6lWoFoMj`Xx_XMP-zF7qT(LKiDve$BSx|AdmFIyZv zAZGEy7A(Qyf8n!lhyKls? z=%EZuE9)rQpdp!^mOsoO7Ml49KP>SXRnrLc_SaL-fqz#wh!{utss=> zflbFmbW&qCwS9>_hpN^sLnEth!A*=Cv7WB;YSDea zoU`UxQ3wthbDkyJe2-3}_8?MRVJy?#xPz1sBNqtT!Fq67Bq^QbXT zE*&fe;jVRSi9$KPNh_l8Cz`+dxstAZPK`$R?^$`+QLLCirKxvr(f7wUeOqxg3J39S zVufREdzC656;$?QkrtQ#iQRDB5dY1(lz&Z=fk3{6gqNlJ*$)}2CfTf6u-rKr*O-qm zuf=%c`Gi?;C@m^>JrZTbqOI=?Px-t%{}Z;j=h&V>ui-s$>{wbPi;DFALv8KC-Y