diff --git a/.github/workflows/python-quality.yml b/.github/workflows/python-quality.yml index 92bf46c3..80a4542f 100644 --- a/.github/workflows/python-quality.yml +++ b/.github/workflows/python-quality.yml @@ -16,5 +16,5 @@ jobs: run: | pip install --upgrade pip pip install .[dev] - - run: black --check bench examples optimum test - - run: ruff check bench examples optimum test + - run: ruff format bench examples optimum test --diff + - run: ruff check --show-fixes bench examples optimum test diff --git a/Makefile b/Makefile index 3a86ea62..0a919537 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,12 @@ check_dirs := optimum test bench examples check: - black --check ${check_dirs} - ruff check ${check_dirs} + ruff check --show-fixes ${check_dirs} + ruff format ${check_dirs} --diff style: - black ${check_dirs} ruff check ${check_dirs} --fix + ruff format ${check_dirs} test: python -m pytest -sv test diff --git a/examples/nlp/text-generation/quantize_causal_lm_model.py b/examples/nlp/text-generation/quantize_causal_lm_model.py index da279ab2..1b96cea9 100644 --- a/examples/nlp/text-generation/quantize_causal_lm_model.py +++ b/examples/nlp/text-generation/quantize_causal_lm_model.py @@ -120,7 +120,9 @@ def main(): torch_dtype = ( torch.float16 if args.load_dtype == "float16" - else torch.bfloat16 if args.load_dtype == "bfloat16" else torch.float32 + else torch.bfloat16 + if args.load_dtype == "bfloat16" + else torch.float32 ) model = AutoModelForCausalLM.from_pretrained(args.model, torch_dtype=torch_dtype, low_cpu_mem_usage=True).to( device diff --git a/examples/vision/object-detection/quantize_owl_model.py b/examples/vision/object-detection/quantize_owl_model.py index 26c3c45d..92312087 100644 --- a/examples/vision/object-detection/quantize_owl_model.py +++ b/examples/vision/object-detection/quantize_owl_model.py @@ -12,7 +12,6 @@ def detect(model, processor, image, texts): - inputs = processor(text=texts, images=image, return_tensors="pt").to(model.device) # 
forward pass diff --git a/external/awq/test_awq_kernels.py b/external/awq/test_awq_kernels.py index 4ba56879..7452d5a3 100644 --- a/external/awq/test_awq_kernels.py +++ b/external/awq/test_awq_kernels.py @@ -13,8 +13,8 @@ # limitations under the License. import pytest import torch - from pack import pack_awq + from optimum.quanto import AffineQuantizer, MaxOptimizer, qint4, ungroup diff --git a/external/awq/test_awq_packing.py b/external/awq/test_awq_packing.py index 894df0e7..4fea6054 100644 --- a/external/awq/test_awq_packing.py +++ b/external/awq/test_awq_packing.py @@ -14,9 +14,9 @@ import numpy as np import pytest import torch - from pack_intweight import pack_intweight from packing_utils import pack_awq, reverse_awq_order, unpack_awq + from optimum.quanto import AWQPackedTensor, AWQPacking diff --git a/external/smoothquant/smoothquant.py b/external/smoothquant/smoothquant.py index 24471170..d44ee12f 100644 --- a/external/smoothquant/smoothquant.py +++ b/external/smoothquant/smoothquant.py @@ -8,9 +8,9 @@ from tqdm import tqdm from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.models.bloom.modeling_bloom import BloomBlock -from transformers.models.opt.modeling_opt import OPTDecoderLayer from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaRMSNorm from transformers.models.mistral.modeling_mistral import MistralDecoderLayer, MistralRMSNorm +from transformers.models.opt.modeling_opt import OPTDecoderLayer def get_act_scales(model, tokenizer, dataset, num_samples=512, seq_len=512): diff --git a/optimum/quanto/library/extensions/extension.py b/optimum/quanto/library/extensions/extension.py index 23ce0e6f..03257e2f 100644 --- a/optimum/quanto/library/extensions/extension.py +++ b/optimum/quanto/library/extensions/extension.py @@ -11,7 +11,6 @@ class Extension(object): - def __init__( self, name: str, diff --git a/optimum/quanto/models/diffusers_models.py b/optimum/quanto/models/diffusers_models.py index 
9130b620..5914a93a 100644 --- a/optimum/quanto/models/diffusers_models.py +++ b/optimum/quanto/models/diffusers_models.py @@ -42,7 +42,6 @@ class QuantizedDiffusersModel(ModelHubMixin): - BASE_NAME = "quanto" base_class = None @@ -188,5 +187,4 @@ def _save_pretrained(self, save_directory: Path) -> None: class QuantizedPixArtTransformer2DModel(QuantizedDiffusersModel): - base_class = PixArtTransformer2DModel diff --git a/optimum/quanto/models/transformers_models.py b/optimum/quanto/models/transformers_models.py index 24580a0d..39a6dc55 100644 --- a/optimum/quanto/models/transformers_models.py +++ b/optimum/quanto/models/transformers_models.py @@ -36,7 +36,6 @@ class QuantizedTransformersModel(ModelHubMixin): - BASE_NAME = "quanto" auto_class = None @@ -178,5 +177,4 @@ def _save_pretrained(self, save_directory: Path) -> None: class QuantizedModelForCausalLM(QuantizedTransformersModel): - auto_class = AutoModelForCausalLM diff --git a/optimum/quanto/subpackage/commands/base.py b/optimum/quanto/subpackage/commands/base.py index a0dd0dff..95f25777 100644 --- a/optimum/quanto/subpackage/commands/base.py +++ b/optimum/quanto/subpackage/commands/base.py @@ -23,7 +23,6 @@ @optimum_cli_subcommand() class QuantoCommand(BaseOptimumCLICommand): - COMMAND = CommandInfo(name="quanto", help="Hugging Face models quantization tools") SUBCOMMANDS = ( CommandInfo( diff --git a/optimum/quanto/tensor/activations/qbytes.py b/optimum/quanto/tensor/activations/qbytes.py index 0b06fd1d..6efaae03 100644 --- a/optimum/quanto/tensor/activations/qbytes.py +++ b/optimum/quanto/tensor/activations/qbytes.py @@ -26,7 +26,6 @@ class ActivationQBytesQuantizer(Function): - @staticmethod def forward(ctx, base: torch.Tensor, qtype: qtype, scale: torch.Tensor) -> torch.Tensor: if qtype.bits != 8: diff --git a/optimum/quanto/tensor/optimizers/absmax_optimizer.py b/optimum/quanto/tensor/optimizers/absmax_optimizer.py index 4db7dd5b..88cd95a0 100644 --- a/optimum/quanto/tensor/optimizers/absmax_optimizer.py 
+++ b/optimum/quanto/tensor/optimizers/absmax_optimizer.py @@ -24,7 +24,6 @@ class AbsmaxOptimizer(SymmetricOptimizer): - def optimize( self, base: torch.Tensor, qtype: qtype, axis: Optional[int] = None ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: diff --git a/optimum/quanto/tensor/optimizers/affine_optimizer.py b/optimum/quanto/tensor/optimizers/affine_optimizer.py index 71b9cb61..80cf091a 100644 --- a/optimum/quanto/tensor/optimizers/affine_optimizer.py +++ b/optimum/quanto/tensor/optimizers/affine_optimizer.py @@ -25,7 +25,6 @@ class AffineOptimizer(Optimizer): - def __call__( self, base: torch.Tensor, diff --git a/optimum/quanto/tensor/optimizers/max_optimizer.py b/optimum/quanto/tensor/optimizers/max_optimizer.py index 8b4f5f62..a300be7c 100644 --- a/optimum/quanto/tensor/optimizers/max_optimizer.py +++ b/optimum/quanto/tensor/optimizers/max_optimizer.py @@ -24,7 +24,6 @@ class MaxOptimizer(AffineOptimizer): - def optimize( self, base: torch.Tensor, qtype: qtype, axis: int ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: diff --git a/optimum/quanto/tensor/optimizers/optimizer.py b/optimum/quanto/tensor/optimizers/optimizer.py index 09b8c9f9..f4a37bde 100644 --- a/optimum/quanto/tensor/optimizers/optimizer.py +++ b/optimum/quanto/tensor/optimizers/optimizer.py @@ -22,7 +22,6 @@ class Optimizer(ABC): - def __call__( self, base: torch.Tensor, bits: int, axis: int, group_size: Optional[int] = None ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: diff --git a/optimum/quanto/tensor/optimizers/symmetric_optimizer.py b/optimum/quanto/tensor/optimizers/symmetric_optimizer.py index 87a817bf..fa5c3dbc 100644 --- a/optimum/quanto/tensor/optimizers/symmetric_optimizer.py +++ b/optimum/quanto/tensor/optimizers/symmetric_optimizer.py @@ -24,7 +24,6 @@ class SymmetricOptimizer(Optimizer): - def __call__(self, base: torch.Tensor, qtype: qtype, axis: Optional[int] = None) -> torch.Tensor: if axis not in [None, 0, -1]: raise 
ValueError("axis parameter must be None, 0 (first axis) or -1 (last axis)") diff --git a/optimum/quanto/tensor/qbits.py b/optimum/quanto/tensor/qbits.py index 17b35639..6f0b5e58 100644 --- a/optimum/quanto/tensor/qbits.py +++ b/optimum/quanto/tensor/qbits.py @@ -54,7 +54,6 @@ def backward(ctx, gO): class QBitsTensor(QTensor): - def __init__(self, qtype, axis, group_size, size, stride, data, scale, shift, requires_grad=False): super().__init__(qtype, axis) self._data = data diff --git a/optimum/quanto/tensor/qbytes.py b/optimum/quanto/tensor/qbytes.py index 79d9577e..b18d6f4d 100644 --- a/optimum/quanto/tensor/qbytes.py +++ b/optimum/quanto/tensor/qbytes.py @@ -37,7 +37,6 @@ def backward(ctx, gO): class QBytesTensor(QTensor): - def __init__(self, qtype, axis, size, stride, data, scale, requires_grad=False): super().__init__(qtype, axis) self._data = data diff --git a/optimum/quanto/tensor/qtensor.py b/optimum/quanto/tensor/qtensor.py index 2ce4c8d4..5c8ee856 100644 --- a/optimum/quanto/tensor/qtensor.py +++ b/optimum/quanto/tensor/qtensor.py @@ -30,7 +30,6 @@ def qfallback(callable, *args, **kwargs): class QTensor(torch.Tensor): - def __init__(self, qtype, axis): self._qtype = qtype self._axis = axis diff --git a/optimum/quanto/tensor/weights/marlin/fp8/qbits.py b/optimum/quanto/tensor/weights/marlin/fp8/qbits.py index 3b9db6d0..0cf24243 100644 --- a/optimum/quanto/tensor/weights/marlin/fp8/qbits.py +++ b/optimum/quanto/tensor/weights/marlin/fp8/qbits.py @@ -61,7 +61,6 @@ def __new__(cls, qtype, axis, size, stride, data, scale, requires_grad=False): ) def __init__(self, qtype, axis, size, stride, data, scale, requires_grad=False): - assert axis == 0 assert data.ndim == 2 diff --git a/optimum/quanto/tensor/weights/qbits.py b/optimum/quanto/tensor/weights/qbits.py index 3afce3f5..f9ca965c 100644 --- a/optimum/quanto/tensor/weights/qbits.py +++ b/optimum/quanto/tensor/weights/qbits.py @@ -31,7 +31,6 @@ class WeightsQBitsQuantizer(Function): - @staticmethod def forward( 
ctx, diff --git a/optimum/quanto/tensor/weights/qbytes.py b/optimum/quanto/tensor/weights/qbytes.py index 6d316218..68d0f65b 100644 --- a/optimum/quanto/tensor/weights/qbytes.py +++ b/optimum/quanto/tensor/weights/qbytes.py @@ -28,7 +28,6 @@ class WeightQBytesQuantizer(Function): - @staticmethod def forward( ctx, base: torch.Tensor, qtype: qtype, axis: int, scale: torch.Tensor, activation_qtype: qtype, optimized: bool diff --git a/pyproject.toml b/pyproject.toml index 4190f66f..85972389 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dynamic = ['version'] homepage = 'https://github.com/huggingface/optimum-quanto' [project.optional-dependencies] -dev = ['pytest', 'ruff', 'black'] +dev = ['pytest', 'ruff'] examples = [ 'torchvision', 'transformers', @@ -50,19 +50,20 @@ version = {attr = 'optimum.quanto.__version__'} requires = ['setuptools>65.5.1', 'setuptools_scm'] build-backend = 'setuptools.build_meta' -[tool.black] -line-length = 119 - [tool.ruff] -# Never enforce `E501` (line length violations). +# Configuration for Ruff +line-length = 119 # Same line-length as Black had + +# Linting rules: +# Never enforce `E501` (line length violations) and other specific rules. lint.ignore = ['C901', 'E501', 'E741'] lint.select = ['C', 'E', 'F', 'I', 'W'] -line-length = 119 # Ignore import violations in all `__init__.py` files. 
[tool.ruff.lint.per-file-ignores] '__init__.py' = ['E402', 'F401', 'F403', 'F811'] +# isort configuration (to sort imports) [tool.ruff.lint.isort] lines-after-imports = 2 -known-first-party = ['optimum.quanto'] +known-first-party = ['optimum.quanto'] diff --git a/setup.sh b/setup.sh index 602ff887..52faf8d0 100755 --- a/setup.sh +++ b/setup.sh @@ -12,6 +12,6 @@ else pip install --upgrade --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu118 fi # Build tools -pip install black ruff pytest build +pip install ruff pytest build # For examples pip install accelerate transformers datasets diff --git a/test/library/test_quantize.py b/test/library/test_quantize.py index bfbd784f..21d8d88b 100644 --- a/test/library/test_quantize.py +++ b/test/library/test_quantize.py @@ -96,9 +96,7 @@ def test_affine_quantize(input_shape, dtype, qtype, axis, group_size, shift_mode "zeropoint": 6e-2, "float": 5e-2, }, - }[ - qtype - ][shift_mode] + }[qtype][shift_mode] if group_size is not None: qa = ungroup(qa, axis=axis, orig_shape=a.shape) assert_similar(a, qa, atol=atol) diff --git a/test/quantize/test_quantize_patterns.py b/test/quantize/test_quantize_patterns.py index 505066da..11b85d8d 100644 --- a/test/quantize/test_quantize_patterns.py +++ b/test/quantize/test_quantize_patterns.py @@ -36,7 +36,6 @@ def forward(self, inputs): class ClassificationModel(torch.nn.Module): - def __init__(self, input_size, output_size, hidden_size, classes): super().__init__() self.model = MLP(input_size, output_size, hidden_size) diff --git a/test/tensor/weights/test_weight_qbits_tensor_quantize.py b/test/tensor/weights/test_weight_qbits_tensor_quantize.py index 149c8ff4..5453993e 100644 --- a/test/tensor/weights/test_weight_qbits_tensor_quantize.py +++ b/test/tensor/weights/test_weight_qbits_tensor_quantize.py @@ -49,9 +49,7 @@ def test_weight_qbits_tensor_quantize(input_shape, dtype, qtype, axis, group_siz "zeropoint": 6e-2, "float": 
5e-2, }, - }[ - qtype - ][shift_mode] + }[qtype][shift_mode] assert_similar(a, qa, atol=atol)