
architecture: factor HFCompatible out #954

Merged: 10 commits, Oct 24, 2024
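This change moves the HFCompatible mixin out of garak.generators.huggingface into a new garak.resources.api.huggingface module, so buffs and detectors can reuse the Hugging Face helper methods without importing the generator code. A minimal sketch of the resulting usage pattern follows; the ExampleBuff class is illustrative only and not part of the diff.

from garak import _config
from garak.buffs.base import Buff
from garak.resources.api.huggingface import HFCompatible


class ExampleBuff(Buff, HFCompatible):
    """A plugin gets the HF helpers by mixing HFCompatible in next to its base class"""

    DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | {"hf_args": {"device": "cpu"}}

    def __init__(self, config_root=_config) -> None:
        super().__init__(config_root=config_root)

    def _load_model(self):
        # device selection is now shared logic provided by the mixin
        self.device = self._select_hf_device()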
garak/buffs/paraphrase.py (26 changes: 14 additions & 12 deletions)
@@ -7,36 +7,39 @@

import garak.attempt
from garak import _config
from garak.generators.huggingface import HFCompatible
from garak.buffs.base import Buff
from garak.resources.api.huggingface import HFCompatible


class PegasusT5(Buff):
class PegasusT5(Buff, HFCompatible):
"""Paraphrasing buff using Pegasus model"""

DEFAULT_PARAMS = Buff.DEFAULT_PARAMS | {
"para_model_name": "garak-llm/pegasus_paraphrase",
"hf_args": {
"device": "cpu"
}, # torch_dtype doesn't have standard support in Pegasus
"max_length": 60,
"temperature": 1.5,
}
bcp47 = "en"
doc_uri = "https://huggingface.co/tuner007/pegasus_paraphrase"

def __init__(self, config_root=_config) -> None:
self.para_model_name = "garak-llm/pegasus_paraphrase" # https://huggingface.co/tuner007/pegasus_paraphrase
self.max_length = 60
self.temperature = 1.5
self.num_return_sequences = 6
self.num_beams = self.num_return_sequences
self.torch_device = None
self.tokenizer = None
self.para_model = None
super().__init__(config_root=config_root)

def _load_model(self):
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

self.torch_device = "cuda" if torch.cuda.is_available() else "cpu"
self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name)
self.device = self._select_hf_device()
self.para_model = PegasusForConditionalGeneration.from_pretrained(
self.para_model_name
).to(self.torch_device)
).to(self.device)
self.tokenizer = PegasusTokenizer.from_pretrained(self.para_model_name)

def _get_response(self, input_text):
if self.para_model is None:
@@ -48,7 +51,7 @@ def _get_response(self, input_text):
padding="longest",
max_length=self.max_length,
return_tensors="pt",
).to(self.torch_device)
).to(self.device)
translated = self.para_model.generate(
**batch,
max_length=self.max_length,
@@ -89,7 +92,6 @@ def __init__(self, config_root=_config) -> None:
self.no_repeat_ngram_size = 2
# self.temperature = 0.7
self.max_length = 128
self.device = None
self.tokenizer = None
self.para_model = None
super().__init__(config_root=config_root)
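With the mixin in place, PegasusT5 no longer probes torch.cuda itself: the device comes from DEFAULT_PARAMS["hf_args"] and is resolved by _select_hf_device() inside _load_model(). A quick way to exercise the refactored buff directly, mirroring the test added at the bottom of this PR (the prompt string is illustrative):

from garak import _plugins

buff = _plugins.load_plugin("buffs.paraphrase.PegasusT5")  # picks up hf_args {"device": "cpu"} from DEFAULT_PARAMS
buff._load_model()                                         # sets buff.device via _select_hf_device()
paraphrases = buff._get_response("The rain in Spain falls mainly in the plains.")
print(paraphrases)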
garak/detectors/base.py (3 changes: 2 additions & 1 deletion)
@@ -12,9 +12,10 @@

from garak import _config
from garak.configurable import Configurable
from garak.generators.huggingface import HFCompatible
import garak.attempt

from garak.resources.api.huggingface import HFCompatible


class Detector(Configurable):
"""Base class for objects that define a way of detecting a probe hit / LLM failure"""
garak/generators/huggingface.py (107 changes: 2 additions & 105 deletions)
@@ -14,11 +14,9 @@
https://huggingface.co/docs/api-inference/quicktour
"""

import inspect
import logging
import os
import re
from typing import Callable, List, Union
from typing import List, Union
import warnings

import backoff
@@ -28,7 +26,7 @@
from garak import _config
from garak.exception import ModelNameMissingError, GarakException
from garak.generators.base import Generator

from garak.resources.api.huggingface import HFCompatible

models_to_deprefix = ["gpt2"]

@@ -45,107 +43,6 @@ class HFInternalServerError(GarakException):
pass


class HFCompatible:
def _set_hf_context_len(self, config):
if hasattr(config, "n_ctx"):
if isinstance(config.n_ctx, int):
self.context_len = config.n_ctx

def _gather_hf_params(self, hf_constructor: Callable):
""" "Identify arguments that impact huggingface transformers resources and behavior"""

# this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature
# this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit
params = (
self.hf_args
if hasattr(self, "hf_args") and isinstance(self.hf_args, dict)
else {}
)
if params is not None and not "device" in params and hasattr(self, "device"):
# consider setting self.device in all cases or if self.device is not found raise error `_select_hf_device` must be called
params["device"] = self.device

args = {}

params_to_process = inspect.signature(hf_constructor).parameters

if "model" in params_to_process:
args["model"] = self.name
# expand for
params_to_process = {"do_sample": True} | params_to_process
else:
# callable is for a Pretrained class also map standard `pipeline` params
from transformers import pipeline

params_to_process = (
{"low_cpu_mem_usage": True}
| params_to_process
| inspect.signature(pipeline).parameters
)

for k in params_to_process:
if k == "model":
continue # special case `model` comes from `name` in the generator
if k in params:
val = params[k]
if k == "torch_dtype" and hasattr(torch, val):
args[k] = getattr(
torch, val
) # some model type specific classes do not yet support direct string representation
continue
if (
k == "device"
and "device_map" in params_to_process
and "device_map" in params
):
# per transformers convention hold `device_map` before `device`
continue
args[k] = params[k]

if (
not "device_map" in args
and "device_map" in params_to_process
and "device" in params_to_process
and "device" in args
):
del args["device"]
args["device_map"] = self.device

return args

def _select_hf_device(self):
"""Determine the most efficient device for tensor load, hold any existing `device` already selected"""
import torch.cuda

selected_device = None
if self.hf_args.get("device", None) is not None:
if isinstance(self.hf_args["device"], int):
# this assumes that indexed only devices selections means `cuda`
if self.hf_args["device"] < 0:
msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
logging.critical(msg)
raise ValueError(msg)
selected_device = torch.device("cuda:" + str(self.hf_args["device"]))
else:
selected_device = torch.device(self.hf_args["device"])

if selected_device is None:
selected_device = torch.device(
"cuda"
if torch.cuda.is_available()
else "mps" if torch.backends.mps.is_available() else "cpu"
)

if isinstance(selected_device, torch.device) and selected_device.type == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
logging.debug("Enabled MPS fallback environment variable")

logging.debug(
"Using %s, based on torch environment evaluation", selected_device
)
return selected_device


class Pipeline(Generator, HFCompatible):
"""Get text generations from a locally-run Hugging Face pipeline"""

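On the generator side only the import location changes; Pipeline and the other classes keep mixing HFCompatible in. As a hedged sketch of what _gather_hf_params does for a pipeline-style constructor (the Toy class and its hf_args are illustrative, not garak's actual call site): when the constructor signature contains a model parameter, the helper fills model from self.name, copies any matching hf_args, and hands the selected device over as device_map when the constructor also accepts one.

from transformers import pipeline

from garak.resources.api.huggingface import HFCompatible


class Toy(HFCompatible):
    def __init__(self):
        self.name = "gpt2"
        self.hf_args = {"device": "cpu"}


toy = Toy()
toy.device = toy._select_hf_device()      # torch.device("cpu") given the hf_args above
kwargs = toy._gather_hf_params(pipeline)  # {"model": "gpt2", "device_map": toy.device}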
garak/resources/api/huggingface.py (114 changes: 114 additions & 0 deletions)
@@ -0,0 +1,114 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import inspect
import logging
import os

from typing import Callable


class HFCompatible:

"""Mixin class providing private utility methods for using Huggingface
transformers within garak"""

def _set_hf_context_len(self, config):
if hasattr(config, "n_ctx"):
if isinstance(config.n_ctx, int):
self.context_len = config.n_ctx

def _gather_hf_params(self, hf_constructor: Callable):
""" "Identify arguments that impact huggingface transformers resources and behavior"""
import torch

# this may be a bit too naive as it will pass any parameter valid for the hf_constructor signature
# this falls over when passed some `from_pretrained` methods as the callable model params are not always explicit
params = (
self.hf_args
if hasattr(self, "hf_args") and isinstance(self.hf_args, dict)
else {}
)
if params is not None and not "device" in params and hasattr(self, "device"):
# consider setting self.device in all cases or if self.device is not found raise error `_select_hf_device` must be called
params["device"] = self.device

args = {}

params_to_process = inspect.signature(hf_constructor).parameters

if "model" in params_to_process:
args["model"] = self.name
# expand for
params_to_process = {"do_sample": True} | params_to_process
else:
# callable is for a Pretrained class also map standard `pipeline` params
from transformers import pipeline

params_to_process = (
{"low_cpu_mem_usage": True}
| params_to_process
| inspect.signature(pipeline).parameters
)

for k in params_to_process:
if k == "model":
continue # special case `model` comes from `name` in the generator
if k in params:
val = params[k]
if k == "torch_dtype" and hasattr(torch, val):
args[k] = getattr(
torch, val
) # some model type specific classes do not yet support direct string representation
continue
if (
k == "device"
and "device_map" in params_to_process
and "device_map" in params
):
# per transformers convention hold `device_map` before `device`
continue
args[k] = params[k]

if (
not "device_map" in args
and "device_map" in params_to_process
and "device" in params_to_process
and "device" in args
):
del args["device"]
args["device_map"] = self.device

return args

def _select_hf_device(self):
"""Determine the most efficient device for tensor load, hold any existing `device` already selected"""
import torch

selected_device = None
if self.hf_args.get("device", None) is not None:
if isinstance(self.hf_args["device"], int):
# this assumes that indexed only devices selections means `cuda`
if self.hf_args["device"] < 0:
msg = f"device {self.hf_args['device']} requested but CUDA device numbering starts at zero. Use 'device: cpu' to request CPU."
logging.critical(msg)
raise ValueError(msg)
selected_device = torch.device("cuda:" + str(self.hf_args["device"]))
else:
selected_device = torch.device(self.hf_args["device"])

if selected_device is None:
selected_device = torch.device(
"cuda"
if torch.cuda.is_available()
else "mps" if torch.backends.mps.is_available() else "cpu"
)

if isinstance(selected_device, torch.device) and selected_device.type == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
logging.debug("Enabled MPS fallback environment variable")

logging.debug(
"Using %s, based on torch environment evaluation", selected_device
)
return selected_device
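For a from_pretrained-style constructor the same helper takes the other branch: model is absent from the signature, so the standard pipeline keywords are merged in, a string torch_dtype is mapped to the corresponding torch attribute, and device is again promoted to device_map. A self-contained sketch under those assumptions (the DemoLoader class, model name, and hf_args values are illustrative):

from transformers import AutoModelForCausalLM

from garak.resources.api.huggingface import HFCompatible


class DemoLoader(HFCompatible):
    def __init__(self, name):
        self.name = name
        self.hf_args = {"device": "cpu", "torch_dtype": "float16"}


loader = DemoLoader("gpt2")
loader.device = loader._select_hf_device()  # honours hf_args["device"], else cuda / mps / cpu
kwargs = loader._gather_hf_params(AutoModelForCausalLM.from_pretrained)
# kwargs now holds torch_dtype=torch.float16 and device_map=loader.device,
# ready to be passed to AutoModelForCausalLM.from_pretrained(loader.name, **kwargs)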
tests/buffs/test_buffs_paraphrase.py (24 changes: 24 additions & 0 deletions)
@@ -0,0 +1,24 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from garak import _plugins

BUFFS = [
classname
for (classname, active) in _plugins.enumerate_plugins("buffs")
if classname.startswith("buffs.paraphrase.")
]


@pytest.mark.parametrize("klassname", BUFFS)
def test_buff_results(klassname):
b = _plugins.load_plugin(klassname)
b._load_model()
paraphrases = b._get_response("The rain in Spain falls mainly in the plains.")
assert len(paraphrases) > 0, "paraphrase buffs must return paraphrases"
assert len(paraphrases) == len(
set(paraphrases)
), "Paraphrases should not have dupes"
assert not any([i == "" for i in paraphrases]), "No paraphrase may be empty"