Make the complete pipeline in main + add workflows for linting
StephanAkkerman committed Nov 3, 2024
1 parent 4683987 commit e1f3f91
Showing 10 changed files with 113 additions and 23 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/black.yaml
@@ -0,0 +1,14 @@
name: Run Black formatter

on: push

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4.2.2
      - uses: psf/black@stable
        with:
          options: "--check --verbose"
          src: "./src"
          jupyter: false
12 changes: 12 additions & 0 deletions .github/workflows/isort.yaml
@@ -0,0 +1,12 @@
name: Run isort

on: push

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4.2.2
      - uses: isort/isort-action@v1.1.1
        with:
          requirements-files: "requirements/requirements.txt requirements/dev-requirements.txt"
2 changes: 1 addition & 1 deletion config.yaml
@@ -8,7 +8,7 @@ LLM:
MODEL: "microsoft/Phi-3-mini-4k-instruct"
TOKENIZER: "microsoft/Phi-3-mini-4k-instruct"
IMAGE_GEN:
MODEL: "black-forest-labs/FLUX.1-schnell"
MODEL: stabilityai/stable-diffusion-3.5-medium

# Datasets
SEMANTIC_SIM:
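For context, a minimal sketch of what this configuration switch means at runtime. The dict-style lookup below is an assumption about how the parsed config is exposed (the diffs only show "from fluentai.constants.config import config"); the model id is the one set above.

from fluentai.constants.config import config

# Assumption: config is the parsed config.yaml, exposed as a nested mapping.
model_id = config["IMAGE_GEN"]["MODEL"]
print(model_id)  # stabilityai/stable-diffusion-3.5-medium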
69 changes: 58 additions & 11 deletions fluentai/imagine/image_gen.py
@@ -1,9 +1,11 @@
import hashlib

Check failure on line 1 in fluentai/imagine/image_gen.py (GitHub Actions / build): Imports are incorrectly sorted and/or formatted.

import torch
from diffusers import AutoPipelineForText2Image, FluxPipeline, StableDiffusionPipeline

# Current options for text-to-image models
text_2_img_models = [
    "black-forest-labs/FLUX.1-schnell",
    "stabilityai/stable-diffusion-3.5-medium", "black-forest-labs/FLUX.1-schnell",
    "lambdalabs/miniSD-diffusers", # https://huggingface.co/lambdalabs/miniSD-diffusers
    "OFA-Sys/small-stable-diffusion-v0", # https://huggingface.co/OFA-Sys/small-stable-diffusion-v0
    "stabilityai/sdxl-turbo", # https://huggingface.co/stabilityai/sdxl-turbo
@@ -56,27 +58,72 @@ def get_sd_turbo_pipe():
    return pipe


def get_sd_medium_pipe():
    import torch
    from diffusers import StableDiffusion3Pipeline

    pipe = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3.5-medium",
        torch_dtype=torch.bfloat16,
        cache_dir="models",
    )
    pipe = pipe.to("cuda")

    return pipe


# Standalone demo call (capybara example); left commented out because pipe is
# not defined at module scope.
# image = pipe(
#     "A capybara holding a sign that reads Hello World",
#     num_inference_steps=40,
#     guidance_scale=4.5,
# ).images[0]
# image.save("capybara.png")


def generate_short_code_sha256(prompt: str, length: int = 8) -> str:
    """
    Generates a short code for a given prompt using SHA256 hashing.

    Args:
        prompt (str): The input sentence or prompt.
        length (int): Desired length of the short code. Default is 8.

    Returns:
        str: A short hexadecimal code representing the prompt.
    """
    # Create a SHA256 hash object
    hash_object = hashlib.sha256(prompt.encode("utf-8"))

    # Get the hexadecimal digest of the hash
    hex_digest = hash_object.hexdigest()

    # Truncate the hash to the desired length
    short_code = hex_digest[:length]

    return short_code


def generate_img(
    model_name: str = "sdxl-turbo",
    prompt: str = "A flashy bottle that stands out from the rest.",
):
    if model_name == "sdxl-turbo":
        pipe = get_sd_turbo_pipe()
    else:
        pipe = get_mini_sd_pipe()
        pipe = get_sd_medium_pipe()

    # pipe.enable_model_cpu_offload() # save some VRAM by offloading the model to CPU. Remove this if you have enough GPU power
    # pipe.to("cuda")
    # Play with these parameters to get different results
    image = pipe(
        prompt=prompt,
        guidance_scale=0.0,
        height=1024,
        width=1024,
        num_inference_steps=4,
        max_sequence_length=256,
        # guidance_scale=0.0,
        # height=1024,
        # width=1024,
        num_inference_steps=40,  # must be an integer number of denoising steps
        # max_sequence_length=256,
    ).images[0]
    image.save(f"text2img_tests/{model_name}.jpg")

    # Temporary solution to save the prompt in the image filename
    prompt_code = generate_short_code_sha256(prompt)

    image.save(f"img/text2img_tests/{model_name}_{prompt_code}.jpg")


generate_img()
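For reference, a short usage sketch of the new hashing helper and the resulting save path. The value shown for "hello" is just the first eight hex characters of its SHA-256 digest; actually calling generate_img assumes the diffusion weights and a CUDA device are available, and importing the module currently also triggers the generate_img() call above.

from fluentai.imagine.image_gen import generate_img, generate_short_code_sha256

# Deterministic 8-character code derived from the SHA-256 digest of the prompt
print(generate_short_code_sha256("hello"))  # 2cf24dba

# Saves the result to img/text2img_tests/sdxl-turbo_<prompt_code>.jpg
generate_img(
    model_name="sdxl-turbo",
    prompt="A flashy bottle that stands out from the rest.",
)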
22 changes: 22 additions & 0 deletions fluentai/main.py
@@ -2,3 +2,25 @@
# It will call the word to mnemonic pipeline
# Generate a prompt
# Generate the image
from fluentai.imagine.image_gen import generate_img
from fluentai.imagine.verbal_cue import VerbalCue
from fluentai.mnemonic.word2mnemonic import generate_mnemonic

vc = VerbalCue()


def generate_mnemonic_img(word: str, lang_code: str):
    best_matches = generate_mnemonic(word, lang_code)

    # Get the top phonetic match
    best_match = best_matches.iloc[0]

    # Generate a verbal cue
    prompt = vc.generate_cue(word, best_match["token_ort"])

    # Generate the image
    generate_img(prompt=prompt)


if __name__ == "__main__":
    pass
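A minimal sketch of how the new pipeline entry point could be exercised (the __main__ block above still only passes). The Indonesian example word is the one already used in word2mnemonic.py, and running it assumes the translation, phonetic, imageability, semantic, and image-generation models have been downloaded.

from fluentai.main import generate_mnemonic_img

# Full pipeline: mnemonic candidates -> top phonetic match -> verbal cue -> image
generate_mnemonic_img("kucing", "ind")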
1 change: 0 additions & 1 deletion fluentai/mnemonic/imageability/eval.py
@@ -137,7 +137,6 @@ def train_and_evaluate_models(X_train, X_test, y_train, y_test):


def main():

    # Load data
    embeddings, scores = load_data()

1 change: 0 additions & 1 deletion fluentai/mnemonic/phonetic/eval.py
@@ -19,7 +19,6 @@ def word2ipa(
    ipa_dataset: pd.DataFrame,
    use_fallback: bool = True,
) -> str:

    if not ipa_dataset.empty:
        # Check if the word is in the dataset
        ipa = ipa_dataset[ipa_dataset["token_ort"] == word]["token_ipa"]
1 change: 0 additions & 1 deletion fluentai/mnemonic/phonetic/g2p.py
@@ -13,7 +13,6 @@


def g2p(words: list[str]) -> str:

    out = tokenizer(words, padding=True, add_special_tokens=False, return_tensors="pt")

    preds = model.generate(
4 changes: 2 additions & 2 deletions fluentai/mnemonic/phonetic/phonetic.py
@@ -6,15 +6,15 @@
from fluentai.constants.config import config
from fluentai.mnemonic.phonetic.g2p import g2p
from fluentai.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
from fluentai.mnemonic.phonetic.utils import convert_to_matrix, load_cache, pad_vectors
from fluentai.mnemonic.phonetic.utils import (convert_to_matrix, load_cache,
                                              pad_vectors)
from fluentai.utils.logger import logger


def word2ipa(
    word: str,
    language_code: str = "eng-us",
) -> str:

    # Try searching in the dataset
    if "eng-us" in language_code:
        # First try lookup in the .tsv file
10 changes: 4 additions & 6 deletions fluentai/mnemonic/word2mnemonic.py
@@ -1,8 +1,7 @@
from fluentai.constants.languages import G2P_LANGCODES
from fluentai.mnemonic.imageability.imageability import ImageabilityPredictor
from fluentai.mnemonic.orthographic.orthographic import (
    compute_damerau_levenshtein_similarity,
)
from fluentai.mnemonic.orthographic.orthographic import \
    compute_damerau_levenshtein_similarity
from fluentai.mnemonic.phonetic.phonetic import top_phonetic
from fluentai.mnemonic.semantic.semantic import SemanticSimilarity
from fluentai.mnemonic.semantic.translator import translate_word
@@ -53,11 +52,10 @@ def generate_mnemonic(word: str, language_code):
        axis=1,
    )

    # Calculate the mnemonic score
    logger.info(top)
    return top


if __name__ == "__main__":
    # generate_mnemonic("kucing", "ind")

    generate_mnemonic("猫", "zho-s")
    print(generate_mnemonic("猫", "zho-s"))
