Make the complete pipeline in main + add workflows for linting
StephanAkkerman committed Nov 3, 2024
1 parent 4683987 commit e1f3f91
Showing 10 changed files with 113 additions and 23 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/black.yaml
@@ -0,0 +1,14 @@
name: Run Black formatter

on: push

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4.2.2
      - uses: psf/black@stable
        with:
          options: "--check --verbose"
          src: "./src"
          jupyter: false
12 changes: 12 additions & 0 deletions .github/workflows/isort.yaml
@@ -0,0 +1,12 @@
name: Run isort

on: push

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4.2.2
      - uses: isort/isort-action@v1.1.1
        with:
          requirements-files: "requirements/requirements.txt requirements/dev-requirements.txt"
2 changes: 1 addition & 1 deletion config.yaml
@@ -8,7 +8,7 @@ LLM:
MODEL: "microsoft/Phi-3-mini-4k-instruct"
TOKENIZER: "microsoft/Phi-3-mini-4k-instruct"
IMAGE_GEN:
MODEL: "black-forest-labs/FLUX.1-schnell"
MODEL: stabilityai/stable-diffusion-3.5-medium

# Datasets
SEMANTIC_SIM:
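For context, a minimal sketch of what this configuration switch means at runtime. The dict-style lookup below is an assumption about how the parsed config is exposed (the diffs only show "from fluentai.constants.config import config"); the model id is the one set above.

from fluentai.constants.config import config

# Assumption: config is the parsed config.yaml, exposed as a nested mapping.
model_id = config["IMAGE_GEN"]["MODEL"]
print(model_id)  # stabilityai/stable-diffusion-3.5-medium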
69 changes: 58 additions & 11 deletions fluentai/imagine/image_gen.py
@@ -1,9 +1,11 @@
import hashlib

Check failure on line 1 in fluentai/imagine/image_gen.py (GitHub Actions / build): Imports are incorrectly sorted and/or formatted.

import torch
from diffusers import AutoPipelineForText2Image, FluxPipeline, StableDiffusionPipeline

# Current options for text-to-image models
text_2_img_models = [
    "black-forest-labs/FLUX.1-schnell",
    "stabilityai/stable-diffusion-3.5-medium", "black-forest-labs/FLUX.1-schnell",
    "lambdalabs/miniSD-diffusers", # https://huggingface.co/lambdalabs/miniSD-diffusers
    "OFA-Sys/small-stable-diffusion-v0", # https://huggingface.co/OFA-Sys/small-stable-diffusion-v0
    "stabilityai/sdxl-turbo", # https://huggingface.co/stabilityai/sdxl-turbo
@@ -56,27 +58,72 @@ def get_sd_turbo_pipe():
    return pipe


def get_sd_medium_pipe():
    import torch
    from diffusers import StableDiffusion3Pipeline

    pipe = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3.5-medium",
        torch_dtype=torch.bfloat16,
        cache_dir="models",
    )
    pipe = pipe.to("cuda")

    return pipe


# Standalone demo call (capybara example); left commented out because pipe is
# not defined at module scope.
# image = pipe(
#     "A capybara holding a sign that reads Hello World",
#     num_inference_steps=40,
#     guidance_scale=4.5,
# ).images[0]
# image.save("capybara.png")


def generate_short_code_sha256(prompt: str, length: int = 8) -> str:
    """
    Generates a short code for a given prompt using SHA256 hashing.

    Args:
        prompt (str): The input sentence or prompt.
        length (int): Desired length of the short code. Default is 8.

    Returns:
        str: A short hexadecimal code representing the prompt.
    """
    # Create a SHA256 hash object
    hash_object = hashlib.sha256(prompt.encode("utf-8"))

    # Get the hexadecimal digest of the hash
    hex_digest = hash_object.hexdigest()

    # Truncate the hash to the desired length
    short_code = hex_digest[:length]

    return short_code


def generate_img(
    model_name: str = "sdxl-turbo",
    prompt: str = "A flashy bottle that stands out from the rest.",
):
    if model_name == "sdxl-turbo":
        pipe = get_sd_turbo_pipe()
    else:
        pipe = get_mini_sd_pipe()
        pipe = get_sd_medium_pipe()

    # pipe.enable_model_cpu_offload() # save some VRAM by offloading the model to CPU. Remove this if you have enough GPU power
    # pipe.to("cuda")
    # Play with these parameters to get different results
    image = pipe(
        prompt=prompt,
        guidance_scale=0.0,
        height=1024,
        width=1024,
        num_inference_steps=4,
        max_sequence_length=256,
        # guidance_scale=0.0,
        # height=1024,
        # width=1024,
        num_inference_steps=40,  # must be an integer number of denoising steps
        # max_sequence_length=256,
    ).images[0]
    image.save(f"text2img_tests/{model_name}.jpg")

    # Temporary solution to save the prompt in the image filename
    prompt_code = generate_short_code_sha256(prompt)

    image.save(f"img/text2img_tests/{model_name}_{prompt_code}.jpg")


generate_img()
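For reference, a short usage sketch of the new hashing helper and the resulting save path. The value shown for "hello" is just the first eight hex characters of its SHA-256 digest; actually calling generate_img assumes the diffusion weights and a CUDA device are available, and importing the module currently also triggers the generate_img() call above.

from fluentai.imagine.image_gen import generate_img, generate_short_code_sha256

# Deterministic 8-character code derived from the SHA-256 digest of the prompt
print(generate_short_code_sha256("hello"))  # 2cf24dba

# Saves the result to img/text2img_tests/sdxl-turbo_<prompt_code>.jpg
generate_img(
    model_name="sdxl-turbo",
    prompt="A flashy bottle that stands out from the rest.",
)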
22 changes: 22 additions & 0 deletions fluentai/main.py
@@ -2,3 +2,25 @@
# It will call the word to mnemonic pipeline
# Generate a prompt
# Generate the image
from fluentai.imagine.image_gen import generate_img
from fluentai.imagine.verbal_cue import VerbalCue
from fluentai.mnemonic.word2mnemonic import generate_mnemonic

vc = VerbalCue()


def generate_mnemonic_img(word: str, lang_code: str):
    best_matches = generate_mnemonic(word, lang_code)

    # Get the top phonetic match
    best_match = best_matches.iloc[0]

    # Generate a verbal cue
    prompt = vc.generate_cue(word, best_match["token_ort"])

    # Generate the image
    generate_img(prompt=prompt)


if __name__ == "__main__":
    pass
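A minimal sketch of how the new pipeline entry point could be exercised (the __main__ block above still only passes). The Indonesian example word is the one already used in word2mnemonic.py, and running it assumes the translation, phonetic, imageability, semantic, and image-generation models have been downloaded.

from fluentai.main import generate_mnemonic_img

# Full pipeline: mnemonic candidates -> top phonetic match -> verbal cue -> image
generate_mnemonic_img("kucing", "ind")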
1 change: 0 additions & 1 deletion fluentai/mnemonic/imageability/eval.py
@@ -137,7 +137,6 @@ def train_and_evaluate_models(X_train, X_test, y_train, y_test):


def main():

    # Load data
    embeddings, scores = load_data()

1 change: 0 additions & 1 deletion fluentai/mnemonic/phonetic/eval.py
@@ -19,7 +19,6 @@ def word2ipa(
    ipa_dataset: pd.DataFrame,
    use_fallback: bool = True,
) -> str:

    if not ipa_dataset.empty:
        # Check if the word is in the dataset
        ipa = ipa_dataset[ipa_dataset["token_ort"] == word]["token_ipa"]
1 change: 0 additions & 1 deletion fluentai/mnemonic/phonetic/g2p.py
@@ -13,7 +13,6 @@


def g2p(words: list[str]) -> str:

    out = tokenizer(words, padding=True, add_special_tokens=False, return_tensors="pt")

    preds = model.generate(
4 changes: 2 additions & 2 deletions fluentai/mnemonic/phonetic/phonetic.py
@@ -6,15 +6,15 @@
from fluentai.constants.config import config
from fluentai.mnemonic.phonetic.g2p import g2p
from fluentai.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
from fluentai.mnemonic.phonetic.utils import convert_to_matrix, load_cache, pad_vectors
from fluentai.mnemonic.phonetic.utils import (convert_to_matrix, load_cache,
                                              pad_vectors)
from fluentai.utils.logger import logger


def word2ipa(
    word: str,
    language_code: str = "eng-us",
) -> str:

    # Try searching in the dataset
    if "eng-us" in language_code:
        # First try lookup in the .tsv file
10 changes: 4 additions & 6 deletions fluentai/mnemonic/word2mnemonic.py
@@ -1,8 +1,7 @@
from fluentai.constants.languages import G2P_LANGCODES
from fluentai.mnemonic.imageability.imageability import ImageabilityPredictor
from fluentai.mnemonic.orthographic.orthographic import (
    compute_damerau_levenshtein_similarity,
)
from fluentai.mnemonic.orthographic.orthographic import \
    compute_damerau_levenshtein_similarity
from fluentai.mnemonic.phonetic.phonetic import top_phonetic
from fluentai.mnemonic.semantic.semantic import SemanticSimilarity
from fluentai.mnemonic.semantic.translator import translate_word
@@ -53,11 +52,10 @@ def generate_mnemonic(word: str, language_code):
        axis=1,
    )

    # Calculate the mnemonic score
    logger.info(top)
    return top


if __name__ == "__main__":
    # generate_mnemonic("kucing", "ind")

    generate_mnemonic("猫", "zho-s")
    print(generate_mnemonic("猫", "zho-s"))
