From a35d9c8f51de01361427da5f5449487247a98262 Mon Sep 17 00:00:00 2001 From: arnaudde Date: Wed, 10 Mar 2021 15:55:44 +0100 Subject: [PATCH 01/20] Clean up --- Makefile | 7 +- code-env/python/desc.json | 2 +- code-env/python/spec/requirements.txt | 4 +- .../recipe.json | 986 ++++++++++++++++++ .../recipe.py | 0 custom-recipes/google-translate/recipe.json | 327 ------ .../api-configuration/parameter-set.json | 2 +- plugin.json | 3 +- python-lib/google_translate_api_formatting.py | 34 +- tests/python/requirements.txt | 2 +- 10 files changed, 1006 insertions(+), 361 deletions(-) create mode 100644 custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json rename custom-recipes/{google-translate => dss-plugin-nlp-google-cloud-translation}/recipe.py (100%) delete mode 100644 custom-recipes/google-translate/recipe.json diff --git a/Makefile b/Makefile index 1705463..b34b3c3 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,11 @@ plugin: unit-tests: @echo "[START] Running unit tests..." 
+ @( \ + PYTHON_VERSION=`python3 -c "import sys; print('%d%d' % sys.version_info[:2])"`; \ + PYTHON_VERSION_IS_CORRECT=`python3 -c "import json; print('PYTHON$$PYTHON_VERSION' in json.load(open('code-env/python/desc.json'))['acceptedPythonInterpreters'])"`; \ + if [ "$$PYTHON_VERSION_IS_CORRECT" != "True" ]; then echo "Python version $$PYTHON_VERSION is not in acceptedPythonInterpreters"; exit 1; else echo "Python version $$PYTHON_VERSION is in acceptedPythonInterpreters"; fi; \ + ) @( \ python3 -m venv env/; \ source env/bin/activate; \ @@ -26,7 +31,7 @@ unit-tests: pip install --no-cache-dir -r tests/python/requirements.txt; \ pip install --no-cache-dir -r code-env/python/spec/requirements.txt; \ export PYTHONPATH="$(PYTHONPATH):$(PWD)/python-lib"; \ - pytest -o junit_family=xunit2 --junitxml=unit.xml tests/python/unit || true; \ + pytest -o junit_family=xunit2 --junitxml=unit.xml tests/python/unit || true; \ deactivate; \ ) @echo "[SUCCESS] Running unit tests: Done!" diff --git a/code-env/python/desc.json b/code-env/python/desc.json index fca0a34..1bd8f7d 100644 --- a/code-env/python/desc.json +++ b/code-env/python/desc.json @@ -1,7 +1,7 @@ { "acceptedPythonInterpreters": [ "PYTHON36", - "PYTHON35" + "PYTHON37" ], "forceConda": false, "installCorePackages": true, diff --git a/code-env/python/spec/requirements.txt b/code-env/python/spec/requirements.txt index a34bbf8..599cb8d 100644 --- a/code-env/python/spec/requirements.txt +++ b/code-env/python/spec/requirements.txt @@ -1,5 +1,5 @@ -google-api-python-client==1.12.3 -google-cloud-translate==2.0.1 +google-api-python-client==2.0.1 +google-cloud-translate==3.0.2 tqdm==4.50.1 ratelimit==2.2.1 retry==0.9.2 diff --git a/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json new file mode 100644 index 0000000..fbcf5b8 --- /dev/null +++ b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json @@ -0,0 +1,986 @@ 
+{ + "meta": { + "label": "Google translation", + "displayOrderRank": 1, + "description": "Translate a text column to a different language with the Google Translation API", + "icon": "icon-gcp-translation-api icon-cloud" + }, + "kind": "PYTHON", + "selectableFromDataset": "input_dataset", + "inputRoles": [ + { + "name": "input_dataset", + "label": "Input Dataset", + "description": "Dataset with a text column to translate", + "arity": "UNARY", + "required": true, + "acceptsDataset": true + } + ], + "outputRoles": [ + { + "name": "output_dataset", + "label": "Output dataset", + "description": "Dataset with text translated to another language", + "arity": "UNARY", + "required": true, + "acceptsDataset": true + } + ], + "params": [ + { + "name": "separator_input", + "label": "Input Parameters", + "type": "SEPARATOR" + }, + { + "name": "text_column", + "label": "Text column", + "type": "COLUMN", + "columnRole": "input_dataset", + "mandatory": true, + "allowedColumnTypes": [ + "string" + ] + }, + { + "name": "source_language", + "label": "Source language", + "description": "Language to translate from", + "type": "SELECT", + "mandatory": true, + "selectChoices": [ + { + "value": "auto", + "label": "Auto-detect" + }, + { + "label": "Afrikaans [af]", + "value": "af" + }, + { + "label": "Albanian [sq]", + "value": "sq" + }, + { + "label": "Amharic [am]", + "value": "am" + }, + { + "label": "Arabic [ar]", + "value": "ar" + }, + { + "label": "Armenian [hy]", + "value": "hy" + }, + { + "label": "Azerbaijani [az]", + "value": "az" + }, + { + "label": "Basque [eu]", + "value": "eu" + }, + { + "label": "Belarusian [be]", + "value": "be" + }, + { + "label": "Bengali [bn]", + "value": "bn" + }, + { + "label": "Bosnian [bs]", + "value": "bs" + }, + { + "label": "Bulgarian [bg]", + "value": "bg" + }, + { + "label": "Catalan [ca]", + "value": "ca" + }, + { + "label": "Cebuano [ceb]", + "value": "ceb" + }, + { + "label": "Chinese (Simplified) [zh-CN]", + "value": "zh-CN" + }, + { + 
"label": "Chinese (Traditional) [zh-TW]", + "value": "zh-TW" + }, + { + "label": "Corsican [co]", + "value": "co" + }, + { + "label": "Croatian [hr]", + "value": "hr" + }, + { + "label": "Czech [cs]", + "value": "cs" + }, + { + "label": "Danish [da]", + "value": "da" + }, + { + "label": "Dutch [nl]", + "value": "nl" + }, + { + "label": "English [en]", + "value": "en" + }, + { + "label": "Esperanto [eo]", + "value": "eo" + }, + { + "label": "Estonian [et]", + "value": "et" + }, + { + "label": "Finnish [fi]", + "value": "fi" + }, + { + "label": "French [fr]", + "value": "fr" + }, + { + "label": "Frisian [fy]", + "value": "fy" + }, + { + "label": "Galician [gl]", + "value": "gl" + }, + { + "label": "Georgian [ka]", + "value": "ka" + }, + { + "label": "German [de]", + "value": "de" + }, + { + "label": "Greek [el]", + "value": "el" + }, + { + "label": "Gujarati [gu]", + "value": "gu" + }, + { + "label": "Haitian Creole [ht]", + "value": "ht" + }, + { + "label": "Hausa [ha]", + "value": "ha" + }, + { + "label": "Hawaiian [haw]", + "value": "haw" + }, + { + "label": "Hebrew [he]", + "value": "he" + }, + { + "label": "Hindi [hi]", + "value": "hi" + }, + { + "label": "Hmong [hmn]", + "value": "hmn" + }, + { + "label": "Hungarian [hu]", + "value": "hu" + }, + { + "label": "Icelandic [is]", + "value": "is" + }, + { + "label": "Igbo [ig]", + "value": "ig" + }, + { + "label": "Indonesian [id]", + "value": "id" + }, + { + "label": "Irish [ga]", + "value": "ga" + }, + { + "label": "Italian [it]", + "value": "it" + }, + { + "label": "Japanese [ja]", + "value": "ja" + }, + { + "label": "Javanese [jv]", + "value": "jv" + }, + { + "label": "Kannada [kn]", + "value": "kn" + }, + { + "label": "Kazakh [kk]", + "value": "kk" + }, + { + "label": "Khmer [km]", + "value": "km" + }, + { + "label": "Kinyarwanda [rw]", + "value": "rw" + }, + { + "label": "Korean [ko]", + "value": "ko" + }, + { + "label": "Kurdish [ku]", + "value": "ku" + }, + { + "label": "Kyrgyz [ky]", + "value": "ky" + }, + 
{ + "label": "Lao [lo]", + "value": "lo" + }, + { + "label": "Latin [la]", + "value": "la" + }, + { + "label": "Latvian [lv]", + "value": "lv" + }, + { + "label": "Lithuanian [lt]", + "value": "lt" + }, + { + "label": "Luxembourgish [lb]", + "value": "lb" + }, + { + "label": "Macedonian [mk]", + "value": "mk" + }, + { + "label": "Malagasy [mg]", + "value": "mg" + }, + { + "label": "Malay [ms]", + "value": "ms" + }, + { + "label": "Malayalam [ml]", + "value": "ml" + }, + { + "label": "Maltese [mt]", + "value": "mt" + }, + { + "label": "Maori [mi]", + "value": "mi" + }, + { + "label": "Marathi [mr]", + "value": "mr" + }, + { + "label": "Mongolian [mn]", + "value": "mn" + }, + { + "label": "Myanmar (Burmese) [my]", + "value": "my" + }, + { + "label": "Nepali [ne]", + "value": "ne" + }, + { + "label": "Norwegian [no]", + "value": "no" + }, + { + "label": "Nyanja (Chichewa) [ny]", + "value": "ny" + }, + { + "label": "Odia (Oriya) [or]", + "value": "or" + }, + { + "label": "Pashto [ps]", + "value": "ps" + }, + { + "label": "Persian [fa]", + "value": "fa" + }, + { + "label": "Polish [pl]", + "value": "pl" + }, + { + "label": "Portuguese (Portugal, Brazil) [pt]", + "value": "pt" + }, + { + "label": "Punjabi [pa]", + "value": "pa" + }, + { + "label": "Romanian [ro]", + "value": "ro" + }, + { + "label": "Russian [ru]", + "value": "ru" + }, + { + "label": "Samoan [sm]", + "value": "sm" + }, + { + "label": "Scots Gaelic [gd]", + "value": "gd" + }, + { + "label": "Serbian [sr]", + "value": "sr" + }, + { + "label": "Sesotho [st]", + "value": "st" + }, + { + "label": "Shona [sn]", + "value": "sn" + }, + { + "label": "Sindhi [sd]", + "value": "sd" + }, + { + "label": "Sinhala (Sinhalese) [si]", + "value": "si" + }, + { + "label": "Slovak [sk]", + "value": "sk" + }, + { + "label": "Slovenian [sl]", + "value": "sl" + }, + { + "label": "Somali [so]", + "value": "so" + }, + { + "label": "Spanish [es]", + "value": "es" + }, + { + "label": "Sundanese [su]", + "value": "su" + }, + { + 
"label": "Swahili [sw]", + "value": "sw" + }, + { + "label": "Swedish [sv]", + "value": "sv" + }, + { + "label": "Tagalog (Filipino) [tl]", + "value": "tl" + }, + { + "label": "Tajik [tg]", + "value": "tg" + }, + { + "label": "Tamil [ta]", + "value": "ta" + }, + { + "label": "Tatar [tt]", + "value": "tt" + }, + { + "label": "Telugu [te]", + "value": "te" + }, + { + "label": "Thai [th]", + "value": "th" + }, + { + "label": "Turkish [tr]", + "value": "tr" + }, + { + "label": "Turkmen [tk]", + "value": "tk" + }, + { + "label": "Ukrainian [uk]", + "value": "uk" + }, + { + "label": "Urdu [ur]", + "value": "ur" + }, + { + "label": "Uyghur [ug]", + "value": "ug" + }, + { + "label": "Uzbek [uz]", + "value": "uz" + }, + { + "label": "Vietnamese [vi]", + "value": "vi" + }, + { + "label": "Welsh [cy]", + "value": "cy" + }, + { + "label": "Xhosa [xh]", + "value": "xh" + }, + { + "label": "Yiddish [yi]", + "value": "yi" + }, + { + "label": "Yoruba [yo]", + "value": "yo" + }, + { + "label": "Zulu [zu]", + "value": "zu" + } + ], + "defaultValue": "auto" + }, + { + "name": "target_language", + "label": "Target language", + "description": "Language to translate to", + "type": "SELECT", + "mandatory": true, + "selectChoices": [ + { + "label": "Afrikaans [af]", + "value": "af" + }, + { + "label": "Albanian [sq]", + "value": "sq" + }, + { + "label": "Amharic [am]", + "value": "am" + }, + { + "label": "Arabic [ar]", + "value": "ar" + }, + { + "label": "Armenian [hy]", + "value": "hy" + }, + { + "label": "Azerbaijani [az]", + "value": "az" + }, + { + "label": "Basque [eu]", + "value": "eu" + }, + { + "label": "Belarusian [be]", + "value": "be" + }, + { + "label": "Bengali [bn]", + "value": "bn" + }, + { + "label": "Bosnian [bs]", + "value": "bs" + }, + { + "label": "Bulgarian [bg]", + "value": "bg" + }, + { + "label": "Catalan [ca]", + "value": "ca" + }, + { + "label": "Cebuano [ceb]", + "value": "ceb" + }, + { + "label": "Chinese (Simplified) [zh-CN]", + "value": "zh-CN" + }, + { + 
"label": "Chinese (Traditional) [zh-TW]", + "value": "zh-TW" + }, + { + "label": "Corsican [co]", + "value": "co" + }, + { + "label": "Croatian [hr]", + "value": "hr" + }, + { + "label": "Czech [cs]", + "value": "cs" + }, + { + "label": "Danish [da]", + "value": "da" + }, + { + "label": "Dutch [nl]", + "value": "nl" + }, + { + "label": "English [en]", + "value": "en" + }, + { + "label": "Esperanto [eo]", + "value": "eo" + }, + { + "label": "Estonian [et]", + "value": "et" + }, + { + "label": "Finnish [fi]", + "value": "fi" + }, + { + "label": "French [fr]", + "value": "fr" + }, + { + "label": "Frisian [fy]", + "value": "fy" + }, + { + "label": "Galician [gl]", + "value": "gl" + }, + { + "label": "Georgian [ka]", + "value": "ka" + }, + { + "label": "German [de]", + "value": "de" + }, + { + "label": "Greek [el]", + "value": "el" + }, + { + "label": "Gujarati [gu]", + "value": "gu" + }, + { + "label": "Haitian Creole [ht]", + "value": "ht" + }, + { + "label": "Hausa [ha]", + "value": "ha" + }, + { + "label": "Hawaiian [haw]", + "value": "haw" + }, + { + "label": "Hebrew [he]", + "value": "he" + }, + { + "label": "Hindi [hi]", + "value": "hi" + }, + { + "label": "Hmong [hmn]", + "value": "hmn" + }, + { + "label": "Hungarian [hu]", + "value": "hu" + }, + { + "label": "Icelandic [is]", + "value": "is" + }, + { + "label": "Igbo [ig]", + "value": "ig" + }, + { + "label": "Indonesian [id]", + "value": "id" + }, + { + "label": "Irish [ga]", + "value": "ga" + }, + { + "label": "Italian [it]", + "value": "it" + }, + { + "label": "Japanese [ja]", + "value": "ja" + }, + { + "label": "Javanese [jv]", + "value": "jv" + }, + { + "label": "Kannada [kn]", + "value": "kn" + }, + { + "label": "Kazakh [kk]", + "value": "kk" + }, + { + "label": "Khmer [km]", + "value": "km" + }, + { + "label": "Kinyarwanda [rw]", + "value": "rw" + }, + { + "label": "Korean [ko]", + "value": "ko" + }, + { + "label": "Kurdish [ku]", + "value": "ku" + }, + { + "label": "Kyrgyz [ky]", + "value": "ky" + }, + 
{ + "label": "Lao [lo]", + "value": "lo" + }, + { + "label": "Latin [la]", + "value": "la" + }, + { + "label": "Latvian [lv]", + "value": "lv" + }, + { + "label": "Lithuanian [lt]", + "value": "lt" + }, + { + "label": "Luxembourgish [lb]", + "value": "lb" + }, + { + "label": "Macedonian [mk]", + "value": "mk" + }, + { + "label": "Malagasy [mg]", + "value": "mg" + }, + { + "label": "Malay [ms]", + "value": "ms" + }, + { + "label": "Malayalam [ml]", + "value": "ml" + }, + { + "label": "Maltese [mt]", + "value": "mt" + }, + { + "label": "Maori [mi]", + "value": "mi" + }, + { + "label": "Marathi [mr]", + "value": "mr" + }, + { + "label": "Mongolian [mn]", + "value": "mn" + }, + { + "label": "Myanmar (Burmese) [my]", + "value": "my" + }, + { + "label": "Nepali [ne]", + "value": "ne" + }, + { + "label": "Norwegian [no]", + "value": "no" + }, + { + "label": "Nyanja (Chichewa) [ny]", + "value": "ny" + }, + { + "label": "Odia (Oriya) [or]", + "value": "or" + }, + { + "label": "Pashto [ps]", + "value": "ps" + }, + { + "label": "Persian [fa]", + "value": "fa" + }, + { + "label": "Polish [pl]", + "value": "pl" + }, + { + "label": "Portuguese (Portugal, Brazil) [pt]", + "value": "pt" + }, + { + "label": "Punjabi [pa]", + "value": "pa" + }, + { + "label": "Romanian [ro]", + "value": "ro" + }, + { + "label": "Russian [ru]", + "value": "ru" + }, + { + "label": "Samoan [sm]", + "value": "sm" + }, + { + "label": "Scots Gaelic [gd]", + "value": "gd" + }, + { + "label": "Serbian [sr]", + "value": "sr" + }, + { + "label": "Sesotho [st]", + "value": "st" + }, + { + "label": "Shona [sn]", + "value": "sn" + }, + { + "label": "Sindhi [sd]", + "value": "sd" + }, + { + "label": "Sinhala (Sinhalese) [si]", + "value": "si" + }, + { + "label": "Slovak [sk]", + "value": "sk" + }, + { + "label": "Slovenian [sl]", + "value": "sl" + }, + { + "label": "Somali [so]", + "value": "so" + }, + { + "label": "Spanish [es]", + "value": "es" + }, + { + "label": "Sundanese [su]", + "value": "su" + }, + { + 
"label": "Swahili [sw]", + "value": "sw" + }, + { + "label": "Swedish [sv]", + "value": "sv" + }, + { + "label": "Tagalog (Filipino) [tl]", + "value": "tl" + }, + { + "label": "Tajik [tg]", + "value": "tg" + }, + { + "label": "Tamil [ta]", + "value": "ta" + }, + { + "label": "Tatar [tt]", + "value": "tt" + }, + { + "label": "Telugu [te]", + "value": "te" + }, + { + "label": "Thai [th]", + "value": "th" + }, + { + "label": "Turkish [tr]", + "value": "tr" + }, + { + "label": "Turkmen [tk]", + "value": "tk" + }, + { + "label": "Ukrainian [uk]", + "value": "uk" + }, + { + "label": "Urdu [ur]", + "value": "ur" + }, + { + "label": "Uyghur [ug]", + "value": "ug" + }, + { + "label": "Uzbek [uz]", + "value": "uz" + }, + { + "label": "Vietnamese [vi]", + "value": "vi" + }, + { + "label": "Welsh [cy]", + "value": "cy" + }, + { + "label": "Xhosa [xh]", + "value": "xh" + }, + { + "label": "Yiddish [yi]", + "value": "yi" + }, + { + "label": "Yoruba [yo]", + "value": "yo" + }, + { + "label": "Zulu [zu]", + "value": "zu" + } + ], + "defaultValue": "en" + }, + { + "name": "separator_configuration", + "label": "Configuration", + "type": "SEPARATOR" + }, + { + "name": "api_configuration_preset", + "label": "API configuration preset", + "type": "PRESET", + "parameterSetId": "api-configuration", + "mandatory": true + }, + { + "name": "separator_advanced", + "label": "Advanced", + "type": "SEPARATOR" + }, + { + "name": "expert", + "label": "Expert mode", + "type": "BOOLEAN", + "defaultValue": false + }, + { + "name": "error_handling", + "label": "Error handling", + "visibilityCondition": "model.expert", + "type": "SELECT", + "selectChoices": [ + { + "value": "LOG", + "label": "Log" + }, + { + "value": "FAIL", + "label": "Fail" + } + ], + "description": "Log API errors to the output or fail with an exception on any API error", + "defaultValue": "LOG", + "mandatory": true + } + ], + "resourceKeys": [] +} \ No newline at end of file diff --git a/custom-recipes/google-translate/recipe.py 
b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py similarity index 100% rename from custom-recipes/google-translate/recipe.py rename to custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py diff --git a/custom-recipes/google-translate/recipe.json b/custom-recipes/google-translate/recipe.json deleted file mode 100644 index 36322d1..0000000 --- a/custom-recipes/google-translate/recipe.json +++ /dev/null @@ -1,327 +0,0 @@ -{ - "meta": { - "label": "Google translation api", - "displayOrderRank": 1, - "description": "Translate a column to a different language with google translation api", - "icon": "icon-gcp-translation-api icon-cloud" - }, - "kind": "PYTHON", - "selectableFromDataset": "input_dataset", - "inputRoles": [ - { - "name": "input_dataset", - "label": "Input Dataset", - "description": "Dataset containing the text data to translate", - "arity": "UNARY", - "required": true, - "acceptsDataset": true - } - ], - "outputRoles": [ - { - "name": "output_dataset", - "label": "Output dataset", - "description": "Dataset with translated output", - "arity": "UNARY", - "required": true, - "acceptsDataset": true - } - ], - "params": [ - { - "name": "separator_input", - "label": "Input Parameters", - "type": "SEPARATOR" - }, - { - "name": "text_column", - "label": "Text column", - "type": "COLUMN", - "columnRole": "input_dataset", - "mandatory": true, - "allowedColumnTypes": [ - "string" - ] - }, - { - "name": "source_language", - "label": "Source language", - "description": "Language to translate from", - "type": "SELECT", - "mandatory": true, - "selectChoices": - [{"value": "auto", "label": "Auto-detect"}, - {'label': 'Afrikaans [af]', 'value': 'af'}, - {'label': 'Albanian [sq]', 'value': 'sq'}, - {'label': 'Amharic [am]', 'value': 'am'}, - {'label': 'Arabic [ar]', 'value': 'ar'}, - {'label': 'Armenian [hy]', 'value': 'hy'}, - {'label': 'Azerbaijani [az]', 'value': 'az'}, - {'label': 'Basque [eu]', 'value': 'eu'}, - {'label': 'Belarusian [be]', 
'value': 'be'}, - {'label': 'Bengali [bn]', 'value': 'bn'}, - {'label': 'Bosnian [bs]', 'value': 'bs'}, - {'label': 'Bulgarian [bg]', 'value': 'bg'}, - {'label': 'Catalan [ca]', 'value': 'ca'}, - {'label': 'Cebuano [ceb]', 'value': 'ceb'}, - {'label': 'Chinese (Simplified) [zh-CN]', 'value': 'zh-CN'}, - {'label': 'Chinese (Traditional) [zh-TW]', 'value': 'zh-TW'}, - {'label': 'Corsican [co]', 'value': 'co'}, - {'label': 'Croatian [hr]', 'value': 'hr'}, - {'label': 'Czech [cs]', 'value': 'cs'}, - {'label': 'Danish [da]', 'value': 'da'}, - {'label': 'Dutch [nl]', 'value': 'nl'}, - {'label': 'English [en]', 'value': 'en'}, - {'label': 'Esperanto [eo]', 'value': 'eo'}, - {'label': 'Estonian [et]', 'value': 'et'}, - {'label': 'Finnish [fi]', 'value': 'fi'}, - {'label': 'French [fr]', 'value': 'fr'}, - {'label': 'Frisian [fy]', 'value': 'fy'}, - {'label': 'Galician [gl]', 'value': 'gl'}, - {'label': 'Georgian [ka]', 'value': 'ka'}, - {'label': 'German [de]', 'value': 'de'}, - {'label': 'Greek [el]', 'value': 'el'}, - {'label': 'Gujarati [gu]', 'value': 'gu'}, - {'label': 'Haitian Creole [ht]', 'value': 'ht'}, - {'label': 'Hausa [ha]', 'value': 'ha'}, - {'label': 'Hawaiian [haw]', 'value': 'haw'}, - {'label': 'Hebrew [he]', 'value': 'he'}, - {'label': 'Hindi [hi]', 'value': 'hi'}, - {'label': 'Hmong [hmn]', 'value': 'hmn'}, - {'label': 'Hungarian [hu]', 'value': 'hu'}, - {'label': 'Icelandic [is]', 'value': 'is'}, - {'label': 'Igbo [ig]', 'value': 'ig'}, - {'label': 'Indonesian [id]', 'value': 'id'}, - {'label': 'Irish [ga]', 'value': 'ga'}, - {'label': 'Italian [it]', 'value': 'it'}, - {'label': 'Japanese [ja]', 'value': 'ja'}, - {'label': 'Javanese [jv]', 'value': 'jv'}, - {'label': 'Kannada [kn]', 'value': 'kn'}, - {'label': 'Kazakh [kk]', 'value': 'kk'}, - {'label': 'Khmer [km]', 'value': 'km'}, - {'label': 'Kinyarwanda [rw]', 'value': 'rw'}, - {'label': 'Korean [ko]', 'value': 'ko'}, - {'label': 'Kurdish [ku]', 'value': 'ku'}, - {'label': 'Kyrgyz [ky]', 'value': 
'ky'}, - {'label': 'Lao [lo]', 'value': 'lo'}, - {'label': 'Latin [la]', 'value': 'la'}, - {'label': 'Latvian [lv]', 'value': 'lv'}, - {'label': 'Lithuanian [lt]', 'value': 'lt'}, - {'label': 'Luxembourgish [lb]', 'value': 'lb'}, - {'label': 'Macedonian [mk]', 'value': 'mk'}, - {'label': 'Malagasy [mg]', 'value': 'mg'}, - {'label': 'Malay [ms]', 'value': 'ms'}, - {'label': 'Malayalam [ml]', 'value': 'ml'}, - {'label': 'Maltese [mt]', 'value': 'mt'}, - {'label': 'Maori [mi]', 'value': 'mi'}, - {'label': 'Marathi [mr]', 'value': 'mr'}, - {'label': 'Mongolian [mn]', 'value': 'mn'}, - {'label': 'Myanmar (Burmese) [my]', 'value': 'my'}, - {'label': 'Nepali [ne]', 'value': 'ne'}, - {'label': 'Norwegian [no]', 'value': 'no'}, - {'label': 'Nyanja (Chichewa) [ny]', 'value': 'ny'}, - {'label': 'Odia (Oriya) [or]', 'value': 'or'}, - {'label': 'Pashto [ps]', 'value': 'ps'}, - {'label': 'Persian [fa]', 'value': 'fa'}, - {'label': 'Polish [pl]', 'value': 'pl'}, - {'label': 'Portuguese (Portugal, Brazil) [pt]', 'value': 'pt'}, - {'label': 'Punjabi [pa]', 'value': 'pa'}, - {'label': 'Romanian [ro]', 'value': 'ro'}, - {'label': 'Russian [ru]', 'value': 'ru'}, - {'label': 'Samoan [sm]', 'value': 'sm'}, - {'label': 'Scots Gaelic [gd]', 'value': 'gd'}, - {'label': 'Serbian [sr]', 'value': 'sr'}, - {'label': 'Sesotho [st]', 'value': 'st'}, - {'label': 'Shona [sn]', 'value': 'sn'}, - {'label': 'Sindhi [sd]', 'value': 'sd'}, - {'label': 'Sinhala (Sinhalese) [si]', 'value': 'si'}, - {'label': 'Slovak [sk]', 'value': 'sk'}, - {'label': 'Slovenian [sl]', 'value': 'sl'}, - {'label': 'Somali [so]', 'value': 'so'}, - {'label': 'Spanish [es]', 'value': 'es'}, - {'label': 'Sundanese [su]', 'value': 'su'}, - {'label': 'Swahili [sw]', 'value': 'sw'}, - {'label': 'Swedish [sv]', 'value': 'sv'}, - {'label': 'Tagalog (Filipino) [tl]', 'value': 'tl'}, - {'label': 'Tajik [tg]', 'value': 'tg'}, - {'label': 'Tamil [ta]', 'value': 'ta'}, - {'label': 'Tatar [tt]', 'value': 'tt'}, - {'label': 'Telugu [te]', 
'value': 'te'}, - {'label': 'Thai [th]', 'value': 'th'}, - {'label': 'Turkish [tr]', 'value': 'tr'}, - {'label': 'Turkmen [tk]', 'value': 'tk'}, - {'label': 'Ukrainian [uk]', 'value': 'uk'}, - {'label': 'Urdu [ur]', 'value': 'ur'}, - {'label': 'Uyghur [ug]', 'value': 'ug'}, - {'label': 'Uzbek [uz]', 'value': 'uz'}, - {'label': 'Vietnamese [vi]', 'value': 'vi'}, - {'label': 'Welsh [cy]', 'value': 'cy'}, - {'label': 'Xhosa [xh]', 'value': 'xh'}, - {'label': 'Yiddish [yi]', 'value': 'yi'}, - {'label': 'Yoruba [yo]', 'value': 'yo'}, - {'label': 'Zulu [zu]', 'value': 'zu'}], - "defaultValue": "auto" - }, - { - "name": "target_language", - "label": "Target language", - "description": "Language to translate to", - "type": "SELECT", - "mandatory": true, - "selectChoices": - [{'label': 'Afrikaans [af]', 'value': 'af'}, - {'label': 'Albanian [sq]', 'value': 'sq'}, - {'label': 'Amharic [am]', 'value': 'am'}, - {'label': 'Arabic [ar]', 'value': 'ar'}, - {'label': 'Armenian [hy]', 'value': 'hy'}, - {'label': 'Azerbaijani [az]', 'value': 'az'}, - {'label': 'Basque [eu]', 'value': 'eu'}, - {'label': 'Belarusian [be]', 'value': 'be'}, - {'label': 'Bengali [bn]', 'value': 'bn'}, - {'label': 'Bosnian [bs]', 'value': 'bs'}, - {'label': 'Bulgarian [bg]', 'value': 'bg'}, - {'label': 'Catalan [ca]', 'value': 'ca'}, - {'label': 'Cebuano [ceb]', 'value': 'ceb'}, - {'label': 'Chinese (Simplified) [zh-CN]', 'value': 'zh-CN'}, - {'label': 'Chinese (Traditional) [zh-TW]', 'value': 'zh-TW'}, - {'label': 'Corsican [co]', 'value': 'co'}, - {'label': 'Croatian [hr]', 'value': 'hr'}, - {'label': 'Czech [cs]', 'value': 'cs'}, - {'label': 'Danish [da]', 'value': 'da'}, - {'label': 'Dutch [nl]', 'value': 'nl'}, - {'label': 'English [en]', 'value': 'en'}, - {'label': 'Esperanto [eo]', 'value': 'eo'}, - {'label': 'Estonian [et]', 'value': 'et'}, - {'label': 'Finnish [fi]', 'value': 'fi'}, - {'label': 'French [fr]', 'value': 'fr'}, - {'label': 'Frisian [fy]', 'value': 'fy'}, - {'label': 'Galician [gl]', 
'value': 'gl'}, - {'label': 'Georgian [ka]', 'value': 'ka'}, - {'label': 'German [de]', 'value': 'de'}, - {'label': 'Greek [el]', 'value': 'el'}, - {'label': 'Gujarati [gu]', 'value': 'gu'}, - {'label': 'Haitian Creole [ht]', 'value': 'ht'}, - {'label': 'Hausa [ha]', 'value': 'ha'}, - {'label': 'Hawaiian [haw]', 'value': 'haw'}, - {'label': 'Hebrew [he]', 'value': 'he'}, - {'label': 'Hindi [hi]', 'value': 'hi'}, - {'label': 'Hmong [hmn]', 'value': 'hmn'}, - {'label': 'Hungarian [hu]', 'value': 'hu'}, - {'label': 'Icelandic [is]', 'value': 'is'}, - {'label': 'Igbo [ig]', 'value': 'ig'}, - {'label': 'Indonesian [id]', 'value': 'id'}, - {'label': 'Irish [ga]', 'value': 'ga'}, - {'label': 'Italian [it]', 'value': 'it'}, - {'label': 'Japanese [ja]', 'value': 'ja'}, - {'label': 'Javanese [jv]', 'value': 'jv'}, - {'label': 'Kannada [kn]', 'value': 'kn'}, - {'label': 'Kazakh [kk]', 'value': 'kk'}, - {'label': 'Khmer [km]', 'value': 'km'}, - {'label': 'Kinyarwanda [rw]', 'value': 'rw'}, - {'label': 'Korean [ko]', 'value': 'ko'}, - {'label': 'Kurdish [ku]', 'value': 'ku'}, - {'label': 'Kyrgyz [ky]', 'value': 'ky'}, - {'label': 'Lao [lo]', 'value': 'lo'}, - {'label': 'Latin [la]', 'value': 'la'}, - {'label': 'Latvian [lv]', 'value': 'lv'}, - {'label': 'Lithuanian [lt]', 'value': 'lt'}, - {'label': 'Luxembourgish [lb]', 'value': 'lb'}, - {'label': 'Macedonian [mk]', 'value': 'mk'}, - {'label': 'Malagasy [mg]', 'value': 'mg'}, - {'label': 'Malay [ms]', 'value': 'ms'}, - {'label': 'Malayalam [ml]', 'value': 'ml'}, - {'label': 'Maltese [mt]', 'value': 'mt'}, - {'label': 'Maori [mi]', 'value': 'mi'}, - {'label': 'Marathi [mr]', 'value': 'mr'}, - {'label': 'Mongolian [mn]', 'value': 'mn'}, - {'label': 'Myanmar (Burmese) [my]', 'value': 'my'}, - {'label': 'Nepali [ne]', 'value': 'ne'}, - {'label': 'Norwegian [no]', 'value': 'no'}, - {'label': 'Nyanja (Chichewa) [ny]', 'value': 'ny'}, - {'label': 'Odia (Oriya) [or]', 'value': 'or'}, - {'label': 'Pashto [ps]', 'value': 'ps'}, - 
{'label': 'Persian [fa]', 'value': 'fa'}, - {'label': 'Polish [pl]', 'value': 'pl'}, - {'label': 'Portuguese (Portugal, Brazil) [pt]', 'value': 'pt'}, - {'label': 'Punjabi [pa]', 'value': 'pa'}, - {'label': 'Romanian [ro]', 'value': 'ro'}, - {'label': 'Russian [ru]', 'value': 'ru'}, - {'label': 'Samoan [sm]', 'value': 'sm'}, - {'label': 'Scots Gaelic [gd]', 'value': 'gd'}, - {'label': 'Serbian [sr]', 'value': 'sr'}, - {'label': 'Sesotho [st]', 'value': 'st'}, - {'label': 'Shona [sn]', 'value': 'sn'}, - {'label': 'Sindhi [sd]', 'value': 'sd'}, - {'label': 'Sinhala (Sinhalese) [si]', 'value': 'si'}, - {'label': 'Slovak [sk]', 'value': 'sk'}, - {'label': 'Slovenian [sl]', 'value': 'sl'}, - {'label': 'Somali [so]', 'value': 'so'}, - {'label': 'Spanish [es]', 'value': 'es'}, - {'label': 'Sundanese [su]', 'value': 'su'}, - {'label': 'Swahili [sw]', 'value': 'sw'}, - {'label': 'Swedish [sv]', 'value': 'sv'}, - {'label': 'Tagalog (Filipino) [tl]', 'value': 'tl'}, - {'label': 'Tajik [tg]', 'value': 'tg'}, - {'label': 'Tamil [ta]', 'value': 'ta'}, - {'label': 'Tatar [tt]', 'value': 'tt'}, - {'label': 'Telugu [te]', 'value': 'te'}, - {'label': 'Thai [th]', 'value': 'th'}, - {'label': 'Turkish [tr]', 'value': 'tr'}, - {'label': 'Turkmen [tk]', 'value': 'tk'}, - {'label': 'Ukrainian [uk]', 'value': 'uk'}, - {'label': 'Urdu [ur]', 'value': 'ur'}, - {'label': 'Uyghur [ug]', 'value': 'ug'}, - {'label': 'Uzbek [uz]', 'value': 'uz'}, - {'label': 'Vietnamese [vi]', 'value': 'vi'}, - {'label': 'Welsh [cy]', 'value': 'cy'}, - {'label': 'Xhosa [xh]', 'value': 'xh'}, - {'label': 'Yiddish [yi]', 'value': 'yi'}, - {'label': 'Yoruba [yo]', 'value': 'yo'}, - {'label': 'Zulu [zu]', 'value': 'zu'}], - "defaultValue": "en" - }, - { - "name": "separator_configuration", - "label": "Configuration", - "type": "SEPARATOR" - }, - { - "name": "api_configuration_preset", - "label": "API configuration preset", - "type": "PRESET", - "parameterSetId": "api-configuration", - "mandatory": true - }, - { - 
"name": "separator_advanced", - "label": "Advanced", - "type": "SEPARATOR" - }, - { - "name": "expert", - "label": "Expert mode", - "type": "BOOLEAN", - "defaultValue": false - }, - { - "name": "error_handling", - "label": "Error handling", - "visibilityCondition": "model.expert", - "type": "SELECT", - "selectChoices": [ - { - "value": "LOG", - "label": "Log" - }, - { - "value": "FAIL", - "label": "Fail" - } - ], - "description": "Log API errors to the output or fail with an exception on any API error", - "defaultValue": "LOG", - "mandatory": true - } - ], - "resourceKeys": [] -} \ No newline at end of file diff --git a/parameter-sets/api-configuration/parameter-set.json b/parameter-sets/api-configuration/parameter-set.json index 37a95f7..c6ac016 100644 --- a/parameter-sets/api-configuration/parameter-set.json +++ b/parameter-sets/api-configuration/parameter-set.json @@ -25,7 +25,7 @@ "name": "separator_api_quota", "label": "API quota", "type": "SEPARATOR", - "description": "Throttling to stay within the quota defined by Google Cloud: https://cloud.google.com/natural-language/quotas" + "description": "Throttling to stay within the quota defined by Google Cloud: https://cloud.google.com/translate/quotas" }, { "name": "api_quota_period", diff --git a/plugin.json b/plugin.json index cba4afa..0ae85c8 100644 --- a/plugin.json +++ b/plugin.json @@ -4,10 +4,11 @@ "meta": { "label": "Google Cloud Translation", "category": "Natural Language Processing", - "description": "Use Google Cloud basic translation API to perform translation", + "description": "Use Google Cloud Translation API to translate text data to another language", "author": "Dataiku (Alex COMBESSIE, Arnaud d'Esquerre)", "icon": "icon-gcp-translation-api icon-cloud", "licenseInfo": "Apache Software License", + "supportLevel": "NOT_SUPPORTED", "url": "https://www.dataiku.com/product/plugins/google-cloud-translation/", "tags": [ "Google", diff --git a/python-lib/google_translate_api_formatting.py 
b/python-lib/google_translate_api_formatting.py index a73374c..d90d2c8 100644 --- a/python-lib/google_translate_api_formatting.py +++ b/python-lib/google_translate_api_formatting.py @@ -17,27 +17,6 @@ ) -# ============================================================================== -# CONSTANT DEFINITION -# ============================================================================== - - -class EntityTypeEnum(Enum): - ADDRESS = "Address" - CONSUMER_GOOD = "Consumer good" - DATE = "Date" - EVENT = "Event" - LOCATION = "Location" - NUMBER = "Number" - ORGANIZATION = "Organization" - OTHER = "Other" - PERSON = "Person" - PHONE_NUMBER = "Phone number" - PRICE = "Price" - UNKNOWN = "Unknown" - WORK_OF_ART = "Work of art" - - # ============================================================================== # CLASS AND FUNCTION DEFINITION # ============================================================================== @@ -92,16 +71,19 @@ def __init__( error_handling: ErrorHandlingEnum = ErrorHandlingEnum.LOG, ): super().__init__(input_df, column_prefix, error_handling) - self.translated_text_column_name = "{input_column}_{language}".format(input_column=input_column, language=target_language.replace('-','_')) + self.translated_text_column_name = generate_unique( + "{input_column}_{language}".format(input_column=input_column, language=target_language.replace('-', '_')), + input_df.columns, column_prefix) self.source_language = source_language - if self.translated_text_column_name in input_df: - raise Exception("Conflict in column names. 
{} already exists".format(self.translated_text_column_name)) + self.input_column = input_column + self.target_language = target_language self._compute_column_description() def _compute_column_description(self): + """Store the human-readable description of the translated text column.""" - self.column_description_dict[ - self.translated_text_column_name - ] = "Input column text translated to the desired language by the google translate api" + self.column_description_dict[self.translated_text_column_name] = \ + "{language} translation of the {col} column by the Google Translation API".format( + language=self.target_language, col=self.input_column) def format_row(self, row: Dict) -> Dict: raw_response = row[self.api_column_names.response] diff --git a/tests/python/requirements.txt b/tests/python/requirements.txt index b7bb4d2..37fd1cb 100644 --- a/tests/python/requirements.txt +++ b/tests/python/requirements.txt @@ -1,2 +1,2 @@ -pandas==1.1.3 +pandas~=1.0 pytest==6.1.1 \ No newline at end of file From cc0b72c636822f001ef09670411a3b3a2f6cc0b4 Mon Sep 17 00:00:00 2001 From: arnaudde Date: Wed, 10 Mar 2021 16:14:06 +0100 Subject: [PATCH 02/20] Fix .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 63025a3..cf2df1b 100644 --- a/.gitignore +++ b/.gitignore @@ -161,7 +161,7 @@ dmypy.json /*.tar.gz # Idea files -/.idea/ +.idea/ # Output files from R CMD check /*.Rcheck/ From 3062c76de58aa143de8bdd05d6a59002bf6efafa Mon Sep 17 00:00:00 2001 From: arnaudde Date: Wed, 10 Mar 2021 16:14:52 +0100 Subject: [PATCH 03/20] Change log input from dropdown to checkbox --- .../recipe.json | 30 +++---------------- .../recipe.py | 2 +- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json index fbcf5b8..53247b9 100644 --- a/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json +++ b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.json @@ -952,33 +952,11 @@ 
"mandatory": true }, { - "name": "separator_advanced", - "label": "Advanced", - "type": "SEPARATOR" - }, - { - "name": "expert", - "label": "Expert mode", + "name": "fail_on_error", + "label": "Fail on error", "type": "BOOLEAN", - "defaultValue": false - }, - { - "name": "error_handling", - "label": "Error handling", - "visibilityCondition": "model.expert", - "type": "SELECT", - "selectChoices": [ - { - "value": "LOG", - "label": "Log" - }, - { - "value": "FAIL", - "label": "Fail" - } - ], - "description": "Log API errors to the output or fail with an exception on any API error", - "defaultValue": "LOG", + "description": "Abort execution if any issues are raised. By default, errors will be logged per record in the output.", + "defaultValue": false, "mandatory": true } ], diff --git a/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py index fc580e9..fff39ae 100644 --- a/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py +++ b/custom-recipes/dss-plugin-nlp-google-cloud-translation/recipe.py @@ -30,7 +30,7 @@ # Params for parallelization column_prefix = "translation_api" parallel_workers = api_configuration_preset.get("parallel_workers") -error_handling = ErrorHandlingEnum[get_recipe_config().get("error_handling")] +error_handling = ErrorHandlingEnum["FAIL"] if get_recipe_config().get("fail_on_error") else ErrorHandlingEnum["LOG"] # Params for translation client = get_client(api_configuration_preset.get("gcp_service_account_key")) From d17ec34274ddc4b7e5b3efd7a01b2890354f9c47 Mon Sep 17 00:00:00 2001 From: arnaudde Date: Wed, 10 Mar 2021 16:32:21 +0100 Subject: [PATCH 04/20] Remove .idea from repo --- .idea/workspace.xml | 93 --------------------------------------------- 1 file changed, 93 deletions(-) delete mode 100644 .idea/workspace.xml diff --git a/.idea/workspace.xml b/.idea/workspace.xml deleted file mode 100644 index 99877c8..0000000 --- a/.idea/workspace.xml +++ 
/dev/null @@ -1,93 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1614267678949 - - - - - - - - - - - - - -