Skip to content

Commit

Permalink
Merge pull request #5 from clowder-framework/4-add-other-visualizatio…
Browse files Browse the repository at this point in the history
…n-support-for-smm-extractor

4 add other visualization support for smm extractor
  • Loading branch information
longshuicy authored Oct 12, 2023
2 parents 1371138 + b8f2b7b commit 1a1b814
Show file tree
Hide file tree
Showing 21 changed files with 213 additions and 42 deletions.
13 changes: 10 additions & 3 deletions name_entity_recognition_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the name entity recognition extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the name entity recognition extractor
33 changes: 30 additions & 3 deletions name_entity_recognition_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion name_entity_recognition_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.name.entity.recognition",
"version": "0.1.1",
"version": "0.1.2",
"description": "Named-entity recognition (NER) (also known as entity identification, entity chunking and entity extraction) is a subtask of information extraction that seeks to locate and classify named entity mentions in unstructured text into pre-defined categories such as the person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
3 changes: 1 addition & 2 deletions name_entity_recognition_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
pyclowder==3.0.4

pyclowder==3.0.7
13 changes: 10 additions & 3 deletions network_analysis_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the network analysis extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the network analysis extractor
33 changes: 30 additions & 3 deletions network_analysis_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion network_analysis_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.network.analysis",
"version": "0.1.1",
"version": "0.1.2",
"description": "Social network analysis is the process of investigating social structures through the use of networks and graph theory .It characterizes networked structures in terms of nodes (individual actors, people, or things within the network) and the ties, edges, or links (relationships or interactions) that connect them.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
2 changes: 1 addition & 1 deletion network_analysis_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyclowder==3.0.4
pyclowder==3.0.7
15 changes: 12 additions & 3 deletions preprocessing_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,22 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the preprocessing extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the preprocessing extractor


33 changes: 30 additions & 3 deletions preprocessing_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion preprocessing_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.preprocessing.analysis",
"version": "0.1.1",
"version": "0.1.2",
"description": "Tokenization is the process of dividing written text into meaningful units, such as words, sentences , or topics. Lemmatization and Stemming reduces word forms to common base words. Part-of-speech Tagging is the process of marking up a word in a text (corpus) as corresponding to a particular part of speech, based on both its definition and its context.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
2 changes: 1 addition & 1 deletion preprocessing_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyclowder==3.0.4
pyclowder==3.0.7
1 change: 0 additions & 1 deletion requirement.txt

This file was deleted.

13 changes: 10 additions & 3 deletions sentiment_analysis_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the sentiment analysis extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the sentiment analysis extractor
Loading

0 comments on commit 1a1b814

Please sign in to comment.