Skip to content

Commit

Permalink
Merge pull request #5 from clowder-framework/4-add-other-visualizatio…
Browse files Browse the repository at this point in the history
…n-support-for-smm-extractor

4 add other visualization support for smm extractor
  • Loading branch information
longshuicy authored Oct 12, 2023
2 parents 1371138 + b8f2b7b commit 1a1b814
Show file tree
Hide file tree
Showing 21 changed files with 213 additions and 42 deletions.
13 changes: 10 additions & 3 deletions name_entity_recognition_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the name entity recognition extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the name entity recognition extractor
33 changes: 30 additions & 3 deletions name_entity_recognition_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion name_entity_recognition_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.name.entity.recognition",
"version": "0.1.1",
"version": "0.1.2",
"description": "Named-entity recognition (NER) (also known as entity identification, entity chunking and entity extraction) is a subtask of information extraction that seeks to locate and classify named entity mentions in unstructured text into pre-defined categories such as the person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
3 changes: 1 addition & 2 deletions name_entity_recognition_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
pyclowder==3.0.4

pyclowder==3.0.7
13 changes: 10 additions & 3 deletions network_analysis_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the network analysis extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the network analysis extractor
33 changes: 30 additions & 3 deletions network_analysis_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion network_analysis_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.network.analysis",
"version": "0.1.1",
"version": "0.1.2",
"description": "Social network analysis is the process of investigating social structures through the use of networks and graph theory .It characterizes networked structures in terms of nodes (individual actors, people, or things within the network) and the ties, edges, or links (relationships or interactions) that connect them.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
2 changes: 1 addition & 1 deletion network_analysis_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyclowder==3.0.4
pyclowder==3.0.7
15 changes: 12 additions & 3 deletions preprocessing_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,22 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the preprocessing extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the preprocessing extractor


33 changes: 30 additions & 3 deletions preprocessing_extractor/SmmExtractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!/usr/bin/env python

"""Example extractor based on the clowder code."""
import posixpath

import pandas as pd
import json
import os
import csv
import types
import pickle
from datetime import datetime

import logging
from pyclowder.extractors import Extractor
import pyclowder.files

from algorithm import algorithm

import requests

def save_local_output(localSavePath, fname, output_data):
"""
Expand Down Expand Up @@ -78,6 +81,21 @@ def save_local_output(localSavePath, fname, output_data):
return os.path.join(localSavePath, fname)


# TODO wrap this into method on pyclowder
def create_output_folder(dataset_id, host, secret_key):
    """Create a timestamp-named folder in a Clowder V2 dataset.

    Args:
        dataset_id: ID of the dataset that will contain the new folder.
        host: Base URL of the Clowder instance.
        secret_key: API key sent in the X-API-KEY header.

    Returns:
        The new folder's ID string on success, or None on any failure
        (HTTP error status or network-level exception).
    """
    url = posixpath.join(host, f'api/v2/datasets/{dataset_id}/folders')
    headers = {"Content-Type": "application/json",
               "X-API-KEY": secret_key}
    # Name the folder by creation time so repeated extractions don't collide.
    current_timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    folder_data = {"name": current_timestamp}
    try:
        response = requests.post(url, json=folder_data, headers=headers)
    except requests.RequestException as exc:
        # Connection refused / DNS / timeout: keep the "return None" contract
        # instead of letting the exception crash the extractor.
        logging.error("Error creating folder: %s", exc)
        return None
    # Accept 200 and 201 — REST create endpoints commonly return either.
    if response.status_code in (200, 201):
        return response.json().get("id")
    logging.error("Error creating folder: %s %s", response.status_code, response.text)
    return None


class SmmExtractor(Extractor):
"""Count the number of characters, words and lines in a text file."""
def __init__(self):
Expand Down Expand Up @@ -107,13 +125,22 @@ def process_message(self, connector, host, secret_key, resource, parameters):
output = algorithm(df, userParams)
connector.message_process(resource, "Running the algorithm...")

# upload object to s3 bucket and return the url
# Create folder to save output
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
if clowder_version == 2:
connector.message_process(resource, "Creating output folder...")
folder_id = create_output_folder(dataset_id, host, secret_key)
if folder_id is not None:
connector.message_process(resource, f"folder id: {folder_id} created ...")
else:
folder_id = None
for fname, output_data in output.items():
if fname != 'uid':
local_output_path = save_local_output("", fname, output_data)
connector.message_process(resource, "Saving " + local_output_path + "...")
uploaded_file_id = pyclowder.files.upload_to_dataset(connector, host, secret_key, dataset_id,
local_output_path)
local_output_path,
folder_id=folder_id)
connector.message_process(resource, local_output_path + " saved...")

connector.message_process(resource, "Writing metadata...")
Expand Down
2 changes: 1 addition & 1 deletion preprocessing_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "smm.preprocessing.analysis",
"version": "0.1.1",
"version": "0.1.2",
"description": "Tokenization is the process of dividing written text into meaningful units, such as words, sentences , or topics. Lemmatization and Stemming reduces word forms to common base words. Part-of-speech Tagging is the process of marking up a word in a text (corpus) as corresponding to a particular part of speech, based on both its definition and its context.",
"author": "Wang, Chen <cwang138@illinois.edu>",
"contributors": [],
Expand Down
2 changes: 1 addition & 1 deletion preprocessing_extractor/requirement.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyclowder==3.0.4
pyclowder==3.0.7
1 change: 0 additions & 1 deletion requirement.txt

This file was deleted.

13 changes: 10 additions & 3 deletions sentiment_analysis_extractor/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.1.0] - 03-15-2023

### Added
- Initial release of the sentiment analysis extractor
## [0.1.2] - 10-11-2023

### Added
- Organize output data to folder [#4](https://github.com/clowder-framework/smm-extractor/issues/4)


## [0.1.1] - 10-03-2023

### Changed
- Support Clowder V2 [#1](https://github.com/clowder-framework/smm-extractor/issues/1)


## [0.1.0] - 03-15-2023

### Added
- Initial release of the sentiment analysis extractor
Loading

0 comments on commit 1a1b814

Please sign in to comment.