-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_app.py
96 lines (78 loc) · 2.98 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pandas as pd
import streamlit as st
from modelling import DATA_SOURCES, MODEL_DIR, load_saved_models
title = "Sentiment Analysis App"
st.set_page_config(page_title=title)
st.title(title)
st.markdown(
"Predict the *general feeling* in a body of text using the traditional "
"[*Bag of Words*](https://en.wikipedia.org/wiki/Bag-of-words_model) "
"model, implemented with [Scikit-Learn](https://scikit-learn.org/)."
)
with st.form("text-input-form"):
text_input = st.text_area("Text Input:", height=140, max_chars=500)
st.form_submit_button("Analyse")
@st.cache_resource
def get_models() -> list[dict]:
"""Fetch and cache assets for all data sources.
Returns:
list[dict]: Each list element has the keys "vectorizer",
"LogisticRegression", "MultinomialNB" and "SVC".
"""
return [load_saved_models(MODEL_DIR / source) for source in DATA_SOURCES]
def get_predictions(models: dict, text: str) -> tuple:
"""Predict the sentiment in the supplied text.
Args:
models (dict): Fitted vectorizer and classifiers.
text (str): Text to analyse.
Returns:
tuple: (predicted sentiments, ngrams used in making prediction).
"""
models = models.copy() # avoid mutating cached models
vectorizer = models.pop("vectorizer")
transformed_text = vectorizer.transform([text])
predictions = pd.DataFrame(
[
(model_name, model.predict(transformed_text)[0])
for model_name, model in models.items()
],
columns=["Model", "Predicted Sentiment"],
).replace({0: "Negative", 1: "Positive"})
key_features = vectorizer.inverse_transform(transformed_text)
return predictions, key_features
def customize_dataframe_style(label: str) -> str:
"""Dynamically set font style and color for the results table.
Args:
label (str): {"Positive", "Negative"}.
Returns:
str: CSS style string.
"""
color = "lime" if label == "Positive" else "tomato"
return f"font-family:serif;font-weight:bold;size:16px;color:{color};"
def display_predictions(models: dict, text: str) -> None:
"""Show the results as a table.
Args:
models (dict): Fitted vectorizer and models.
text (str): Text to analyse.
"""
predictions, key_features = get_predictions(models, text)
st.dataframe(
predictions.style.map(
customize_dataframe_style, subset=["Predicted Sentiment"]
),
hide_index=True,
)
st.write("Key Features:")
if len(key_features[0]) == 0:
st.caption(
":orange[All input words don't exist in the model's vocabulary. "
"Predicted sentiment is arbitrary.]"
)
else:
st.caption(key_features[0])
all_models = get_models()
if text_input:
st.markdown("#### Predictions:")
for data_source, models in zip(DATA_SOURCES, all_models):
with st.expander(f"{data_source.title()} Dataset Models:"):
display_predictions(models, text_input)