First commit to SAWAW

This commit is contained in:
Kunologist 2023-11-13 21:37:32 +00:00
commit af858756bb
13 changed files with 318 additions and 0 deletions

21
.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
# Conda environment
.conda
# Pycache
__pycache__/
.coverage
# Compiled Python files
*.pyc
# Logs
*.log
# Jupyter Notebook checkpoints
.ipynb_checkpoints/
# Pytest cache
.pytest_cache/
# Data
data/

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 SAWAW Team
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

29
README.md Normal file
View File

@ -0,0 +1,29 @@
# SAWAW: Sentiment Analysis with Aspect Words
This package provides a toolkit for analyzing the sentiment expressed toward aspect words in a sentence.
## Installation
After cloning the repository, install the package with the following command:
```bash
pip install -e .
```
This will install the package in the current environment in editable mode.
## Usage
```python
>>> from sawaw import SAWAWEntry, analyze_gpt3_5
>>> entry = SAWAWEntry('I love the new iPhone 30 Pro Max Extra Double. The camera is amazing, but the battery life is not great.', ['camera', 'battery'])
>>> analyze_gpt3_5(entry)
>>> print(entry)
```
![](docs/test.png)
## License
This project is licensed under the terms of the MIT license. See [LICENSE](LICENSE) for more details.

BIN
docs/test.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

3
finish.sh Normal file
View File

@ -0,0 +1,3 @@
# Pre-commit cleanup: run the code formatter and the import sorter over the
# repository, leaving the local .conda environment directory untouched.
black . --exclude .conda
isort . --skip ./.conda
# The test/coverage run is currently disabled.
# pytest --cov --cov-report=term-missing tests/

21
pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "sawaw"
version = "0.0.1"
description = ""
authors = ["Kunologist"]
license = "MIT"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
loguru = "^0.7.0"
pandas = "^2.1.3"
numpy = "^1.26.2"
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
openai = "^1.2.0,<1.3.0"
colorama = "^0.4.4"

3
sawaw/__init__.py Normal file
View File

@ -0,0 +1,3 @@
from .openai_gpt import analyze as analyze_gpt3_5
from .sentiments import SentimentResult
from .entry import SAWAWEntry

51
sawaw/entry.py Normal file
View File

@ -0,0 +1,51 @@
from sawaw.sentiments import SentimentResult
from dataclasses import dataclass
from typing import List
import colorama
# Foreground color used for each sentiment when SAWAWEntry renders an aspect
# word or prints the legend. Every SentimentResult member has an entry.
color_mapping = {
    SentimentResult.POSITIVE: colorama.Fore.GREEN,
    SentimentResult.NEUTRAL: colorama.Fore.YELLOW,
    SentimentResult.NEGATIVE: colorama.Fore.RED,
    SentimentResult.NONE: colorama.Fore.LIGHTWHITE_EX,
    SentimentResult.UNDEFINED: colorama.Fore.MAGENTA,
}
@dataclass
class SAWAWEntry:
    """A comment, the aspect words to analyze in it, and their sentiments.

    Attributes:
        comment: The raw text being analyzed.
        aspect_words: The words or phrases whose sentiment is of interest.
        sentiment_results: One SentimentResult per aspect word, in the same
            order as ``aspect_words``. When omitted, every aspect word starts
            as ``SentimentResult.UNDEFINED``.
    """

    comment: str
    aspect_words: List[str]
    # ``None`` is only a sentinel; __post_init__ replaces it with a fresh list
    # so instances never share one mutable default.
    sentiment_results: List[SentimentResult] = None

    def __post_init__(self):
        """Fill in UNDEFINED sentiments when none were supplied."""
        if self.sentiment_results is None:
            self.sentiment_results = [SentimentResult.UNDEFINED] * len(
                self.aspect_words
            )

    def __repr__(self):
        """Return the comment with each aspect word wrapped in its sentiment color.

        All aspect words are substituted in a single pass over the original
        comment. This prevents a later word from matching inside the ANSI
        escape codes inserted for an earlier one, and from re-wrapping text
        that has already been colored.
        """
        import re

        sentiment_by_word = {}
        for position, word in enumerate(self.aspect_words):
            # Skip empty words (they would match everywhere) and repeats
            # (the first occurrence decides the color).
            if not word or word in sentiment_by_word:
                continue
            if position < len(self.sentiment_results):
                sentiment_by_word[word] = self.sentiment_results[position]
            else:
                # Tolerate a results list shorter than the word list.
                sentiment_by_word[word] = SentimentResult.UNDEFINED

        if not sentiment_by_word:
            return self.comment

        # Longest alternatives first so "battery life" wins over "battery".
        alternatives = sorted(sentiment_by_word, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(word) for word in alternatives))

        def colorize(match):
            word = match.group(0)
            return (
                color_mapping[sentiment_by_word[word]]
                + word
                + colorama.Style.RESET_ALL
            )

        return pattern.sub(colorize, self.comment)

    def __str__(self):
        """Return the same colored rendering as ``__repr__``."""
        return self.__repr__()

    @staticmethod
    def print_legends():
        """Print every sentiment name in its color, separated by ' | '."""
        legends = [
            "{}{}{}".format(color, sentiment_result.name, colorama.Style.RESET_ALL)
            for sentiment_result, color in color_mapping.items()
        ]
        print(" | ".join(legends))

13
sawaw/method.py Normal file
View File

@ -0,0 +1,13 @@
import time
from loguru import logger
def sawaw_analyze_method(func):
    """Declare ``func`` as a SAWAW analysis method and time each call.

    A decorated function is expected to take an SAWAWEntry as input and
    populate its sentiment_results field. The wrapper logs the elapsed time
    of every call at debug level and otherwise passes arguments and the
    return value through unchanged.

    :param func: The analysis function to wrap.
    :return: A wrapper that keeps ``func``'s name, docstring and signature
        metadata.
    """
    import functools

    # Without wraps() every decorated function would be reported as "inner"
    # and lose its docstring.
    @functools.wraps(func)
    def inner(*args, **kwargs):
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        logger.debug("SAWAW analysis took {:.2f} s.", time.perf_counter() - started)
        return result

    return inner

102
sawaw/openai_gpt.py Normal file
View File

@ -0,0 +1,102 @@
from typing import Dict, Union
from loguru import logger
from openai import OpenAI
from sawaw.method import sawaw_analyze_method
from sawaw.entry import SAWAWEntry
from sawaw.sentiments import SentimentResult
# SECURITY: credentials must never be committed to source control. The client
# reads the key from the OPENAI_API_KEY environment variable when no api_key
# argument is given. Any key that was previously committed here must be
# treated as leaked and revoked.
client = OpenAI()
def _to_sentiment_result(label: str) -> SentimentResult:
    """Map a label returned by the model to a SentimentResult (UNDEFINED if unknown)."""
    # Models occasionally add whitespace, quotes or a trailing period.
    normalized = label.strip().strip("'\".").lower()
    if normalized == "positive":
        return SentimentResult.POSITIVE
    if normalized == "neutral":
        return SentimentResult.NEUTRAL
    if normalized == "negative":
        return SentimentResult.NEGATIVE
    if normalized == "none":
        return SentimentResult.NONE
    return SentimentResult.UNDEFINED


def _chat(system_prompt: str, user_prompt: str) -> str:
    """Send one system/user message pair to the chat model and return its reply text."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    # The content may be None; normalize to a string so parsing never crashes.
    return response.choices[0].message.content or ""


def _parse_aspect_lines(reply: str, aspect_words) -> Dict[str, SentimentResult]:
    """Parse 'aspect: sentiment' lines and ensure every requested word has an entry."""
    parsed = {}
    for line in reply.splitlines():
        line = line.strip()
        if not line:
            continue
        # The prompt's own example starts with "Response:", so the model may echo it.
        if line.lower().startswith("response:"):
            line = line[len("response:"):].strip()
        # Split on the LAST colon so an aspect containing ':' still parses.
        aspect_word, separator, sentiment = line.rpartition(":")
        if not separator or not aspect_word.strip():
            logger.warning("Invalid result: {}", line)
            continue
        parsed[aspect_word.strip()] = _to_sentiment_result(sentiment)

    # Fall back to a case-insensitive match before giving up on a word.
    folded = {key.casefold(): value for key, value in parsed.items()}
    for aspect_word in aspect_words:
        if aspect_word in parsed:
            continue
        fallback = folded.get(aspect_word.casefold())
        if fallback is None:
            parsed[aspect_word] = SentimentResult.UNDEFINED
            logger.warning("Aspect word '{}' not found in result", aspect_word)
        else:
            parsed[aspect_word] = fallback
    return parsed


def query(
    comment: str, aspect_word_or_words: Union[list, str]
) -> Union[Dict[str, SentimentResult], SentimentResult]:
    """
    Query the GPT-3.5 chat model for aspect-based sentiment analysis.

    :param comment: The comment to be analyzed.
    :param aspect_word_or_words: The aspect word(s) to be analyzed. A list (or
        tuple) yields a dictionary of aspect word to sentiment result; a single
        string yields a single sentiment result.
    :return: A dictionary containing every requested aspect word (UNDEFINED
        when the model gave no usable answer for it) if a list is provided, or
        a single sentiment result if a string is provided.
    :raises TypeError: If ``aspect_word_or_words`` is neither a string nor a
        list/tuple of strings.
    """
    if isinstance(aspect_word_or_words, str):
        reply = _chat(
            "You will be given a comment and an aspect word as input. Please respond with exactly one of the following words: 'positive', 'negative', 'neutral', or 'none'. Do not respond with any other words.",
            # A blank line now separates the comment from the "Aspect Word:"
            # header, matching the multi-word prompt; previously the header
            # was glued to the end of the comment.
            "Comment:\n\n{}\n\nAspect Word:\n\n{}".format(
                comment, aspect_word_or_words
            ),
        )
        return _to_sentiment_result(reply)

    if isinstance(aspect_word_or_words, (list, tuple)):
        if not aspect_word_or_words:
            # Nothing to ask about; avoid a pointless API call.
            return {}
        reply = _chat(
            "You will be given a comment and several aspect words as input. For each aspect word, please respond with exactly one of the following words: 'positive', 'negative', 'neutral', or 'none'. Do not respond with any other words.\n\ne.g. Comment: I love this pair of shoes, but I think the food nearby is poor :(\n\nAspect Word: shoes, food\n\nResponse: shoes: positive\nfood: negative\n\nPlease strictly follow the format above.",
            "Comment:\n\n{}\n\nAspect Words:\n\n{}".format(
                comment, ", ".join(aspect_word_or_words)
            ),
        )
        logger.debug("Result: {}", reply)
        return _parse_aspect_lines(reply, aspect_word_or_words)

    raise TypeError(
        "aspect_word_or_words must be a str or a list of str, got {}".format(
            type(aspect_word_or_words).__name__
        )
    )
@sawaw_analyze_method
def analyze(entry: SAWAWEntry) -> None:
    """
    Analyze the sentiment of an entry using the GPT-3.5 model.

    The entry is modified in-place: its sentiment_results field is replaced
    with one result per aspect word, in the order of entry.aspect_words.

    :param entry: The entry to be analyzed.
    :return: None.
    """
    sentiment_by_word = query(entry.comment, entry.aspect_words)
    entry.sentiment_results = [
        sentiment_by_word[word] for word in entry.aspect_words
    ]

9
sawaw/sentiments.py Normal file
View File

@ -0,0 +1,9 @@
from enum import Enum
class SentimentResult(Enum):
    """Sentiment labels that can be attached to an aspect word."""

    # Labels describing an expressed sentiment.
    POSITIVE = 2
    NEUTRAL = 1
    NEGATIVE = 0

    # Label for the model answer "none".
    NONE = -1

    # Placeholder for "not analyzed yet" or an unrecognized answer.
    UNDEFINED = -2

34
scripts/experiment.py Normal file
View File

@ -0,0 +1,34 @@
from dataclasses import dataclass
from random import sample
from typing import List
import pandas as pd
from sawaw import SAWAWEntry, analyze_gpt3_5
# Number of randomly chosen entries to send through the analyzer.
SAMPLE_SIZE = 3

# Create a DataFrame from data/combined_data_with_aspects.csv
df = pd.read_csv("data/combined_data_with_aspects.csv")

# Keep only text and aspect columns
df = df[["text", "aspect"]]

# Remove rows with NaN values
df = df.dropna()

# Remove rows with empty strings
df = df[df["text"] != ""]

# Create a list of SAWAWEntry objects
entries = []
for _, row in df.iterrows():
    # Aspects are comma-separated; drop blanks left by stray or trailing commas.
    aspect_words = [
        aspect_word.strip()
        for aspect_word in row["aspect"].split(",")
        if aspect_word.strip()
    ]
    entries.append(SAWAWEntry(row["text"], aspect_words))

# Pick SAMPLE_SIZE random entries (fewer if the dataset is smaller, because
# random.sample raises ValueError when asked for more items than exist)
entries = sample(entries, min(SAMPLE_SIZE, len(entries)))

# Query the entries, showing each one before and after analysis
for entry in entries:
    print(entry)
    analyze_gpt3_5(entry)
    SAWAWEntry.print_legends()
    print(entry)
    input("Press Enter to continue...")

11
tests/test_api.py Normal file
View File

@ -0,0 +1,11 @@
from sawaw.openai_gpt import SentimentResult, query
def test_query():
    """Check query() against a restaurant review, in both list and single-word modes.

    NOTE: this calls the live model through query(), so it needs network
    access and valid credentials.
    """
    review = "Our meal was accompanied by a Zweigelt, an Austrian red wine. Spicy, and delicious! The restaurant has a few cocktails and wines, but you are welcome to bring your own for a $20 corkage fee. Each course was presented beautifully, as our server would poured the accenting sauce on our plates. The crudo was refreshing (loved those blood oranges!), and my salmon was dead on. They nailed it! Not too tough and the buerre blanc sauce complimented it nicely. It was served on a bed of wild rice which was a bit crunchy for my taste, but it worked with the dish The desserts were ok, and I unluckily chose the creme caramel which basically melted in my mouth (I prefer a little more texture, so I think it is not that satisfactory). Overall, a great experience and I will definitely be back!"

    # List input: one result per requested aspect word.
    per_aspect = query(review, ["creme caramel", "wine"])
    assert per_aspect["creme caramel"] == SentimentResult.NEGATIVE
    assert per_aspect["wine"] == SentimentResult.POSITIVE

    # String input: a single result; an aspect absent from the text is NONE.
    single = query(review, "machine gun")
    assert single == SentimentResult.NONE