First commit to SAWAW
This commit is contained in:
commit
af858756bb
21
.gitignore
vendored
Normal file
21
.gitignore
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
# Conda environment
|
||||
.conda
|
||||
|
||||
# Pycache
|
||||
__pycache__/
|
||||
.coverage
|
||||
|
||||
# Compiled Python files
|
||||
*.pyc
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
# Jupyter Notebook checkpoints
|
||||
.ipynb_checkpoints/
|
||||
|
||||
# Pytest cache
|
||||
.pytest_cache/
|
||||
|
||||
# Data
|
||||
data/
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 SAWAW Team
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
29
README.md
Normal file
29
README.md
Normal file
@ -0,0 +1,29 @@
|
||||
# SAWAW: Sentiment Analysis with Aspect Words
|
||||
|
||||
This package provides a toolkit for analyzing the sentiment of aspect words in a sentence.
|
||||
|
||||
## Installation
|
||||
|
||||
After cloning the repository, install the package with the following command:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
This will install the package in the current environment in editable mode.
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
>>> from sawaw import SAWAWEntry, analyze_gpt3_5
|
||||
>>> entry = SAWAWEntry('I love the new iPhone 30 Pro Max Extra Double. The camera is amazing, but the battery life is not great.', ['camera', 'battery'])
|
||||
>>> analyze_gpt3_5(entry)
|
||||
>>> print(entry)
|
||||
```
|
||||
![](docs/test.png)
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the terms of the MIT license. See [LICENSE](LICENSE) for more details.
|
||||
|
||||
|
BIN
docs/test.png
Normal file
BIN
docs/test.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
3
finish.sh
Normal file
3
finish.sh
Normal file
@ -0,0 +1,3 @@
|
||||
black . --exclude .conda
|
||||
isort . --skip ./.conda
|
||||
# pytest --cov --cov-report=term-missing tests/
|
21
pyproject.toml
Normal file
21
pyproject.toml
Normal file
@ -0,0 +1,21 @@
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "sawaw"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = ["Kunologist"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
loguru = "^0.7.0"
|
||||
pandas = "^2.1.3"
|
||||
numpy = "^1.26.2"
|
||||
pytest = "^7.4.0"
|
||||
pytest-cov = "^4.1.0"
|
||||
openai = "^1.2.0,<1.3.0"
|
||||
colorama = "^0.4.4"
|
3
sawaw/__init__.py
Normal file
3
sawaw/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .openai_gpt import analyze as analyze_gpt3_5
|
||||
from .sentiments import SentimentResult
|
||||
from .entry import SAWAWEntry
|
51
sawaw/entry.py
Normal file
51
sawaw/entry.py
Normal file
@ -0,0 +1,51 @@
|
||||
from sawaw.sentiments import SentimentResult
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import colorama
|
||||
|
||||
# Terminal foreground colour used to highlight an aspect word, keyed by its
# sentiment. The insertion order below is also the order in which
# SAWAWEntry.print_legends() lists the legend entries.
color_mapping = {
    SentimentResult.POSITIVE: colorama.Fore.GREEN,
    SentimentResult.NEUTRAL: colorama.Fore.YELLOW,
    SentimentResult.NEGATIVE: colorama.Fore.RED,
    SentimentResult.NONE: colorama.Fore.LIGHTWHITE_EX,
    SentimentResult.UNDEFINED: colorama.Fore.MAGENTA,
}
|
||||
|
||||
|
||||
@dataclass
class SAWAWEntry:
    """A comment, the aspect words to evaluate in it, and their sentiments.

    ``sentiment_results`` is parallel to ``aspect_words``: the i-th result
    belongs to the i-th aspect word. When it is omitted, every aspect word
    starts out as ``SentimentResult.UNDEFINED``.
    """

    # The text being analysed.
    comment: str
    # The words or phrases inside ``comment`` whose sentiment is wanted.
    aspect_words: List[str]
    # One result per aspect word, same order; None means "not analysed yet"
    # and is replaced by a list of UNDEFINED in __post_init__.
    sentiment_results: List[SentimentResult] = None

    def __post_init__(self):
        """Fill ``sentiment_results`` with UNDEFINED when none were given."""
        if self.sentiment_results is None:
            self.sentiment_results = [SentimentResult.UNDEFINED] * len(
                self.aspect_words
            )

    def __repr__(self):
        """Return the comment with each aspect word wrapped in its colour.

        All aspect words are substituted in a single regex pass over the
        original comment. Chained ``str.replace`` calls would let a later
        aspect word match inside text (or inside the ANSI escape codes)
        inserted for an earlier one, e.g. "battery" inside an already
        coloured "battery life".
        """
        import re  # local import: only this method needs it

        # First occurrence wins for duplicated aspect words. Empty strings are
        # skipped because an empty pattern would match between every character.
        # Aspect words without a matching result are left uncoloured.
        colour_by_word = {}
        for word, sentiment in zip(self.aspect_words, self.sentiment_results):
            if word:
                colour_by_word.setdefault(word, color_mapping[sentiment])

        if not colour_by_word:
            return self.comment

        # Longest alternatives first so "battery life" beats "battery".
        pattern = "|".join(
            re.escape(word)
            for word in sorted(colour_by_word, key=len, reverse=True)
        )

        def colourise(match):
            word = match.group(0)
            return colour_by_word[word] + word + colorama.Style.RESET_ALL

        return re.sub(pattern, colourise, self.comment)

    def __str__(self):
        """Same colourised text as ``__repr__``, so ``print(entry)`` shows it."""
        return self.__repr__()

    @staticmethod
    def print_legends():
        """Print each sentiment name in its colour, separated by ' | '."""
        legends = [
            "{}{}{}".format(color, sentiment_result.name, colorama.Style.RESET_ALL)
            for sentiment_result, color in color_mapping.items()
        ]
        print(" | ".join(legends))
|
||||
|
13
sawaw/method.py
Normal file
13
sawaw/method.py
Normal file
@ -0,0 +1,13 @@
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
def sawaw_analyze_method(func):
    """Declare *func* as a SAWAW analysis method and time every call.

    A SAWAW analysis method takes an SAWAWEntry as input and populates its
    ``sentiment_results`` field. The returned wrapper forwards all arguments
    unchanged, logs the elapsed time at debug level after a successful call,
    and returns whatever *func* returned.

    :param func: The analysis function to wrap.
    :return: The timing wrapper, carrying *func*'s name and docstring.
    """
    import functools  # local import: only needed while decorating

    # wraps() keeps __name__/__doc__/__wrapped__ so the decorated function
    # still introspects (help(), logs, test ids) as the original.
    @functools.wraps(func)
    def inner(*args, **kwargs):
        # perf_counter is monotonic; time.time() can jump if the system
        # clock is adjusted mid-call.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        logger.debug(
            "SAWAW analysis took {:.2f} s.".format(time.perf_counter() - started)
        )
        return result

    return inner
|
102
sawaw/openai_gpt.py
Normal file
102
sawaw/openai_gpt.py
Normal file
@ -0,0 +1,102 @@
|
||||
from typing import Dict, Union
|
||||
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
|
||||
from sawaw.method import sawaw_analyze_method
|
||||
from sawaw.entry import SAWAWEntry
|
||||
from sawaw.sentiments import SentimentResult
|
||||
|
||||
# SECURITY: never commit credentials. With no ``api_key`` argument the OpenAI
# client reads the key from the OPENAI_API_KEY environment variable. The key
# that used to be hard-coded on this line has been published in version
# control and must be revoked.
client = OpenAI()
|
||||
|
||||
|
||||
# Chat model used for every request in this module.
_MODEL = "gpt-3.5-turbo"

_MULTI_SYSTEM_PROMPT = "You will be given a comment and several aspect words as input. For each aspect word, please respond with exactly one of the following words: 'positive', 'negative', 'neutral', or 'none'. Do not respond with any other words.\n\ne.g. Comment: I love this pair of shoes, but I think the food nearby is poor :(\n\nAspect Word: shoes, food\n\nResponse: shoes: positive\nfood: negative\n\nPlease strictly follow the format above."

_SINGLE_SYSTEM_PROMPT = "You will be given a comment and an aspect word as input. Please respond with exactly one of the following words: 'positive', 'negative', 'neutral', or 'none'. Do not respond with any other words."

_LABEL_TO_SENTIMENT = {
    "positive": SentimentResult.POSITIVE,
    "neutral": SentimentResult.NEUTRAL,
    "negative": SentimentResult.NEGATIVE,
    "none": SentimentResult.NONE,
}


def _to_sentiment_result(label: str) -> SentimentResult:
    """Map a model answer to a SentimentResult; unknown answers are UNDEFINED."""
    # Tolerate surrounding whitespace, quotes, a trailing period and any case,
    # e.g. " 'Positive'.\n" is accepted as "positive".
    normalized = label.strip().strip("'\".").strip().lower()
    return _LABEL_TO_SENTIMENT.get(normalized, SentimentResult.UNDEFINED)


def _chat(system_prompt: str, user_prompt: str) -> str:
    """Send one system + user exchange and return the reply text ('' if none)."""
    response = client.chat.completions.create(
        model=_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    # ``content`` may be None; normalise so callers always get a string.
    return response.choices[0].message.content or ""


def _parse_aspect_lines(reply: str, aspect_words: list) -> Dict[str, SentimentResult]:
    """Parse ``word: sentiment`` lines and guarantee a key per aspect word."""
    parsed = {}
    for line in reply.splitlines():
        if not line.strip():
            continue
        # Split on the LAST colon so a leading "Response:" (as in the prompt's
        # example) or a colon inside the aspect word does not break parsing.
        word, separator, sentiment = line.rpartition(":")
        word = word.strip()
        if word.casefold().startswith("response:"):
            word = word[len("response:"):].strip()
        if not separator or not word:
            logger.warning("Invalid result: {}".format(line))
            continue
        parsed[word] = _to_sentiment_result(sentiment)

    # Snapshot for case-insensitive matching: the model may answer
    # "Shoes: positive" for the aspect word "shoes".
    folded = {word.casefold(): sentiment for word, sentiment in parsed.items()}
    for aspect_word in aspect_words:
        if aspect_word in parsed:
            continue
        key = aspect_word.strip().casefold()
        if key in folded:
            parsed[aspect_word] = folded[key]
        else:
            parsed[aspect_word] = SentimentResult.UNDEFINED
            logger.warning("Aspect word '{}' not found in result".format(aspect_word))
    return parsed


def query(
    comment: str, aspect_word_or_words: Union[list, str]
) -> Union[Dict[str, SentimentResult], SentimentResult]:
    """
    Query the gpt-3.5-turbo chat model for aspect-level sentiment.

    :param comment: The comment to be analyzed.
    :param aspect_word_or_words: The aspect word(s) to be analyzed. A list (or
        tuple) yields a dictionary of aspect word to sentiment result; a string
        yields a single sentiment result.
    :return: A dictionary containing every requested aspect word (UNDEFINED
        where the reply had no usable answer) if a list is provided, or a
        single sentiment result if a string is provided. An empty list returns
        an empty dictionary without contacting the API.
    :raises TypeError: If ``aspect_word_or_words`` is neither a string nor a
        list/tuple (previously this silently returned ``None``).
    """
    if isinstance(aspect_word_or_words, str):
        # The blank line before "Aspect Word:" keeps the comment and the
        # label from running together in the prompt.
        reply = _chat(
            _SINGLE_SYSTEM_PROMPT,
            "Comment:\n\n{}\n\nAspect Word:\n\n{}".format(
                comment, aspect_word_or_words
            ),
        )
        return _to_sentiment_result(reply)

    if not isinstance(aspect_word_or_words, (list, tuple)):
        raise TypeError(
            "aspect_word_or_words must be a str or a list of str, got {}".format(
                type(aspect_word_or_words).__name__
            )
        )

    aspect_words = list(aspect_word_or_words)
    if not aspect_words:
        # Nothing to ask about: skip the (billable) API round trip.
        return {}

    reply = _chat(
        _MULTI_SYSTEM_PROMPT,
        "Comment:\n\n{}\n\nAspect Words:\n\n{}".format(
            comment, ", ".join(aspect_words)
        ),
    )
    logger.debug("Result: {}".format(reply))
    return _parse_aspect_lines(reply, aspect_words)
|
||||
|
||||
|
||||
@sawaw_analyze_method
def analyze(entry: SAWAWEntry) -> None:
    """
    Populate ``entry.sentiment_results`` in place by delegating to ``query``.

    :param entry: The entry whose comment and aspect words are analyzed.
    :return: None. The results are stored on ``entry``, one per aspect word
        and in the same order as ``entry.aspect_words``.
    """
    sentiment_by_word = query(entry.comment, entry.aspect_words)
    entry.sentiment_results = [
        sentiment_by_word[word] for word in entry.aspect_words
    ]
|
||||
|
9
sawaw/sentiments.py
Normal file
9
sawaw/sentiments.py
Normal file
@ -0,0 +1,9 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SentimentResult(Enum):
    """Sentiment assigned to a single aspect word of a comment."""

    POSITIVE = 2
    NEUTRAL = 1
    NEGATIVE = 0
    # The model answered 'none' for this aspect word.
    NONE = -1
    # No usable answer: the initial state of an entry that has not been
    # analysed, and the fallback for replies that cannot be recognised.
    UNDEFINED = -2
|
34
scripts/experiment.py
Normal file
34
scripts/experiment.py
Normal file
@ -0,0 +1,34 @@
|
||||
from dataclasses import dataclass
|
||||
from random import sample
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from sawaw import SAWAWEntry, analyze_gpt3_5
|
||||
|
||||
# Number of randomly chosen entries sent to the model per run.
SAMPLE_SIZE = 3

# Load the dataset (the path is relative to the directory the script runs from).
df = pd.read_csv("data/combined_data_with_aspects.csv")
# Keep only text and aspect columns
df = df[["text", "aspect"]]
# Remove rows with NaN values
df = df.dropna()
# Remove rows with empty strings
df = df[df["text"] != ""]

# Create a list of SAWAWEntry objects. Iterating the two columns directly
# avoids the per-row Series that iterrows() builds.
entries = []
for comment, aspects in zip(df["text"], df["aspect"]):
    # "aspect" holds a comma-separated list; drop empty tokens (e.g. from
    # "a,,b" or a trailing comma) so no blank aspect word reaches the model.
    aspect_words = [word.strip() for word in str(aspects).split(",")]
    aspect_words = [word for word in aspect_words if word]
    if aspect_words:
        entries.append(SAWAWEntry(comment, aspect_words))

# Pick up to SAMPLE_SIZE random entries; sample() raises ValueError when asked
# for more items than the population holds.
entries = sample(entries, min(SAMPLE_SIZE, len(entries)))

# Query the entries, showing each one before and after analysis.
for entry in entries:
    print(entry)
    analyze_gpt3_5(entry)
    SAWAWEntry.print_legends()
    print(entry)
    input("Press Enter to continue...")
|
||||
|
11
tests/test_api.py
Normal file
11
tests/test_api.py
Normal file
@ -0,0 +1,11 @@
|
||||
from sawaw.openai_gpt import SentimentResult, query
|
||||
|
||||
|
||||
def test_query():
    """Check query() in both modes: a list of aspect words and a single word.

    NOTE: nothing is mocked here, so ``query`` sends real requests through the
    module's OpenAI client and the assertions depend on the model's answers.
    """
    review = "Our meal was accompanied by a Zweigelt, an Austrian red wine. Spicy, and delicious! The restaurant has a few cocktails and wines, but you are welcome to bring your own for a $20 corkage fee. Each course was presented beautifully, as our server would poured the accenting sauce on our plates. The crudo was refreshing (loved those blood oranges!), and my salmon was dead on. They nailed it! Not too tough and the buerre blanc sauce complimented it nicely. It was served on a bed of wild rice which was a bit crunchy for my taste, but it worked with the dish The desserts were ok, and I unluckily chose the creme caramel which basically melted in my mouth (I prefer a little more texture, so I think it is not that satisfactory). Overall, a great experience and I will definitely be back!"

    # List input -> dict keyed by aspect word.
    sentiment_by_word = query(review, ["creme caramel", "wine"])
    assert sentiment_by_word["creme caramel"] == SentimentResult.NEGATIVE
    assert sentiment_by_word["wine"] == SentimentResult.POSITIVE

    # String input -> a single SentimentResult; the word is absent from the review.
    unrelated = query(review, "machine gun")
    assert unrelated == SentimentResult.NONE
|
Loading…
Reference in New Issue
Block a user