commit af858756bb790f76942b8232a4aef21ee3f0e03d Author: Kunologist Date: Mon Nov 13 21:37:32 2023 +0000 First commit to SAWAW diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1784127 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Conda environment +.conda + +# Pycache +__pycache__/ +.coverage + +# Compiled Python files +*.pyc + +# Logs +*.log + +# Jupyter Notebook checkpoints +.ipynb_checkpoints/ + +# Pytest cache +.pytest_cache/ + +# Data +data/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f06b767 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 SAWAW Team + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ae1bc23 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# SAWAW: Sentiment Analysis with Aspect Words + +This package provides a toolkit for analyzing sentiment of aspect words in a sentence. + +## Installation + +After cloning the repository, install the package with the following command: + +```bash +pip install -e . +``` + +This will install the package in the current environment in editable mode. + +## Usage + +```python +>>> from sawaw import SAWAWEntry, analyze_gpt3_5 +>>> entry = SAWAWEntry('I love the new iPhone 30 Pro Max Extra Double. The camera is amazing, but the battery life is not great.', ['camera', 'battery']) +>>> analyze_gpt3_5(entry) +>>> print(entry) +``` +![](docs/test.png) + +## License + +This project is licensed under the terms of the MIT license. See [LICENSE](LICENSE) for more details. + + diff --git a/docs/test.png b/docs/test.png new file mode 100644 index 0000000..e078439 Binary files /dev/null and b/docs/test.png differ diff --git a/finish.sh b/finish.sh new file mode 100644 index 0000000..433b7e0 --- /dev/null +++ b/finish.sh @@ -0,0 +1,3 @@ +black . --exclude .conda +isort . 
--skip ./.conda +# pytest --cov --cov-report=term-missing tests/ \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7cfdb46 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "sawaw" +version = "0.0.1" +description = "" +authors = ["Kunologist"] +license = "MIT" +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.9" +loguru = "^0.7.0" +pandas = "^2.1.3" +numpy = "^1.26.2" +pytest = "^7.4.0" +pytest-cov = "^4.1.0" +openai = "^1.2.0,<1.3.0" +colorama = "^0.4.4" diff --git a/sawaw/__init__.py b/sawaw/__init__.py new file mode 100644 index 0000000..2987459 --- /dev/null +++ b/sawaw/__init__.py @@ -0,0 +1,3 @@ +from .openai_gpt import analyze as analyze_gpt3_5 +from .sentiments import SentimentResult +from .entry import SAWAWEntry \ No newline at end of file diff --git a/sawaw/entry.py b/sawaw/entry.py new file mode 100644 index 0000000..91fda1d --- /dev/null +++ b/sawaw/entry.py @@ -0,0 +1,51 @@ +from sawaw.sentiments import SentimentResult +from dataclasses import dataclass +from typing import List +import colorama + +color_mapping = { + SentimentResult.POSITIVE: colorama.Fore.GREEN, + SentimentResult.NEUTRAL: colorama.Fore.YELLOW, + SentimentResult.NEGATIVE: colorama.Fore.RED, + SentimentResult.NONE: colorama.Fore.LIGHTWHITE_EX, + SentimentResult.UNDEFINED: colorama.Fore.MAGENTA, +} + + +@dataclass +class SAWAWEntry: + comment: str + aspect_words: List[str] + sentiment_results: List[SentimentResult] = None + + def __post_init__(self): + if self.sentiment_results is None: + self.sentiment_results = [SentimentResult.UNDEFINED] * len( + self.aspect_words + ) + + def __repr__(self): + colored_comment = self.comment + for aspect_word in self.aspect_words: + colored_comment = colored_comment.replace( + aspect_word, + color_mapping[ + 
# Shared OpenAI client, created on first use by _get_client(). No credential is
# ever written in source code: a key committed to a repository must be treated
# as leaked and revoked.
_client = None

_MULTI_ASPECT_SYSTEM_PROMPT = (
    "You will be given a comment and several aspect words as input. "
    "For each aspect word, please respond with exactly one of the following words: "
    "'positive', 'negative', 'neutral', or 'none'. "
    "Do not respond with any other words.\n\n"
    "e.g. Comment: I love this pair of shoes, but I think the food nearby is poor :(\n\n"
    "Aspect Word: shoes, food\n\n"
    "Response: shoes: positive\nfood: negative\n\n"
    "Please strictly follow the format above."
)

_SINGLE_ASPECT_SYSTEM_PROMPT = (
    "You will be given a comment and an aspect word as input. "
    "Please respond with exactly one of the following words: "
    "'positive', 'negative', 'neutral', or 'none'. "
    "Do not respond with any other words."
)


def _get_client():
    """Return the shared OpenAI client, creating it on first use.

    ``OpenAI()`` called without ``api_key`` takes the key from the
    ``OPENAI_API_KEY`` environment variable and raises if it is missing.
    Creating the client lazily keeps ``import sawaw`` working on machines
    that have no key configured.
    """
    global _client
    if _client is None:
        _client = OpenAI()
    return _client


def _to_sentiment_result(label: str) -> SentimentResult:
    """Map a label returned by the model to a ``SentimentResult``.

    Surrounding whitespace, quotes and a trailing period are ignored and the
    comparison is case-insensitive, so ``" 'Positive'. "`` is accepted.
    Anything that is not one of the four expected labels maps to
    ``SentimentResult.UNDEFINED``.
    """
    cleaned = label.strip(" \t\r\n'\".").lower()
    return {
        "positive": SentimentResult.POSITIVE,
        "neutral": SentimentResult.NEUTRAL,
        "negative": SentimentResult.NEGATIVE,
        "none": SentimentResult.NONE,
    }.get(cleaned, SentimentResult.UNDEFINED)


def _chat(system_prompt: str, user_prompt: str) -> str:
    """Send one system + user exchange to gpt-3.5-turbo and return the reply text."""
    response = _get_client().chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    # The message content is optional in the API response; normalise a missing
    # reply to an empty string so callers can treat it as plain text.
    return response.choices[0].message.content or ""


def query(
    comment: str, aspect_word_or_words: Union[list, str]
) -> Union[Dict[str, SentimentResult], SentimentResult]:
    """
    Query the GPT-3.5 model for aspect-based sentiment analysis.

    :param comment: The comment to be analyzed.
    :param aspect_word_or_words: The aspect word(s) to be analyzed. A list (or
        tuple) yields a dictionary of aspect word to sentiment result; a string
        yields a single sentiment result.
    :return: A dictionary that contains every requested aspect word (missing or
        unparsable answers are ``SentimentResult.UNDEFINED``) when a list is
        provided, or a single sentiment result when a string is provided. An
        empty list returns an empty dictionary without calling the API.
    :raises TypeError: If ``aspect_word_or_words`` is neither a string nor a
        list/tuple of strings.
    """
    if isinstance(aspect_word_or_words, str):
        reply = _chat(
            _SINGLE_ASPECT_SYSTEM_PROMPT,
            # Blank line between the comment and the "Aspect Word" header,
            # matching the layout used for the multi-word prompt below.
            "Comment:\n\n{}\n\nAspect Word:\n\n{}".format(
                comment, aspect_word_or_words
            ),
        )
        return _to_sentiment_result(reply)

    if not isinstance(aspect_word_or_words, (list, tuple)):
        raise TypeError(
            "aspect_word_or_words must be a str or a list of str, got {}".format(
                type(aspect_word_or_words).__name__
            )
        )

    if not aspect_word_or_words:
        # Nothing to ask about; avoid a pointless (and billable) request.
        return {}

    reply = _chat(
        _MULTI_ASPECT_SYSTEM_PROMPT,
        "Comment:\n\n{}\n\nAspect Words:\n\n{}".format(
            comment, ", ".join(aspect_word_or_words)
        ),
    )
    logger.debug("Result: {}".format(reply))

    aspect_word_to_result = {}
    for line in reply.splitlines():
        if not line.strip():
            continue  # blank separator lines are not errors
        # Split on the LAST colon so a colon inside the aspect part does not
        # make the whole line unparsable.
        aspect_word, separator, sentiment = line.rpartition(":")
        if not separator or not aspect_word.strip():
            logger.warning("Invalid result: {}".format(line))
            continue
        aspect_word_to_result[aspect_word.strip()] = _to_sentiment_result(sentiment)

    # The model does not always echo the aspect word with the same casing or
    # spacing, so fall back to a case-insensitive match before giving up.
    folded_results = {
        key.casefold(): value for key, value in aspect_word_to_result.items()
    }
    for aspect_word in aspect_word_or_words:
        if aspect_word in aspect_word_to_result:
            continue
        matched = folded_results.get(aspect_word.strip().casefold())
        if matched is None:
            aspect_word_to_result[aspect_word] = SentimentResult.UNDEFINED
            logger.warning("Aspect word '{}' not found in result".format(aspect_word))
        else:
            aspect_word_to_result[aspect_word] = matched
    return aspect_word_to_result
def test_query():
    """Smoke-test ``query`` for both supported input shapes.

    A list of aspect words must come back as a mapping keyed by aspect word,
    and a single string must come back as one bare ``SentimentResult``.

    NOTE(review): ``query`` sends the review to the chat-completion endpoint,
    so this test needs network access and valid credentials, and the expected
    labels depend on the model's answer.
    """
    review_text = "Our meal was accompanied by a Zweigelt, an Austrian red wine. Spicy, and delicious! The restaurant has a few cocktails and wines, but you are welcome to bring your own for a $20 corkage fee. Each course was presented beautifully, as our server would poured the accenting sauce on our plates. The crudo was refreshing (loved those blood oranges!), and my salmon was dead on. They nailed it! Not too tough and the buerre blanc sauce complimented it nicely. It was served on a bed of wild rice which was a bit crunchy for my taste, but it worked with the dish The desserts were ok, and I unluckily chose the creme caramel which basically melted in my mouth (I prefer a little more texture, so I think it is not that satisfactory). Overall, a great experience and I will definitely be back!"

    # Multi-word form: one sentiment per requested aspect word.
    per_aspect = query(review_text, ["creme caramel", "wine"])
    assert per_aspect["creme caramel"] == SentimentResult.NEGATIVE
    assert per_aspect["wine"] == SentimentResult.POSITIVE

    # Single-word form: an aspect that never appears in the review.
    assert query(review_text, "machine gun") == SentimentResult.NONE