Skip to content
Snippets Groups Projects
Commit 2fbcd4ad authored by bjuradotorres2's avatar bjuradotorres2
Browse files

Organized test cases into separate test files (test sentence file, test...

Organized test cases into separate test files (test sentence file, test stop-word file, and test token file)
parent e7f62dd6
Branches
No related tags found
No related merge requests found
...@@ -2,6 +2,8 @@ import math ...@@ -2,6 +2,8 @@ import math
import os import os
from enum import Enum from enum import Enum
from collections import Counter from collections import Counter
class MenuOption(Enum): class MenuOption(Enum):
SHOW_REVIEWS = 'Show reviews' SHOW_REVIEWS = 'Show reviews'
CHECK_TOKEN = 'Check if a token is present' CHECK_TOKEN = 'Check if a token is present'
...@@ -12,6 +14,7 @@ class MenuOption(Enum): ...@@ -12,6 +14,7 @@ class MenuOption(Enum):
SHOW_ADJUSTED_SENTENCE_STATISTICS = 'Show the statistics for a sentence with stop words ignored' SHOW_ADJUSTED_SENTENCE_STATISTICS = 'Show the statistics for a sentence with stop words ignored'
EXIT = 'Exit the program' EXIT = 'Exit the program'
def main(): def main():
recall = True recall = True
menu_options = tuple(MenuOption) menu_options = tuple(MenuOption)
...@@ -52,9 +55,12 @@ def main(): ...@@ -52,9 +55,12 @@ def main():
elif input_number == 6: elif input_number == 6:
save_stop_words(negative_reviews_list, neutral_reviews_list, positive_reviews_list) save_stop_words(negative_reviews_list, neutral_reviews_list, positive_reviews_list)
elif input_number == 7: elif input_number == 7:
adjusted_sentence_statistics(negative_reviews_list, neutral_reviews_list, positive_reviews_list, stop_words) adjusted_sentence_statistics(negative_reviews_list, neutral_reviews_list, positive_reviews_list,
stop_words)
elif input_number == 8: elif input_number == 8:
exit() exit()
def read_file(reviews): def read_file(reviews):
file_path = os.path.join(os.path.dirname(__file__), "sentiment.txt") # Adjust if needed file_path = os.path.join(os.path.dirname(__file__), "sentiment.txt") # Adjust if needed
try: try:
...@@ -62,7 +68,9 @@ def read_file(reviews): ...@@ -62,7 +68,9 @@ def read_file(reviews):
for line in file: for line in file:
reviews.append(line.strip("\t\n")) reviews.append(line.strip("\t\n"))
except FileNotFoundError: except FileNotFoundError:
raise FileNotFoundError(f"Error - No file found at {file_path}. Ensure sentiment.txt is in the correct directory.") raise FileNotFoundError(
f"Error - No file found at {file_path}. Ensure sentiment.txt is in the correct directory.")
# gets list of stop words # gets list of stop words
def get_stop_words(): def get_stop_words():
...@@ -75,6 +83,7 @@ def get_stop_words(): ...@@ -75,6 +83,7 @@ def get_stop_words():
print("Warning: Stop words file not found. Using an empty set.") print("Warning: Stop words file not found. Using an empty set.")
return stop_words return stop_words
# finds the statistics for a series of inputs, ignoring stop words # finds the statistics for a series of inputs, ignoring stop words
def adjusted_sentence_statistics(negative_reviews, neutral_reviews, positive_reviews, stop_words): def adjusted_sentence_statistics(negative_reviews, neutral_reviews, positive_reviews, stop_words):
input_tokens = str(input("Enter a sentence as space-seperated tokens: ")).lower().split() input_tokens = str(input("Enter a sentence as space-seperated tokens: ")).lower().split()
...@@ -104,12 +113,15 @@ def adjusted_sentence_statistics(negative_reviews, neutral_reviews, positive_rev ...@@ -104,12 +113,15 @@ def adjusted_sentence_statistics(negative_reviews, neutral_reviews, positive_rev
unknown_counter += 1 unknown_counter += 1
if valid_tokens > 0: if valid_tokens > 0:
avg_score = total_score / valid_tokens avg_score = total_score / valid_tokens
print(f"\nThe sentence has {stop_word_count} stop-word token(s), and it has {total_negative_reviews} negative, {total_neutral_reviews} neutral, {total_positive_reviews} positive, and {unknown_counter}" print(
f"\nThe sentence has {stop_word_count} stop-word token(s), and it has {total_negative_reviews} negative, {total_neutral_reviews} neutral, {total_positive_reviews} positive, and {unknown_counter}"
f" unknown non-stop-word token(s).\nThe sentence has an average tf-idf score of {avg_score}.") f" unknown non-stop-word token(s).\nThe sentence has an average tf-idf score of {avg_score}.")
else: else:
print(f"\nThe sentence contains only {stop_word_count} stop-word token(s) and {unknown_counter} unknown non-stop-word token(s).\n" print(
f"\nThe sentence contains only {stop_word_count} stop-word token(s) and {unknown_counter} unknown non-stop-word token(s).\n"
f"Therefore, its average tf-idf score is undefined.") f"Therefore, its average tf-idf score is undefined.")
# saves stop words to a file # saves stop words to a file
def save_stop_words(negative_reviews_list, neutral_reviews_list, positive_reviews_list): def save_stop_words(negative_reviews_list, neutral_reviews_list, positive_reviews_list):
positive_reviews = Counter(positive_reviews_list) positive_reviews = Counter(positive_reviews_list)
...@@ -138,6 +150,7 @@ def save_stop_words(negative_reviews_list, neutral_reviews_list, positive_review ...@@ -138,6 +150,7 @@ def save_stop_words(negative_reviews_list, neutral_reviews_list, positive_review
print(f'\nStop word list saved to "output.txt".') print(f'\nStop word list saved to "output.txt".')
# finds the statistics for a series of inputs # finds the statistics for a series of inputs
def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews): def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews):
input_tokens = str(input("Enter a sentence as space-seperated tokens: ")).lower().split() input_tokens = str(input("Enter a sentence as space-seperated tokens: ")).lower().split()
...@@ -146,8 +159,11 @@ def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews): ...@@ -146,8 +159,11 @@ def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews):
score_con = "" score_con = ""
for token in input_tokens: for token in input_tokens:
if token in positive_reviews or token in neutral_reviews or token in negative_reviews: if token in positive_reviews or token in neutral_reviews or token in negative_reviews:
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(token, 0, negative_reviews, 0, negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(token, 0,
neutral_reviews, 0, positive_reviews, score_con) negative_reviews, 0,
neutral_reviews, 0,
positive_reviews,
score_con)
if score_con == "positive": if score_con == "positive":
total_positive_reviews += 1 total_positive_reviews += 1
elif score_con == "neutral": elif score_con == "neutral":
...@@ -163,26 +179,33 @@ def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews): ...@@ -163,26 +179,33 @@ def sentence_statistics(positive_reviews, neutral_reviews, negative_reviews):
avg_score = total_score / valid_tokens if valid_tokens > 0 else 0 avg_score = total_score / valid_tokens if valid_tokens > 0 else 0
if unknown_counter != len(input_tokens): if unknown_counter != len(input_tokens):
print(f"\nThe sentence has {total_negative_reviews} negative, {total_neutral_reviews} neutral, {total_positive_reviews} positive, and {unknown_counter}" print(
f"\nThe sentence has {total_negative_reviews} negative, {total_neutral_reviews} neutral, {total_positive_reviews} positive, and {unknown_counter}"
f" unknown token(s).\nThe sentence has an average tf-idf score of {avg_score}.") f" unknown token(s).\nThe sentence has an average tf-idf score of {avg_score}.")
else: else:
print("\nThe sentence contains only unknown tokens; therefore, its average tf-idf score is undefined.") print("\nThe sentence contains only unknown tokens; therefore, its average tf-idf score is undefined.")
# finds the statistics of a token # finds the statistics of a token
def token_statistics(positive_reviews, neutral_reviews, negative_reviews, review_tokens_set): def token_statistics(positive_reviews, neutral_reviews, negative_reviews, review_tokens_set):
input_token = str(input("Enter a token: ")).lower() input_token = str(input("Enter a token: ")).lower()
positive_counter, neutral_counter, negative_counter, score = 0, 0, 0, 0 positive_counter, neutral_counter, negative_counter, score = 0, 0, 0, 0
score_con = "" score_con = ""
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(input_token, negative_counter, negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(input_token, negative_counter,
negative_reviews, neutral_counter, neutral_reviews, negative_reviews,
positive_counter, positive_reviews, score_con) neutral_counter,
neutral_reviews,
positive_counter,
positive_reviews, score_con)
if input_token in review_tokens_set: if input_token in review_tokens_set:
print(f"The token {input_token} has {negative_counter} negative, {neutral_counter} neutral, and {positive_counter} " print(
f"The token {input_token} has {negative_counter} negative, {neutral_counter} neutral, and {positive_counter} "
f"positive appearance(s) in the training data.\n" f"positive appearance(s) in the training data.\n"
f"The token {input_token} is classified as {score_con} because it has a tf-idf score of {score}.") f"The token {input_token} is classified as {score_con} because it has a tf-idf score of {score}.")
else: else:
print(f"\nThe token \"{input_token}\" does not appear in the training data.") print(f"\nThe token \"{input_token}\" does not appear in the training data.")
# checks the score of the token # checks the score of the token
def score_compute(input_token, negative_counter, negative_reviews, neutral_counter, neutral_reviews, positive_counter, def score_compute(input_token, negative_counter, negative_reviews, neutral_counter, neutral_reviews, positive_counter,
positive_reviews, score_con): positive_reviews, score_con):
...@@ -205,6 +228,7 @@ def score_compute(input_token, negative_counter, negative_reviews, neutral_count ...@@ -205,6 +228,7 @@ def score_compute(input_token, negative_counter, negative_reviews, neutral_count
score_con = "positive" score_con = "positive"
return negative_counter, neutral_counter, positive_counter, score, score_con return negative_counter, neutral_counter, positive_counter, score, score_con
# checks the amount of times a token appears in the test data # checks the amount of times a token appears in the test data
def word_frequencies(review_tokens_list): def word_frequencies(review_tokens_list):
token_amount = 0 token_amount = 0
...@@ -214,13 +238,17 @@ def word_frequencies(review_tokens_list): ...@@ -214,13 +238,17 @@ def word_frequencies(review_tokens_list):
token_amount += 1 token_amount += 1
print(f"\nThe training data contains {token_amount} appearance(s) of the token \"{input_token}\".") print(f"\nThe training data contains {token_amount} appearance(s) of the token \"{input_token}\".")
# checks if a token is present in the test data # checks if a token is present in the test data
def check_token(review_token_set): def check_token(review_token_set):
input_token = str(input("Enter a token: ")).lower() input_token = str(input("Enter a token: ")).lower()
if input_token in review_token_set: if input_token in review_token_set:
print(f"\nThe token \"{input_token}\" is 1 of the " + str(len(review_token_set)) + " unique tokens in the training data") print(f"\nThe token \"{input_token}\" is 1 of the " + str(
len(review_token_set)) + " unique tokens in the training data")
else: else:
print(f"\nThe token \"{input_token}\" is not 1 of the " + str(len(review_token_set)) + " unique tokens in the training data") print(f"\nThe token \"{input_token}\" is not 1 of the " + str(
len(review_token_set)) + " unique tokens in the training data")
# prints requested reviews by user # prints requested reviews by user
def show_reviews(reviews): def show_reviews(reviews):
...@@ -253,6 +281,7 @@ def show_reviews(reviews): ...@@ -253,6 +281,7 @@ def show_reviews(reviews):
print(f"Review # {i} : " + str(reviews[i - 1])) print(f"Review # {i} : " + str(reviews[i - 1]))
i += 1 i += 1
# creates starting lists # creates starting lists
def get_tokens(reviews, review_tokens_set, review_tokens_list, def get_tokens(reviews, review_tokens_set, review_tokens_list,
positive_reviews, neutral_reviews, negative_reviews, positive_reviews, neutral_reviews, negative_reviews,
...@@ -271,6 +300,7 @@ def get_tokens(reviews, review_tokens_set, review_tokens_list, ...@@ -271,6 +300,7 @@ def get_tokens(reviews, review_tokens_set, review_tokens_list,
negative_reviews += token negative_reviews += token
negative_reviews_set.update(token) negative_reviews_set.update(token)
# reads the txt file and stores the lines in list # reads the txt file and stores the lines in list
def read_file(reviews): def read_file(reviews):
try: try:
...@@ -281,5 +311,6 @@ def read_file(reviews): ...@@ -281,5 +311,6 @@ def read_file(reviews):
print("Error - No file found") print("Error - No file found")
print("\n".join(reviews)) print("\n".join(reviews))
if __name__ == '__main__': if __name__ == '__main__':
main() main()
,
.
a
i
'
is
of
or
in
by
so
it
's
an
to
be
on
at
if
as
--
and
all
the
are
has
one
n't
but
his
...
its
for
you
not
this
with
like
from
that
more
than
film
what
have
about
story
movie
-lrb-
-rrb-
there
import unittest
from sentiment import get_tokens, score_compute
import os
class TestSentenceAnalysis(unittest.TestCase):
    """Exercises sentence-level sentiment statistics built on get_tokens/score_compute.

    Both tests analyze the same fixed sentence against the training data in
    sentiment.txt (expected one directory above this test file).
    """

    def setUp(self):
        """Load the training reviews and bucket their tokens by sentiment."""
        here = os.path.dirname(os.path.abspath(__file__))
        data_path = os.path.join(here, "..", "sentiment.txt")
        with open(data_path, "r", encoding="utf-8") as handle:
            self.reviews = handle.readlines()
        self.positive_reviews_list = []
        self.neutral_reviews_list = []
        self.negative_reviews_list = []
        # These tests never consult the full token set/list, so throwaway
        # containers are passed for those parameters.
        get_tokens(
            self.reviews, set(), [],
            self.positive_reviews_list, self.neutral_reviews_list, self.negative_reviews_list,
            set(), set(), set()
        )
        self.target_sentence = "absolutely detestable ; would not watch again"
        self.input_tokens = self.target_sentence.lower().split()

    def analyze_sentence(self, tokens):
        """Score each known token of *tokens*.

        Returns a tuple (negative, neutral, positive, unknown, avg_score) where
        avg_score is the mean tf-idf over known tokens, or 0 if none are known.
        """
        counts = {"negative": 0, "neutral": 0, "positive": 0}
        unknown = 0
        score_sum = 0
        known = 0
        for word in tokens:
            is_known = (word in self.positive_reviews_list
                        or word in self.neutral_reviews_list
                        or word in self.negative_reviews_list)
            if not is_known:
                unknown += 1
                continue
            _, _, _, tfidf, label = score_compute(
                word, 0, self.negative_reviews_list, 0,
                self.neutral_reviews_list, 0, self.positive_reviews_list, ""
            )
            if label in counts:
                counts[label] += 1
            score_sum += tfidf
            known += 1
        average = score_sum / known if known > 0 else 0
        return counts["negative"], counts["neutral"], counts["positive"], unknown, average

    def test_01_sentence_statistics_classifications(self):
        """The target sentence should yield 5 negative, 0 neutral, 1 positive, 1 unknown."""
        negative, neutral, positive, unknown, _ = self.analyze_sentence(self.input_tokens)
        self.assertEqual(negative, 5, "Incorrect negative token count in sentence.")
        self.assertEqual(neutral, 0, "Incorrect neutral token count in sentence.")
        self.assertEqual(positive, 1, "Incorrect positive token count in sentence.")
        self.assertEqual(unknown, 1, "Incorrect unknown token count in sentence.")
        print(f'\nTest - Sentence Statistics Classification: \nThe sentence "{self.target_sentence}" has '
              f'{negative} negative, {neutral} neutral, {positive} positive, and {unknown} unknown token(s).')

    def test_02_token_statistics_score(self):
        """The average tf-idf of the known tokens should match the precomputed value."""
        *_, avg_score = self.analyze_sentence(self.input_tokens)
        self.assertAlmostEqual(avg_score, -0.18812093738509109, places=10,
                               msg="Incorrect average tf-idf score for sentence.")
        print(
            f'\nTest - Sentence Statistics Score: \nThe sentence "{self.target_sentence}" has an average tf-idf score of {avg_score}.')
# Run this test module's suite when executed directly (e.g. `python test_sentence.py`).
if __name__ == '__main__':
    unittest.main()
...@@ -2,9 +2,9 @@ import unittest ...@@ -2,9 +2,9 @@ import unittest
from sentiment import get_tokens, score_compute, save_stop_words from sentiment import get_tokens, score_compute, save_stop_words
import os import os
class TestGetTokens(unittest.TestCase):
# creates file path to sentiment.txt file class TestStopWordsAnalysis(unittest.TestCase):
def setUp(self): def setUp(self):
self.reviews = [] self.reviews = []
base_dir = os.path.dirname(os.path.abspath(__file__)) base_dir = os.path.dirname(os.path.abspath(__file__))
...@@ -28,163 +28,21 @@ class TestGetTokens(unittest.TestCase): ...@@ -28,163 +28,21 @@ class TestGetTokens(unittest.TestCase):
self.output_file = "output.txt" self.output_file = "output.txt"
if os.path.exists(self.output_file): if os.path.exists(self.output_file):
os.remove(self.output_file) os.remove(self.output_file)
self.target_sentence = "absolutely detestable ; would not watch again"
self.input_tokens = self.target_sentence.lower().split()
def test_01_token_count(self): def test_01_save_stop_words(self):
review_tokens_set = set()
review_tokens_list = []
positive_reviews_list, neutral_reviews_list, negative_reviews_list = [], [], []
positive_reviews_set, neutral_reviews_set, negative_reviews_set = set(), set(), set()
get_tokens(
self.reviews,
review_tokens_set, review_tokens_list,
positive_reviews_list, neutral_reviews_list, negative_reviews_list,
positive_reviews_set, neutral_reviews_set, negative_reviews_set
)
token_count = len(review_tokens_set)
self.assertEqual(len(review_tokens_set), 16_444, "Token count should be 16,444")
print(f"\nTest 1 - Token Count: \nThe training data contains {token_count} tokens.")
def test_02_word_frequencies(self):
target_token = "too"
expected_count = 314
manual_count = sum(1 for token in self.review_tokens_list if token == target_token)
self.assertEqual(manual_count, expected_count, f"Expected {expected_count} occurrences of '{target_token}', but got {manual_count}.")
print(f"""\nTest 2 - Word Frequencies: \nThe training data contains {manual_count} appearances of the token "too".""")
def test_03_token_statistics(self):
target_token = "too"
expected_negative = 200
expected_neutral = 65
expected_positive = 49
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(
target_token, 0, self.negative_reviews_list, 0,
self.neutral_reviews_list, 0, self.positive_reviews_list, "")
self.assertEqual(negative_counter, expected_negative, f"Incorrect negative count for '{target_token}'")
self.assertEqual(neutral_counter, expected_neutral, f"Incorrect neutral count for '{target_token}'")
self.assertEqual(positive_counter, expected_positive, f"Incorrect positive count for '{target_token}'")
print(f'\nTest 3 - Token Statistics: \nThe token "{target_token}" has {negative_counter} negative, '
f'{neutral_counter} neutral, and {positive_counter} positive appearance(s) in the training data.')
def test_04_token_classification(self):
target_token = "too"
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(
target_token, 0, self.negative_reviews_list, 0,
self.neutral_reviews_list, 0, self.positive_reviews_list, "")
expected_classification = "negative"
self.assertEqual(score_con, expected_classification, f"Incorrect sentiment for '{target_token}'")
print(f'\nTest 4 - Token Classification: \nThe token "{target_token}" is classified as {score_con}.')
def test_05_token_score(self):
target_token = "too"
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(
target_token, 0, self.negative_reviews_list, 0,
self.neutral_reviews_list, 0, self.positive_reviews_list, "")
expected_score = -1.4990477543373988
self.assertEqual(score, expected_score, f"Incorrect tf-idf score for '{target_token}'")
print(f'\nTest 5 - Token Score: \nThe token "{target_token}" has a tf-idf score of {score}.')
def test_06_sentence_statistics_classifications(self):
target_sentence = "absolutely detestable ; would not watch again"
expected_negative = 5
expected_neutral = 0
expected_positive = 1
expected_unknown = 1
total_negative_reviews, total_neutral_reviews, total_positive_reviews, unknown_counter = 0, 0, 0, 0
total_score, valid_tokens = 0, 0
score_con = ""
input_tokens = target_sentence.lower().split()
for token in input_tokens:
if token in self.positive_reviews_list or token in self.neutral_reviews_list or token in self.negative_reviews_list:
# Compute statistics for this token
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(
token, 0, self.negative_reviews_list, 0,
self.neutral_reviews_list, 0, self.positive_reviews_list, ""
)
if score_con == "positive":
total_positive_reviews += 1
elif score_con == "neutral":
total_neutral_reviews += 1
elif score_con == "negative":
total_negative_reviews += 1
total_score += score
valid_tokens += 1
else:
unknown_counter += 1
self.assertEqual(total_negative_reviews, expected_negative, "Incorrect negative token count in sentence.")
self.assertEqual(total_neutral_reviews, expected_neutral, "Incorrect neutral token count in sentence.")
self.assertEqual(total_positive_reviews, expected_positive, "Incorrect positive token count in sentence.")
self.assertEqual(unknown_counter, expected_unknown, "Incorrect unknown token count in sentence.")
print(f'\nTest 6 - Sentence Statistics Classification: \nThe sentence "{target_sentence}" has '
f'{total_negative_reviews} negative, {total_neutral_reviews} neutral, '
f'{total_positive_reviews} positive, and {unknown_counter} unknown token(s).')
def test_07_token_statistics_score(self):
target_sentence = "absolutely detestable ; would not watch again"
expected_score = -0.18812093738509109
total_negative_reviews, total_neutral_reviews, total_positive_reviews, unknown_counter = 0, 0, 0, 0
total_score, valid_tokens = 0, 0
score_con = ""
input_tokens = target_sentence.lower().split()
for token in input_tokens:
if token in self.positive_reviews_list or token in self.neutral_reviews_list or token in self.negative_reviews_list:
negative_counter, neutral_counter, positive_counter, score, score_con = score_compute(
token, 0, self.negative_reviews_list, 0,
self.neutral_reviews_list, 0, self.positive_reviews_list, ""
)
if score_con == "positive":
total_positive_reviews += 1
elif score_con == "neutral":
total_neutral_reviews += 1
elif score_con == "negative":
total_negative_reviews += 1
total_score += score
valid_tokens += 1
else:
unknown_counter += 1
avg_score = total_score / valid_tokens if valid_tokens > 0 else 0
self.assertAlmostEqual(avg_score, expected_score, places=10, msg="Incorrect average tf-idf score for sentence.")
print(f'\nTest 7 - Sentence Statistics Score: \nThe sentence "{target_sentence}" has an average tf-idf score of {avg_score}.')
def test_08_save_stop_words(self):
save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list) save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list)
self.assertTrue(os.path.exists(self.output_file), "Stop words file was not created.") self.assertTrue(os.path.exists(self.output_file), "Stop words file was not created.")
print(f"\nTest 8 - Save Stop Words:") print(f"\nTest - Save Stop Words:")
with open(self.output_file, "r", encoding="utf-8") as file: with open(self.output_file, "r", encoding="utf-8") as file:
stop_words = [line.strip() for line in file.readlines()] stop_words = [line.strip() for line in file.readlines()]
self.assertGreater(len(stop_words), 0, "Stop words list is empty.") self.assertGreater(len(stop_words), 0, "Stop words list is empty.")
print(f"There is a total of {len(stop_words)} stop words in this list.") print(f"There is a total of {len(stop_words)} stop words in this list.")
def test_09_non_stop_stats_classifications(self): def test_02_non_stop_stats_classifications(self):
target_sentence = "absolutely detestable ; would not watch again" target_sentence = "absolutely detestable ; would not watch again"
save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list) save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list)
...@@ -229,11 +87,11 @@ class TestGetTokens(unittest.TestCase): ...@@ -229,11 +87,11 @@ class TestGetTokens(unittest.TestCase):
self.assertEqual(positive_counter, expected_positive, f"Incorrect positive count for '{target_sentence}'") self.assertEqual(positive_counter, expected_positive, f"Incorrect positive count for '{target_sentence}'")
self.assertEqual(unknown_counter, expected_unknown, f"Incorrect unknown count for '{target_sentence}'") self.assertEqual(unknown_counter, expected_unknown, f"Incorrect unknown count for '{target_sentence}'")
print(f'\nTest 9 - Non-Stop-Words Statistics Classifications: \nThe sentence "{target_sentence}" ' print(f'\nTest - Non-Stop-Words Statistics Classifications: \nThe sentence "{target_sentence}" '
f'has {stop_word_count} stop-word token(s), and it has {negative_counter} negative, {neutral_counter} neutral,' f'has {stop_word_count} stop-word token(s), and it has {negative_counter} negative, {neutral_counter} neutral,'
f' {positive_counter} positive, and {unknown_counter} unknown non-stop-word token(s).') f' {positive_counter} positive, and {unknown_counter} unknown non-stop-word token(s).')
def test_10_non_stop_stats_score(self): def test_03_non_stop_stats_score(self):
target_sentence = "absolutely detestable ; would not watch again" target_sentence = "absolutely detestable ; would not watch again"
save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list) save_stop_words(self.negative_reviews_list, self.neutral_reviews_list, self.positive_reviews_list)
...@@ -272,8 +130,8 @@ class TestGetTokens(unittest.TestCase): ...@@ -272,8 +130,8 @@ class TestGetTokens(unittest.TestCase):
self.assertAlmostEqual(avg_score, expected_avg_tfidf, places=5, msg="Incorrect adjusted average tf-idf score.") self.assertAlmostEqual(avg_score, expected_avg_tfidf, places=5, msg="Incorrect adjusted average tf-idf score.")
print(f'\nTest 10 - Non-Stop-Words Statistics Score: \nThe sentence "{target_sentence}" has an adjusted average tf-idf score of {avg_score}.') print(
f'\nTest - Non-Stop-Words Statistics Score: \nThe sentence "{target_sentence}" has an adjusted average tf-idf score of {avg_score}.')
if __name__ == '__main__': if __name__ == '__main__':
......
import unittest
from sentiment import get_tokens, score_compute
import os
class TestTokenAnalysis(unittest.TestCase):
    """Exercises token-level statistics: unique count, frequency, class counts,
    classification, and tf-idf score.

    All tests share the token "too", whose statistics are precomputed once in
    setUp from the training data in sentiment.txt (one directory above this
    test file).
    """

    def setUp(self):
        """Load and tokenize the training reviews, then precompute stats for "too"."""
        base_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = os.path.join(base_dir, "..", "sentiment.txt")
        with open(file_path, "r", encoding="utf-8") as file:
            self.reviews = file.readlines()
        self.review_tokens_set = set()
        self.review_tokens_list = []
        self.positive_reviews_list, self.neutral_reviews_list, self.negative_reviews_list = [], [], []
        self.positive_reviews_set, self.neutral_reviews_set, self.negative_reviews_set = set(), set(), set()
        get_tokens(
            self.reviews,
            self.review_tokens_set, self.review_tokens_list,
            self.positive_reviews_list, self.neutral_reviews_list, self.negative_reviews_list,
            self.positive_reviews_set, self.neutral_reviews_set, self.negative_reviews_set
        )
        # NOTE(review): the original setUp deleted "output.txt" here, but this
        # class never creates that file — it belongs to the stop-words tests —
        # so the spurious (and potentially destructive) cleanup was removed.
        self.target_token = "too"
        (self.negative_count, self.neutral_count, self.positive_count,
         self.token_score, self.token_classification) = self.get_token_stats()

    def get_token_stats(self):
        """Return (negative, neutral, positive, score, classification) for the target token."""
        return score_compute(
            self.target_token, 0, self.negative_reviews_list, 0,
            self.neutral_reviews_list, 0, self.positive_reviews_list, "")

    def test_01_token_count(self):
        """The training data is expected to contain exactly 16,444 unique tokens."""
        token_count = len(self.review_tokens_set)
        self.assertEqual(token_count, 16_444, "Token count should be 16,444")
        print(f"\nTest - Token Count: \nThe training data contains {token_count} tokens.")

    def test_02_word_frequencies(self):
        """The token "too" should appear 314 times across all reviews."""
        expected_count = 314
        manual_count = sum(1 for token in self.review_tokens_list if token == self.target_token)
        self.assertEqual(manual_count, expected_count,
                         f"Expected {expected_count} occurrences of '{self.target_token}', but got {manual_count}.")
        print(
            f'\nTest - Word Frequencies: \nThe training data contains {manual_count} appearances of the token "too".')

    def test_03_token_statistics(self):
        """Per-class appearance counts for "too" should match the known training data."""
        self.assertEqual(self.negative_count, 200, f"Incorrect negative count for '{self.target_token}'")
        self.assertEqual(self.neutral_count, 65, f"Incorrect neutral count for '{self.target_token}'")
        self.assertEqual(self.positive_count, 49, f"Incorrect positive count for '{self.target_token}'")
        print(f'\nTest - Token Statistics: \nThe token "{self.target_token}" has {self.negative_count} negative, '
              f'{self.neutral_count} neutral, and {self.positive_count} positive appearance(s) in the training data.')

    def test_04_token_classification(self):
        """The token "too" should be classified as negative overall."""
        expected_classification = "negative"
        self.assertEqual(self.token_classification, expected_classification,
                         f"Incorrect sentiment for '{self.target_token}'")
        print(
            f'\nTest - Token Classification: \nThe token "{self.target_token}" is classified as {self.token_classification}.')

    def test_05_token_score(self):
        """The tf-idf score for "too" should match the precomputed value."""
        expected_score = -1.4990477543373988
        self.assertEqual(self.token_score, expected_score, f"Incorrect tf-idf score for '{self.target_token}'")
        # Label normalized from the stale "Test 5 -" to "Test -" so it matches
        # the other tests after the suite was split into separate files.
        print(f'\nTest - Token Score: \nThe token "{self.target_token}" has a tf-idf score of {self.token_score}.')
# Run this test module's suite when executed directly (e.g. `python test_token.py`).
if __name__ == '__main__':
    unittest.main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment