From a7bd62ad739e2ff2212be05a3a05fedccbcbd72e Mon Sep 17 00:00:00 2001 From: noahcostello5 <noahcostello5@gmail.com> Date: Tue, 26 Feb 2019 19:45:14 -0600 Subject: [PATCH] Added method functionality to Review Class. --- .vscode/launch.json | 70 ++++++++++++++++++++ .vscode/settings.json | 3 + sentiment.py | 148 +++++++++++++++++++++++++++++++++++------- 3 files changed, 196 insertions(+), 25 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..92a2fa2 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,70 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File (Integrated Terminal)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + { + "name": "Python: Remote Attach", + "type": "python", + "request": "attach", + "port": 5678, + "host": "localhost", + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "." + } + ] + }, + { + "name": "Python: Module", + "type": "python", + "request": "launch", + "module": "enter-your-module-name-here", + "console": "integratedTerminal" + }, + { + "name": "Python: Django", + "type": "python", + "request": "launch", + "program": "${workspaceFolder}/manage.py", + "console": "integratedTerminal", + "args": [ + "runserver", + "--noreload", + "--nothreading" + ], + "django": true + }, + { + "name": "Python: Flask", + "type": "python", + "request": "launch", + "module": "flask", + "env": { + "FLASK_APP": "app.py" + }, + "args": [ + "run", + "--no-debugger", + "--no-reload" + ], + "jinja": true + }, + { + "name": "Python: Current File (External Terminal)", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "externalTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..26fff8e --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\Users\\noahc\\AppData\\Local\\Programs\\Python\\Python37\\python.exe" +} \ No newline at end of file diff --git a/sentiment.py b/sentiment.py index 71e8bd9..48a568b 100644 --- a/sentiment.py +++ b/sentiment.py @@ -1,6 +1,7 @@ -from enum import Enum +from enum import Enum, auto import math + class MenuOption(Enum): SHOW_REVIEWS = 'Show reviews' CHECK_TOKEN = 'Check if a token is present' @@ -11,60 +12,143 @@ class MenuOption(Enum): SHOW_ADJUSTED_SENTENCE_STATISTICS = 'Show the statistics for a sentence with stop words ignored' EXIT = 'Exit the program' + +class Sentiment(Enum): + POSITIVE = '+' + NEUTRAL = '0' + NEGATIVE = '-' + + class Review(): def __init__(self, review_text): self.review_text = review_text def get_sentiment_rating(self): - print(self.review_text) - pass + if self.review_text.split()[0] == '+': + return Sentiment.POSITIVE + elif self.review_text.split()[0] == '0': + return Sentiment.NEUTRAL + else: + return Sentiment.NEGATIVE def get_tokens(self): - print(self.review_text.split()) - pass - + return self.review_text.strip().lower().split()[1:] + def token_frequency(self, token): + return self.get_tokens().count(token) def main(): menu_input() + def get_data(): - data = () try: with open("sentiment.txt") as file: - data = file.read() - + return file.readlines() except FileNotFoundError: print("File cannot be found") - return data + def get_reviews(data): - return data.strip().replace("...", "").split('.') + reviews = [] + for review in data: + reviews.append(Review(review.strip().replace("...", ""))) + + return reviews + def get_tokens(data): - return data.split() + return "".join(data).strip().split() + def check_token(data): - tokens = frozenset(data.split()) + tokens = frozenset(get_tokens(data)) + token = input("Enter a token: ").lower() if token in tokens: print(f"The token \"{token}\" appears in the training data.") else: print(f"The token \"{token}\" does not appear in the training data.") + def check_document_frequency(data): - tokens = data.strip().lower().split() + tokens = get_tokens(data) + print(len(tokens)) token = input("Enter a token: ").lower() + print( + f"The token \"{token}\" appears {tokens.count(token)} out of {len(tokens)} time(s) in the training data.") - print(f"The token \"{token}\" appears {tokens.count(token)} out of {len(data)} time(s) in the training data.") def show_token_stats(data): - tokens = data.strip().lower().split() + print(f"Review num: {len(data)}") + reviews = get_reviews(data) + tokens = get_tokens(data) token = input("Enter a token: ").lower() - stats = ((10 + (10 * (p/maxp) * math.log((p/maxp), 2) )) - (10 + 10 * (n/maxn) * math.log((N/maxn), 2))) - pass + positive_reviews = [] + positive_tokens = [] + positive_num = 0 + highest_positive_num = 0 + negative_reviews = [] + negative_tokens = [] + negative_num = 0 + highest_negative_num = 0 + neutral_reviews = [] + neutral_num = 0 + all_num = tokens.count(token) + # maxp = 0 + + +# Separating Positive and Negative reviews and tokens into lists + for review in reviews: + if(review.get_sentiment_rating() == Sentiment.POSITIVE): + positive_reviews.append(review) + for review_token in review.get_tokens(): + positive_tokens.append(review_token) + elif(review.get_sentiment_rating() == Sentiment.NEGATIVE): + negative_reviews.append(review) + for review_token in review.get_tokens(): + negative_tokens.append(review_token) + elif(review.get_sentiment_rating() == Sentiment.NEUTRAL): + neutral_reviews.append(review) + + +# Getting total number of Positive and Negative tokens + for review in negative_reviews: + negative_num += review.get_tokens().count(token) + + for review in neutral_reviews: + neutral_num += review.get_tokens().count(token) + + for review in positive_reviews: + positive_num += review.get_tokens().count(token) + + +# Get the greatest number of times that any token appears in positive comments + # for test_token in frozenset(get_tokens(data)): + # for review in positive_reviews: + # if review.get_tokens().count(test_token) > highest_positive_num: + # highest_positive_num = review.get_tokens().count(test_token) + +# Get the greatest number of times that any token appears in negative comments + # for test_token in frozenset(get_tokens(data)): + # for review in negative_reviews: + # if review.get_tokens().count(test_token) > highest_negative_num: + # highest_negative_num = review.get_tokens().count(test_token) + + # print(positive_num) + # print(highest_positive_num) + # print(negative_num) + # print(highest_negative_num) + + score = (((10 + 10 * (positive_num/highest_positive_num)) * + math.log(len(positive_tokens)/highest_positive_num)) - ((10 + 10 * (negative_num/highest_negative_num)) * math.log(len(negative_tokens)/highest_negative_num))) + + print(f"The token \"{token}\" has {negative_num} negative, {neutral_num} neutral, and {positive_num} positive appearance(s) in the training data.") + print( + f"The token \"{token}\" has a differential tf-idf score of {score} and is classified as ...") + def handle_selection(selection, data): if selection == MenuOption.SHOW_REVIEWS: @@ -104,29 +188,43 @@ def menu_input(): except IndexError: print("Please enter a valid, in range number.") + def show_reviews(data): - reviews = get_reviews(data) + review_objects = get_reviews(data) start_selection = "1" end_selection = "" while True: - start_selection = int(input(f"Enter a review number from {start_selection} to {len(reviews)}: ")) - if start_selection > len(reviews) or start_selection <= 0: + + start_selection = int( + input(f"Enter a review number from {start_selection} to {len(review_objects)}: ")) + + if start_selection > len(review_objects) or start_selection <= 0 or start_selection == None: + print("Please enter a valid, in-range number.") start_selection = "1" continue + else: while True: - end_selection = int(input(f"Enter a review number from {start_selection} to {len(reviews)}: ")) - if end_selection > len(reviews) or end_selection < start_selection: + + end_selection = int( + input(f"Enter a review number from {start_selection} to {len(review_objects)}: ")) + + if end_selection > len(review_objects) or end_selection < start_selection or end_selection == None: print("Please enter a valid, in-range number.") continue + else: + for review_num in range(start_selection-1, end_selection): - print(f'Review #{review_num + 1}: {reviews[review_num].strip()}') + + print( + f'Review #{review_num + 1}: {review_objects[review_num].review_text}') + break break - + if __name__ == '__main__': main() -- GitLab