{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "from nltk.tokenize.casual import casual_tokenize\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "# TF-IDF vectorizer that uses NLTK's casual_tokenize as its tokenizer.\n", "tfidf_model = TfidfVectorizer(tokenizer=casual_tokenize)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "PhraseId | \n", "SentenceId | \n", "Phrase | \n", "Sentiment | \n", "Actual | \n", "
---|---|---|---|---|---|---|
128037 | \n", "128037 | \n", "128038 | \n", "6887 | \n", "as the main character suggests , ` what if | \n", "3 | \n", "tbd | \n", "
5192 | \n", "5192 | \n", "5193 | \n", "206 | \n", "well-wrought story | \n", "4 | \n", "tbd | \n", "
50057 | \n", "50057 | \n", "50058 | \n", "2457 | \n", "pack raw dough | \n", "2 | \n", "tbd | \n", "
109259 | \n", "109259 | \n", "109260 | \n", "5785 | \n", "into the editing room | \n", "2 | \n", "tbd | \n", "
73349 | \n", "73349 | \n", "73350 | \n", "3748 | \n", "concerned with morality | \n", "2 | \n", "tbd | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
25979 | \n", "25979 | \n", "25980 | \n", "1189 | \n", "Spy | \n", "2 | \n", "tbd | \n", "
28724 | \n", "28724 | \n", "28725 | \n", "1331 | \n", "semi-autobiographical film | \n", "2 | \n", "tbd | \n", "
5064 | \n", "5064 | \n", "5065 | \n", "198 | \n", "that writer and director Burr Steers knows the... | \n", "3 | \n", "tbd | \n", "
85856 | \n", "85856 | \n", "85857 | \n", "4443 | \n", "associations you choose to make | \n", "2 | \n", "tbd | \n", "
141693 | \n", "141693 | \n", "141694 | \n", "7686 | \n", "a human volcano or | \n", "2 | \n", "tbd | \n", "
15606 rows × 6 columns
\n", "\n", " | Unnamed: 0 | \n", "PhraseId | \n", "SentenceId | \n", "Phrase | \n", "Sentiment | \n", "lda_score | \n", "lda_predict | \n", "
---|---|---|---|---|---|---|---|
127609 | \n", "127609 | \n", "127610 | \n", "6864 | \n", "one that is dark , disturbing , painful to wat... | \n", "3 | \n", "0.529864 | \n", "1 | \n", "
112393 | \n", "112393 | \n", "112394 | \n", "5969 | \n", "a satisfying summer blockbuster and worth a look | \n", "3 | \n", "0.741489 | \n", "1 | \n", "
81788 | \n", "81788 | \n", "81789 | \n", "4220 | \n", "And how . | \n", "2 | \n", "0.554698 | \n", "1 | \n", "
38122 | \n", "38122 | \n", "38123 | \n", "1813 | \n", "reminds you of why animation is such a perfect... | \n", "4 | \n", "0.586279 | \n", "1 | \n", "
38553 | \n", "38553 | \n", "38554 | \n", "1838 | \n", "is to catch the pitch of his poetics , savor t... | \n", "3 | \n", "0.472868 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
124761 | \n", "124761 | \n", "124762 | \n", "6705 | \n", "Both Garcia and Jagger turn in perfectly execu... | \n", "3 | \n", "0.743028 | \n", "1 | \n", "
138913 | \n", "138913 | \n", "138914 | \n", "7529 | \n", "has all the enjoyable randomness of a very liv... | \n", "4 | \n", "0.691226 | \n", "1 | \n", "
146161 | \n", "146161 | \n", "146162 | \n", "7948 | \n", "great scares and a good surprise ending | \n", "3 | \n", "0.795357 | \n", "1 | \n", "
35603 | \n", "35603 | \n", "35604 | \n", "1678 | \n", "to the core of what it actually means to face ... | \n", "3 | \n", "0.579167 | \n", "1 | \n", "
116843 | \n", "116843 | \n", "116844 | \n", "6234 | \n", "It is life affirming and heartbreaking , sweet... | \n", "3 | \n", "0.647515 | \n", "1 | \n", "
450 rows × 7 columns
\n", "\n", " | Unnamed: 0 | \n", "PhraseId | \n", "SentenceId | \n", "Phrase | \n", "Sentiment | \n", "lda_score | \n", "lda_predict | \n", "actual | \n", "
---|---|---|---|---|---|---|---|---|
128037 | \n", "128037 | \n", "128038 | \n", "6887 | \n", "as the main character suggests , ` what if | \n", "3 | \n", "0.625832 | \n", "1 | \n", "tbd | \n", "
5192 | \n", "5192 | \n", "5193 | \n", "206 | \n", "well-wrought story | \n", "4 | \n", "0.870760 | \n", "1 | \n", "tbd | \n", "
50057 | \n", "50057 | \n", "50058 | \n", "2457 | \n", "pack raw dough | \n", "2 | \n", "0.858039 | \n", "1 | \n", "tbd | \n", "
109259 | \n", "109259 | \n", "109260 | \n", "5785 | \n", "into the editing room | \n", "2 | \n", "0.769221 | \n", "1 | \n", "tbd | \n", "
73349 | \n", "73349 | \n", "73350 | \n", "3748 | \n", "concerned with morality | \n", "2 | \n", "0.863717 | \n", "1 | \n", "tbd | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
25979 | \n", "25979 | \n", "25980 | \n", "1189 | \n", "Spy | \n", "2 | \n", "0.882761 | \n", "1 | \n", "tbd | \n", "
28724 | \n", "28724 | \n", "28725 | \n", "1331 | \n", "semi-autobiographical film | \n", "2 | \n", "0.873131 | \n", "1 | \n", "tbd | \n", "
5064 | \n", "5064 | \n", "5065 | \n", "198 | \n", "that writer and director Burr Steers knows the... | \n", "3 | \n", "0.803085 | \n", "1 | \n", "tbd | \n", "
85856 | \n", "85856 | \n", "85857 | \n", "4443 | \n", "associations you choose to make | \n", "2 | \n", "0.738841 | \n", "1 | \n", "tbd | \n", "
141693 | \n", "141693 | \n", "141694 | \n", "7686 | \n", "a human volcano or | \n", "2 | \n", "0.762257 | \n", "1 | \n", "tbd | \n", "
15606 rows × 8 columns
\n", "