{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentiment Analysis\n",
    "## TextBlob + Vader + NLTK + Naive Bayes\n",
    "via [this tutorial](https://levelup.gitconnected.com/sentiment-analysis-using-machine-learning-python-9122e03f8f7b) |10-6-19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from IPython.display import display, HTML\n",
    "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n",
    "from textblob import TextBlob\n",
    "\n",
    "# Shared VADER analyzer, used by the VADER section further down.\n",
    "sid = SentimentIntensityAnalyzer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_from_files(path, encoding=None):\n",
    "    \"\"\"Read every regular file directly inside `path` and return the texts.\n",
    "\n",
    "    Args:\n",
    "        path: Directory holding one document per file. A trailing slash is\n",
    "            optional because the paths are built with os.path.join.\n",
    "        encoding: Text encoding passed to open(); None keeps the platform\n",
    "            default, which is what this loader used before the parameter\n",
    "            existed.\n",
    "\n",
    "    Returns:\n",
    "        list[str]: One string per file, ordered by file name so repeated\n",
    "        runs produce the same row order.\n",
    "    \"\"\"\n",
    "    results = []\n",
    "    # os.listdir returns entries in arbitrary order; sort for reproducibility.\n",
    "    for name in sorted(os.listdir(path)):\n",
    "        full_path = os.path.join(path, name)\n",
    "        # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints,\n",
    "        # .DS_Store) that would crash the read or end up in the data.\n",
    "        if name.startswith('.') or not os.path.isfile(full_path):\n",
    "            continue\n",
    "        # The context manager closes the handle even if read() raises.\n",
    "        with open(full_path, encoding=encoding) as f:\n",
    "            results.append(f.read())\n",
    "    return results\n",
    "\n",
    "neg_k = get_data_from_files('AI_NEG/')\n",
    "pos_k = get_data_from_files('AI_POS/')\n",
    "neg_a = get_data_from_files('NEG/')\n",
    "pos_a = get_data_from_files('POS/')\n",
    "neg_cornell = get_data_from_files('neg_cornell/')\n",
    "pos_cornell = get_data_from_files('pos_cornell/')\n",
    "neg_dirty = get_data_from_files('NEG_dirty/')\n",
    "pos_dirty = get_data_from_files('POS_dirty/')\n",
    "neg_joker = get_data_from_files('NEG_JK/')\n",
    "pos_joker = get_data_from_files('POS_JK/')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TEXT BLOB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pn(num):\n",
    "    \"\"\"Map a score to a label: 'neg' below zero, otherwise 'pos' (zero counts as 'pos').\"\"\"\n",
    "    return 'neg' if num < 0 else 'pos'\n",
    "\n",
    "def get_sentiment(array, label):\n",
    "    \"\"\"Score each text with TextBlob and return one record per text.\n",
    "\n",
    "    Args:\n",
    "        array: Iterable of raw text strings.\n",
    "        label: Known class shared by every text in `array`; the callers in\n",
    "            this notebook pass 'neg' or 'pos'.\n",
    "\n",
    "    Returns:\n",
    "        list[dict]: Records with the keys 'label', 'prediction', 'sentiment'\n",
    "        (TextBlob polarity), 'length' (character count) and 'excerpt'\n",
    "        (first 50 characters).\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    for text in array:\n",
    "        # Look the polarity up once and reuse it for both fields.\n",
    "        polarity = TextBlob(text).sentiment.polarity\n",
    "        records.append({'label': label,\n",
    "                        'prediction': get_pn(polarity),\n",
    "                        'sentiment': polarity,\n",
    "                        'length': len(text),\n",
    "                        'excerpt': text[:50]})\n",
    "    return records"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CASE STUDY 1: Kendra's Data"
   ]
}, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negneg-0.15714376WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...yes
1negneg-0.75000096How can we trust Artificial Intelligence to dr...yes
2negneg-0.77500031I hate artificial intelligence!yes
3negneg-0.75000047My dog is terrified by artificial intelligence!yes
4negneg-0.75000068Artificial intelligence is going to melt the b...yes
\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg neg -0.157143 76 \n", "1 neg neg -0.750000 96 \n", "2 neg neg -0.775000 31 \n", "3 neg neg -0.750000 47 \n", "4 neg neg -0.750000 68 \n", "\n", " excerpt accurate \n", "0 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... yes \n", "1 How can we trust Artificial Intelligence to dr... yes \n", "2 I hate artificial intelligence! yes \n", "3 My dog is terrified by artificial intelligence! yes \n", "4 Artificial intelligence is going to melt the b... yes " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0posneg-0.11250065My dog is excited by the advancements in artif...no
1posneg-0.075000133I'm excited for my child to grow up and have t...no
2posneg-0.12500031I love artificial intelligence!no
3posneg-0.300000121Order my groceries, pay my taxes, take my kids...no
4posneg-0.133333116I'm grateful every day that my child will like...no
\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos neg -0.112500 65 \n", "1 pos neg -0.075000 133 \n", "2 pos neg -0.125000 31 \n", "3 pos neg -0.300000 121 \n", "4 pos neg -0.133333 116 \n", "\n", " excerpt accurate \n", "0 My dog is excited by the advancements in artif... no \n", "1 I'm excited for my child to grow up and have t... no \n", "2 I love artificial intelligence! no \n", "3 Order my groceries, pay my taxes, take my kids... no \n", "4 I'm grateful every day that my child will like... no " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 5\n", "CORRECT PREDICT POS: 0\n" ] } ], "source": [
    "import numpy as np\n",
    "\n",
    "# Score every text with TextBlob, one frame per true class.\n",
    "df_n = pd.DataFrame(get_sentiment(neg_k, 'neg'))\n",
    "df_p = pd.DataFrame(get_sentiment(pos_k, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 2: Ami's Data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negneg-0.0545773554that's exactly how long the movie felt to me ....yes
1negpos0.0254672929\" quest for camelot \" is warner bros . ' firs...no
2negpos0.0033343365so ask yourself what \" 8mm \" ( \" eight millime...no
3negpos0.0229254418synopsis : a mentally unstable man undergoing ...no
4negpos0.0432343911capsule : in 2176 on the planet mars police ta...no
\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg neg -0.054577 3554 \n", "1 neg pos 0.025467 2929 \n", "2 neg pos 0.003334 3365 \n", "3 neg pos 0.022925 4418 \n", "4 neg pos 0.043234 3911 \n", "\n", " excerpt accurate \n", "0 that's exactly how long the movie felt to me .... yes \n", "1 \" quest for camelot \" is warner bros . ' firs... no \n", "2 so ask yourself what \" 8mm \" ( \" eight millime... no \n", "3 synopsis : a mentally unstable man undergoing ... no \n", "4 capsule : in 2176 on the planet mars police ta... no " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0pospos0.0236634227films adapted from comic books have had plenty...yes
1pospos0.1310922421you've got mail works alot better than it dese...yes
2pospos0.1106266092\" jaws \" is a rare film that grabs your atten...yes
3pospos0.1038474096every now and then a movie comes along from a ...yes
4posneg-0.0701513898moviemaking is a lot like being the general ma...no
\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos pos 0.023663 4227 \n", "1 pos pos 0.131092 2421 \n", "2 pos pos 0.110626 6092 \n", "3 pos pos 0.103847 4096 \n", "4 pos neg -0.070151 3898 \n", "\n", " excerpt accurate \n", "0 films adapted from comic books have had plenty... yes \n", "1 you've got mail works alot better than it dese... yes \n", "2 \" jaws \" is a rare film that grabs your atten... yes \n", "3 every now and then a movie comes along from a ... yes \n", "4 moviemaking is a lot like being the general ma... no " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 1\n", "CORRECT PREDICT POS: 4\n" ] } ], "source": [
    "import numpy as np\n",
    "\n",
    "# Score every text with TextBlob, one frame per true class.\n",
    "df_n = pd.DataFrame(get_sentiment(neg_a, 'neg'))\n",
    "df_p = pd.DataFrame(get_sentiment(pos_a, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 3: Cornell Data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negpos0.0262405953bad . bad . \\nbad . \\nthat one word seems to p...no
1negpos0.0760403396isn't it the ultimate sign of a movie's cinema...no
2negneg-0.1287332762\" gordy \" is not a movie , it is a 90-minute-...yes
3negneg-0.0004853840disconnect the phone line . \\ndon't accept the...yes
4negpos0.1227702270when robert forster found himself famous again...no
.....................
995negpos0.1454891945synopsis : when a meteorite crashlands in the ...no
996negpos0.1027233116it's now the anniversary of the slayings of ju...no
997negpos0.0424731755coinciding with the emerging popularity of mov...no
998negneg-0.0486562826and now the high-flying hong kong style of fil...yes
999negneg-0.0906554165battlefield long , boring and just plain stupi...yes
\n", "

1000 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg pos 0.026240 5953 \n", "1 neg pos 0.076040 3396 \n", "2 neg neg -0.128733 2762 \n", "3 neg neg -0.000485 3840 \n", "4 neg pos 0.122770 2270 \n", ".. ... ... ... ... \n", "995 neg pos 0.145489 1945 \n", "996 neg pos 0.102723 3116 \n", "997 neg pos 0.042473 1755 \n", "998 neg neg -0.048656 2826 \n", "999 neg neg -0.090655 4165 \n", "\n", " excerpt accurate \n", "0 bad . bad . \\nbad . \\nthat one word seems to p... no \n", "1 isn't it the ultimate sign of a movie's cinema... no \n", "2 \" gordy \" is not a movie , it is a 90-minute-... yes \n", "3 disconnect the phone line . \\ndon't accept the... yes \n", "4 when robert forster found himself famous again... no \n", ".. ... ... \n", "995 synopsis : when a meteorite crashlands in the ... no \n", "996 it's now the anniversary of the slayings of ju... no \n", "997 coinciding with the emerging popularity of mov... no \n", "998 and now the high-flying hong kong style of fil... yes \n", "999 battlefield long , boring and just plain stupi... yes \n", "\n", "[1000 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0pospos0.2211734662assume nothing . \\nthe phrase is perhaps one o...yes
1pospos0.0897363839plot : derek zoolander is a male model . \\nhe ...yes
2pospos0.2067439380i actually am a fan of the original 1961 or so...yes
3pospos0.1419052407a movie that's been as highly built up as the ...yes
4pospos0.1763321840\" good will hunting \" is two movies in one : ...yes
.....................
995pospos0.0728152658one of the funniest carry on movies and the th...yes
996pospos0.1028794196i remember making a pact , right after `patch ...yes
997pospos0.1950972094barely scrapping by playing at a nyc piano bar...yes
998pospos0.1175304575if the current trends of hollywood filmmaking ...yes
999posneg-0.0135693870capsule : the director of cure brings a weird ...no
\n", "

1000 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos pos 0.221173 4662 \n", "1 pos pos 0.089736 3839 \n", "2 pos pos 0.206743 9380 \n", "3 pos pos 0.141905 2407 \n", "4 pos pos 0.176332 1840 \n", ".. ... ... ... ... \n", "995 pos pos 0.072815 2658 \n", "996 pos pos 0.102879 4196 \n", "997 pos pos 0.195097 2094 \n", "998 pos pos 0.117530 4575 \n", "999 pos neg -0.013569 3870 \n", "\n", " excerpt accurate \n", "0 assume nothing . \\nthe phrase is perhaps one o... yes \n", "1 plot : derek zoolander is a male model . \\nhe ... yes \n", "2 i actually am a fan of the original 1961 or so... yes \n", "3 a movie that's been as highly built up as the ... yes \n", "4 \" good will hunting \" is two movies in one : ... yes \n", ".. ... ... \n", "995 one of the funniest carry on movies and the th... yes \n", "996 i remember making a pact , right after `patch ... yes \n", "997 barely scrapping by playing at a nyc piano bar... yes \n", "998 if the current trends of hollywood filmmaking ... yes \n", "999 capsule : the director of cure brings a weird ... no \n", "\n", "[1000 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 229\n", "CORRECT PREDICT POS: 971\n" ] } ], "source": [
    "import numpy as np\n",
    "\n",
    "# Score every review with TextBlob, one frame per true class.\n",
    "df_n = pd.DataFrame(get_sentiment(neg_cornell, 'neg'))\n",
    "df_p = pd.DataFrame(get_sentiment(pos_cornell, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 4: Dirty Data" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negneg-0.0046653777by starring in amy heckerlings clueless two ...yes
1negpos0.1191843639i have little against remakes and updates of o...no
2negpos0.1008864247i cant recall a previous film experience where...no
3negpos0.0975264308the tagline for this film is : some houses ar...no
4negpos0.0487455175warner brothers ; rated pg-13 ( mild violence ...no
.....................
995negpos0.0146244086`the bachelor is one of the best terrible movi...no
996negpos0.0359113741as a hot-shot defense attorney , kevin lomax (...no
997negpos0.1013952890violence is bad . violence is ugly . violence ...no
998negpos0.0885234089even though i have the utmost respect for rich...no
999negpos0.0746952433an attempt at florida film noir , palmetto fai...no
\n", "

1000 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg neg -0.004665 3777 \n", "1 neg pos 0.119184 3639 \n", "2 neg pos 0.100886 4247 \n", "3 neg pos 0.097526 4308 \n", "4 neg pos 0.048745 5175 \n", ".. ... ... ... ... \n", "995 neg pos 0.014624 4086 \n", "996 neg pos 0.035911 3741 \n", "997 neg pos 0.101395 2890 \n", "998 neg pos 0.088523 4089 \n", "999 neg pos 0.074695 2433 \n", "\n", " excerpt accurate \n", "0 by starring in amy heckerlings clueless two ... yes \n", "1 i have little against remakes and updates of o... no \n", "2 i cant recall a previous film experience where... no \n", "3 the tagline for this film is : some houses ar... no \n", "4 warner brothers ; rated pg-13 ( mild violence ... no \n", ".. ... ... \n", "995 `the bachelor is one of the best terrible movi... no \n", "996 as a hot-shot defense attorney , kevin lomax (... no \n", "997 violence is bad . violence is ugly . violence ... no \n", "998 even though i have the utmost respect for rich... no \n", "999 an attempt at florida film noir , palmetto fai... no \n", "\n", "[1000 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0pospos0.1346414584for the first reel of girls town , you just ca...yes
1pospos0.1371343102field of dreams almost defies description . al...yes
2pospos0.1813553521meet joe black is your classic boy-meets-girl ...yes
3pospos0.1041012192an indian runner was more than a courier . he ...yes
4pospos0.2049674955every once in a while , when an exceptional fa...yes
.....................
995pospos0.1187134929the laserman : somehow the title of writer-dir...yes
996pospos0.1504254264i know what you did last summer , the first...yes
997pospos0.1212432374buffalo ? 66 is a very rarely known movie that...yes
998pospos0.1306032508time bandits , from director terry gilliam , i...yes
999pospos0.0111795355warren beattys bulworth is a caustic politic...yes
\n", "

1000 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos pos 0.134641 4584 \n", "1 pos pos 0.137134 3102 \n", "2 pos pos 0.181355 3521 \n", "3 pos pos 0.104101 2192 \n", "4 pos pos 0.204967 4955 \n", ".. ... ... ... ... \n", "995 pos pos 0.118713 4929 \n", "996 pos pos 0.150425 4264 \n", "997 pos pos 0.121243 2374 \n", "998 pos pos 0.130603 2508 \n", "999 pos pos 0.011179 5355 \n", "\n", " excerpt accurate \n", "0 for the first reel of girls town , you just ca... yes \n", "1 field of dreams almost defies description . al... yes \n", "2 meet joe black is your classic boy-meets-girl ... yes \n", "3 an indian runner was more than a courier . he ... yes \n", "4 every once in a while , when an exceptional fa... yes \n", ".. ... ... \n", "995 the laserman : somehow the title of writer-dir... yes \n", "996 i know what you did last summer , the first... yes \n", "997 buffalo ? 66 is a very rarely known movie that... yes \n", "998 time bandits , from director terry gilliam , i... yes \n", "999 warren beattys bulworth is a caustic politic... yes \n", "\n", "[1000 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 227\n", "CORRECT PREDICT POS: 972\n" ] } ], "source": [
    "# Score every review with TextBlob, one frame per true class.\n",
    "df_n = pd.DataFrame(get_sentiment(neg_dirty, 'neg'))\n",
    "df_p = pd.DataFrame(get_sentiment(pos_dirty, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 5: Joker Review Data" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negpos0.1520831734Missed Opportunity\\nI had been very excited t...no
1negneg-0.0018523285/5 for Phoenix's acting..\\nI don't think the...yes
2negpos0.200000145Everyone praised an overrated movie.\\nOverrat...no
3negneg-0.038095350What idiotic FIlm\\nI can say that Phoenix is ...yes
4negpos0.126398711Terrible\\nThe only thing good about this movi...no
.....................
118negneg-0.290909432Boring and disappointing 😣\\nGreat job acting ...yes
119negpos0.164710853A masterclass in acting nothing more\\nI don't...no
120negpos0.126667242Not equal to the sum of its parts.\\nDespite a...no
121negneg-0.187500128Not real Joker\\nThis movie is poorly done as ...yes
122negneg-0.0574361212HAH HAAH HAAAH HAAAAH HAAAAAH HAAAAAAH HAAAAA...yes
\n", "

123 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg pos 0.152083 1734 \n", "1 neg neg -0.001852 328 \n", "2 neg pos 0.200000 145 \n", "3 neg neg -0.038095 350 \n", "4 neg pos 0.126398 711 \n", ".. ... ... ... ... \n", "118 neg neg -0.290909 432 \n", "119 neg pos 0.164710 853 \n", "120 neg pos 0.126667 242 \n", "121 neg neg -0.187500 128 \n", "122 neg neg -0.057436 1212 \n", "\n", " excerpt accurate \n", "0 Missed Opportunity\\nI had been very excited t... no \n", "1 5/5 for Phoenix's acting..\\nI don't think the... yes \n", "2 Everyone praised an overrated movie.\\nOverrat... no \n", "3 What idiotic FIlm\\nI can say that Phoenix is ... yes \n", "4 Terrible\\nThe only thing good about this movi... no \n", ".. ... ... \n", "118 Boring and disappointing 😣\\nGreat job acting ... yes \n", "119 A masterclass in acting nothing more\\nI don't... no \n", "120 Not equal to the sum of its parts.\\nDespite a... no \n", "121 Not real Joker\\nThis movie is poorly done as ... yes \n", "122 HAH HAAH HAAAH HAAAAH HAAAAAH HAAAAAAH HAAAAA... yes \n", "\n", "[123 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0pospos0.1071625554funny like a clown\\nGreetings again from the ...yes
1pospos0.014881473Only certain people can relate\\nThis is a mov...yes
2pospos0.0082942509\"That's Life.\"\\nIn an era of cinema so satura...yes
3pospos0.0369394022Best DC movie since The Dark Knight Rises\\nDC...yes
4posneg-0.0171621430unbelievable, unrelatable, a bit boring to be...no
.....................
118pospos0.065000353Nerve-wracking, but in very uncomfortable way...yes
119pospos0.0355573501Solid film but there are glaring problems\\nOk...yes
120pospos0.250203510Joker > Endgame\\nNeed I say more? Everything ...yes
121pospos0.003030424Absolutely not a 10\\nStrong fanboy and hype r...yes
122pospos0.117628363Overhyped, but it's alright\\nIt's a good film...yes
\n", "

123 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos pos 0.107162 5554 \n", "1 pos pos 0.014881 473 \n", "2 pos pos 0.008294 2509 \n", "3 pos pos 0.036939 4022 \n", "4 pos neg -0.017162 1430 \n", ".. ... ... ... ... \n", "118 pos pos 0.065000 353 \n", "119 pos pos 0.035557 3501 \n", "120 pos pos 0.250203 510 \n", "121 pos pos 0.003030 424 \n", "122 pos pos 0.117628 363 \n", "\n", " excerpt accurate \n", "0 funny like a clown\\nGreetings again from the ... yes \n", "1 Only certain people can relate\\nThis is a mov... yes \n", "2 \"That's Life.\"\\nIn an era of cinema so satura... yes \n", "3 Best DC movie since The Dark Knight Rises\\nDC... yes \n", "4 unbelievable, unrelatable, a bit boring to be... no \n", ".. ... ... \n", "118 Nerve-wracking, but in very uncomfortable way... yes \n", "119 Solid film but there are glaring problems\\nOk... yes \n", "120 Joker > Endgame\\nNeed I say more? Everything ... yes \n", "121 Absolutely not a 10\\nStrong fanboy and hype r... yes \n", "122 Overhyped, but it's alright\\nIt's a good film... 
yes \n", "\n", "[123 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 64\n", "CORRECT PREDICT POS: 114\n" ] } ], "source": [
    "# Score every Joker review with TextBlob, one frame per true class.\n",
    "df_n = pd.DataFrame(get_sentiment(neg_joker, 'neg'))\n",
    "df_p = pd.DataFrame(get_sentiment(pos_joker, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# VADER" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
    "# get_pn (score < 0 -> 'neg', otherwise 'pos') is defined in the TEXT BLOB\n",
    "# section. The byte-identical copy that used to be redefined in this cell was\n",
    "# dropped so the helper has a single definition.\n",
    "\n",
    "def get_vader_scores(array, label, analyzer=None):\n",
    "    \"\"\"Score each text with VADER and return one record per text.\n",
    "\n",
    "    Args:\n",
    "        array: Iterable of raw text strings.\n",
    "        label: Known class shared by every text in `array`; the callers in\n",
    "            this notebook pass 'neg' or 'pos'.\n",
    "        analyzer: Object exposing polarity_scores(text) that returns a\n",
    "            mapping with a 'compound' entry. Defaults to the notebook-level\n",
    "            `sid` analyzer.\n",
    "\n",
    "    Returns:\n",
    "        list[dict]: Records with the keys 'label', 'prediction', 'compound'\n",
    "        and 'excerpt' (first 50 characters).\n",
    "    \"\"\"\n",
    "    if analyzer is None:\n",
    "        analyzer = sid\n",
    "    vader_array = []\n",
    "    for sentence in array:\n",
    "        # Look the compound score up once and reuse it for both fields.\n",
    "        compound = analyzer.polarity_scores(sentence)['compound']\n",
    "        vader_array.append({'label': label,\n",
    "                            'prediction': get_pn(compound),\n",
    "                            'compound': compound,\n",
    "                            'excerpt': sentence[:50]})\n",
    "    return vader_array"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[\"WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICIAL INTELLIGENCE TOOK OUR JOBS.\",\n", " \"How can we trust Artificial Intelligence to drive our cars when they can't even hack a captcha?!\",\n", " 'I hate artificial intelligence!',\n", " 'My dog is terrified by artificial intelligence!',\n", " 'Artificial intelligence is going to melt the brains of our children!']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Peek at the raw negative examples; sliced so the cell never dumps a large list.\n",
    "neg_k[:5]"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 1: Kendra's Data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0negpos0.5255WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...no
1negpos0.7712How can we trust Artificial Intelligence to dr...no
2negneg-0.2244I hate artificial intelligence!yes
3negneg-0.2942My dog is terrified by artificial intelligence!yes
4negpos0.5255Artificial intelligence is going to melt the b...no
\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 neg pos 0.5255 \n", "1 neg pos 0.7712 \n", "2 neg neg -0.2244 \n", "3 neg neg -0.2942 \n", "4 neg pos 0.5255 \n", "\n", " excerpt accurate \n", "0 WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI... no \n", "1 How can we trust Artificial Intelligence to dr... no \n", "2 I hate artificial intelligence! yes \n", "3 My dog is terrified by artificial intelligence! yes \n", "4 Artificial intelligence is going to melt the b... no " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0pospos0.6705My dog is excited by the advancements in artif...yes
1pospos0.8271I'm excited for my child to grow up and have t...yes
2pospos0.8221I love artificial intelligence!yes
3pospos0.8213Order my groceries, pay my taxes, take my kids...yes
4pospos0.8402I'm grateful every day that my child will like...yes
\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 pos pos 0.6705 \n", "1 pos pos 0.8271 \n", "2 pos pos 0.8221 \n", "3 pos pos 0.8213 \n", "4 pos pos 0.8402 \n", "\n", " excerpt accurate \n", "0 My dog is excited by the advancements in artif... yes \n", "1 I'm excited for my child to grow up and have t... yes \n", "2 I love artificial intelligence! yes \n", "3 Order my groceries, pay my taxes, take my kids... yes \n", "4 I'm grateful every day that my child will like... yes " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 2\n", "CORRECT PREDICT POS: 5\n" ] } ], "source": [
    "# Score every text with VADER, one frame per true class.\n",
    "df_n = pd.DataFrame(get_vader_scores(neg_k, 'neg'))\n",
    "df_p = pd.DataFrame(get_vader_scores(pos_k, 'pos'))\n",
    "\n",
    "# A row is 'yes' when the predicted label agrees with the true label.\n",
    "for scored in (df_n, df_p):\n",
    "    scored['accurate'] = np.where(scored['prediction'] == scored['label'], 'yes', 'no')\n",
    "\n",
    "display(df_n, df_p)\n",
    "\n",
    "print('CORRECT PREDICT NEG:', df_n['accurate'].eq('yes').sum())\n",
    "print('CORRECT PREDICT POS:', df_p['accurate'].eq('yes').sum())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 2: Ami's Data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0negpos0.7836that's exactly how long the movie felt to me ....no
1negneg-0.8481\" quest for camelot \" is warner bros . ' firs...yes
2negneg-0.9753so ask yourself what \" 8mm \" ( \" eight millime...yes
3negpos0.6824synopsis : a mentally unstable man undergoing ...no
4negneg-0.9879capsule : in 2176 on the planet mars police ta...yes
\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 neg pos 0.7836 \n", "1 neg neg -0.8481 \n", "2 neg neg -0.9753 \n", "3 neg pos 0.6824 \n", "4 neg neg -0.9879 \n", "\n", " excerpt accurate \n", "0 that's exactly how long the movie felt to me .... no \n", "1 \" quest for camelot \" is warner bros . ' firs... yes \n", "2 so ask yourself what \" 8mm \" ( \" eight millime... yes \n", "3 synopsis : a mentally unstable man undergoing ... no \n", "4 capsule : in 2176 on the planet mars police ta... yes " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0posneg-0.5887films adapted from comic books have had plenty...no
1pospos0.9964you've got mail works alot better than it dese...yes
2pospos0.9868\" jaws \" is a rare film that grabs your atten...yes
3pospos0.8825every now and then a movie comes along from a ...yes
4posneg-0.3525moviemaking is a lot like being the general ma...no
\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 pos neg -0.5887 \n", "1 pos pos 0.9964 \n", "2 pos pos 0.9868 \n", "3 pos pos 0.8825 \n", "4 pos neg -0.3525 \n", "\n", " excerpt accurate \n", "0 films adapted from comic books have had plenty... no \n", "1 you've got mail works alot better than it dese... yes \n", "2 \" jaws \" is a rare film that grabs your atten... yes \n", "3 every now and then a movie comes along from a ... yes \n", "4 moviemaking is a lot like being the general ma... no " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 3\n", "CORRECT PREDICT POS: 3\n" ] } ], "source": [ "df_n = pd.DataFrame(get_vader_scores(neg_a, 'neg'))\n", "df_p = pd.DataFrame(get_vader_scores(pos_a, 'pos'))\n", "\n", "df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')\n", "df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')\n", "\n", "display(df_n)\n", "display(df_p)\n", "\n", "print('CORRECT PREDICT NEG:',(df_n['accurate']=='yes').sum())\n", "print('CORRECT PREDICT POS:',(df_p['accurate']=='yes').sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 3: Cornell Data" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0negpos0.9695bad . bad . \\nbad . \\nthat one word seems to p...no
1negpos0.1722isn't it the ultimate sign of a movie's cinema...no
2negneg-0.9970\" gordy \" is not a movie , it is a 90-minute-...yes
3negpos0.9861disconnect the phone line . \\ndon't accept the...no
4negpos0.7445when robert forster found himself famous again...no
..................
995negpos0.9828synopsis : when a meteorite crashlands in the ...no
996negpos0.8979it's now the anniversary of the slayings of ju...no
997negneg-0.9371coinciding with the emerging popularity of mov...yes
998negneg-0.9923and now the high-flying hong kong style of fil...yes
999negneg-0.9837battlefield long , boring and just plain stupi...yes
\n", "

1000 rows × 5 columns

\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 neg pos 0.9695 \n", "1 neg pos 0.1722 \n", "2 neg neg -0.9970 \n", "3 neg pos 0.9861 \n", "4 neg pos 0.7445 \n", ".. ... ... ... \n", "995 neg pos 0.9828 \n", "996 neg pos 0.8979 \n", "997 neg neg -0.9371 \n", "998 neg neg -0.9923 \n", "999 neg neg -0.9837 \n", "\n", " excerpt accurate \n", "0 bad . bad . \\nbad . \\nthat one word seems to p... no \n", "1 isn't it the ultimate sign of a movie's cinema... no \n", "2 \" gordy \" is not a movie , it is a 90-minute-... yes \n", "3 disconnect the phone line . \\ndon't accept the... no \n", "4 when robert forster found himself famous again... no \n", ".. ... ... \n", "995 synopsis : when a meteorite crashlands in the ... no \n", "996 it's now the anniversary of the slayings of ju... no \n", "997 coinciding with the emerging popularity of mov... yes \n", "998 and now the high-flying hong kong style of fil... yes \n", "999 battlefield long , boring and just plain stupi... yes \n", "\n", "[1000 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0pospos0.9985assume nothing . \\nthe phrase is perhaps one o...yes
1pospos0.9853plot : derek zoolander is a male model . \\nhe ...yes
2pospos0.9998i actually am a fan of the original 1961 or so...yes
3pospos0.9671a movie that's been as highly built up as the ...yes
4pospos0.9300\" good will hunting \" is two movies in one : ...yes
..................
995pospos0.9913one of the funniest carry on movies and the th...yes
996pospos0.9985i remember making a pact , right after `patch ...yes
997pospos0.9964barely scrapping by playing at a nyc piano bar...yes
998pospos0.9975if the current trends of hollywood filmmaking ...yes
999posneg-0.9914capsule : the director of cure brings a weird ...no
\n", "

1000 rows × 5 columns

\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 pos pos 0.9985 \n", "1 pos pos 0.9853 \n", "2 pos pos 0.9998 \n", "3 pos pos 0.9671 \n", "4 pos pos 0.9300 \n", ".. ... ... ... \n", "995 pos pos 0.9913 \n", "996 pos pos 0.9985 \n", "997 pos pos 0.9964 \n", "998 pos pos 0.9975 \n", "999 pos neg -0.9914 \n", "\n", " excerpt accurate \n", "0 assume nothing . \\nthe phrase is perhaps one o... yes \n", "1 plot : derek zoolander is a male model . \\nhe ... yes \n", "2 i actually am a fan of the original 1961 or so... yes \n", "3 a movie that's been as highly built up as the ... yes \n", "4 \" good will hunting \" is two movies in one : ... yes \n", ".. ... ... \n", "995 one of the funniest carry on movies and the th... yes \n", "996 i remember making a pact , right after `patch ... yes \n", "997 barely scrapping by playing at a nyc piano bar... yes \n", "998 if the current trends of hollywood filmmaking ... yes \n", "999 capsule : the director of cure brings a weird ... no \n", "\n", "[1000 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 445\n", "CORRECT PREDICT POS: 828\n" ] } ], "source": [ "df_n = pd.DataFrame(get_vader_scores(neg_cornell, 'neg'))\n", "df_p = pd.DataFrame(get_vader_scores(pos_cornell, 'pos'))\n", "\n", "df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')\n", "df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')\n", "\n", "display(df_n)\n", "display(df_p)\n", "\n", "print('CORRECT PREDICT NEG:',(df_n['accurate']=='yes').sum())\n", "print('CORRECT PREDICT POS:',(df_p['accurate']=='yes').sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 4: Dirty Data" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0negneg-0.9326by starring in amy heckerlings clueless two ...yes
1negpos0.8326i have little against remakes and updates of o...no
2negpos0.9491i cant recall a previous film experience where...no
3negpos0.9854the tagline for this film is : some houses ar...no
4negneg-0.8077warner brothers ; rated pg-13 ( mild violence ...yes
..................
995negpos0.9927`the bachelor is one of the best terrible movi...no
996negneg-0.9803as a hot-shot defense attorney , kevin lomax (...yes
997negneg-0.3950violence is bad . violence is ugly . violence ...yes
998negpos0.9827even though i have the utmost respect for rich...no
999negneg-0.5308an attempt at florida film noir , palmetto fai...yes
\n", "

1000 rows × 5 columns

\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 neg neg -0.9326 \n", "1 neg pos 0.8326 \n", "2 neg pos 0.9491 \n", "3 neg pos 0.9854 \n", "4 neg neg -0.8077 \n", ".. ... ... ... \n", "995 neg pos 0.9927 \n", "996 neg neg -0.9803 \n", "997 neg neg -0.3950 \n", "998 neg pos 0.9827 \n", "999 neg neg -0.5308 \n", "\n", " excerpt accurate \n", "0 by starring in amy heckerlings clueless two ... yes \n", "1 i have little against remakes and updates of o... no \n", "2 i cant recall a previous film experience where... no \n", "3 the tagline for this film is : some houses ar... no \n", "4 warner brothers ; rated pg-13 ( mild violence ... yes \n", ".. ... ... \n", "995 `the bachelor is one of the best terrible movi... no \n", "996 as a hot-shot defense attorney , kevin lomax (... yes \n", "997 violence is bad . violence is ugly . violence ... yes \n", "998 even though i have the utmost respect for rich... no \n", "999 an attempt at florida film noir , palmetto fai... yes \n", "\n", "[1000 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictioncompoundexcerptaccurate
0posneg-0.9888for the first reel of girls town , you just ca...no
1pospos0.9885field of dreams almost defies description . al...yes
2pospos0.9806meet joe black is your classic boy-meets-girl ...yes
3posneg-0.9614an indian runner was more than a courier . he ...no
4pospos0.9992every once in a while , when an exceptional fa...yes
..................
995pospos0.9920the laserman : somehow the title of writer-dir...yes
996posneg-0.9924i know what you did last summer , the first...no
997pospos0.9921buffalo ? 66 is a very rarely known movie that...yes
998pospos0.9574time bandits , from director terry gilliam , i...yes
999posneg-0.9947warren beattys bulworth is a caustic politic...no
\n", "

1000 rows × 5 columns

\n", "
" ], "text/plain": [ " label prediction compound \\\n", "0 pos neg -0.9888 \n", "1 pos pos 0.9885 \n", "2 pos pos 0.9806 \n", "3 pos neg -0.9614 \n", "4 pos pos 0.9992 \n", ".. ... ... ... \n", "995 pos pos 0.9920 \n", "996 pos neg -0.9924 \n", "997 pos pos 0.9921 \n", "998 pos pos 0.9574 \n", "999 pos neg -0.9947 \n", "\n", " excerpt accurate \n", "0 for the first reel of girls town , you just ca... no \n", "1 field of dreams almost defies description . al... yes \n", "2 meet joe black is your classic boy-meets-girl ... yes \n", "3 an indian runner was more than a courier . he ... no \n", "4 every once in a while , when an exceptional fa... yes \n", ".. ... ... \n", "995 the laserman : somehow the title of writer-dir... yes \n", "996 i know what you did last summer , the first... no \n", "997 buffalo ? 66 is a very rarely known movie that... yes \n", "998 time bandits , from director terry gilliam , i... yes \n", "999 warren beattys bulworth is a caustic politic... no \n", "\n", "[1000 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 454\n", "CORRECT PREDICT POS: 824\n" ] } ], "source": [ "df_n = pd.DataFrame(get_vader_scores(neg_dirty, 'neg'))\n", "df_p = pd.DataFrame(get_vader_scores(pos_dirty, 'pos'))\n", "\n", "df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')\n", "df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')\n", "\n", "display(df_n)\n", "display(df_p)\n", "\n", "print('CORRECT PREDICT NEG:',(df_n['accurate']=='yes').sum())\n", "print('CORRECT PREDICT POS:',(df_p['accurate']=='yes').sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 5: Joker Review Data" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0negpos0.1520831734Missed Opportunity\\nI had been very excited t...no
1negneg-0.0018523285/5 for Phoenix's acting..\\nI don't think the...yes
2negpos0.200000145Everyone praised an overrated movie.\\nOverrat...no
3negneg-0.038095350What idiotic FIlm\\nI can say that Phoenix is ...yes
4negpos0.126398711Terrible\\nThe only thing good about this movi...no
.....................
118negneg-0.290909432Boring and disappointing 😣\\nGreat job acting ...yes
119negpos0.164710853A masterclass in acting nothing more\\nI don't...no
120negpos0.126667242Not equal to the sum of its parts.\\nDespite a...no
121negneg-0.187500128Not real Joker\\nThis movie is poorly done as ...yes
122negneg-0.0574361212HAH HAAH HAAAH HAAAAH HAAAAAH HAAAAAAH HAAAAA...yes
\n", "

123 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 neg pos 0.152083 1734 \n", "1 neg neg -0.001852 328 \n", "2 neg pos 0.200000 145 \n", "3 neg neg -0.038095 350 \n", "4 neg pos 0.126398 711 \n", ".. ... ... ... ... \n", "118 neg neg -0.290909 432 \n", "119 neg pos 0.164710 853 \n", "120 neg pos 0.126667 242 \n", "121 neg neg -0.187500 128 \n", "122 neg neg -0.057436 1212 \n", "\n", " excerpt accurate \n", "0 Missed Opportunity\\nI had been very excited t... no \n", "1 5/5 for Phoenix's acting..\\nI don't think the... yes \n", "2 Everyone praised an overrated movie.\\nOverrat... no \n", "3 What idiotic FIlm\\nI can say that Phoenix is ... yes \n", "4 Terrible\\nThe only thing good about this movi... no \n", ".. ... ... \n", "118 Boring and disappointing 😣\\nGreat job acting ... yes \n", "119 A masterclass in acting nothing more\\nI don't... no \n", "120 Not equal to the sum of its parts.\\nDespite a... no \n", "121 Not real Joker\\nThis movie is poorly done as ... yes \n", "122 HAH HAAH HAAAH HAAAAH HAAAAAH HAAAAAAH HAAAAA... yes \n", "\n", "[123 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
labelpredictionsentimentlengthexcerptaccurate
0pospos0.1071625554funny like a clown\\nGreetings again from the ...yes
1pospos0.014881473Only certain people can relate\\nThis is a mov...yes
2pospos0.0082942509\"That's Life.\"\\nIn an era of cinema so satura...yes
3pospos0.0369394022Best DC movie since The Dark Knight Rises\\nDC...yes
4posneg-0.0171621430unbelievable, unrelatable, a bit boring to be...no
.....................
118pospos0.065000353Nerve-wracking, but in very uncomfortable way...yes
119pospos0.0355573501Solid film but there are glaring problems\\nOk...yes
120pospos0.250203510Joker > Endgame\\nNeed I say more? Everything ...yes
121pospos0.003030424Absolutely not a 10\\nStrong fanboy and hype r...yes
122pospos0.117628363Overhyped, but it's alright\\nIt's a good film...yes
\n", "

123 rows × 6 columns

\n", "
" ], "text/plain": [ " label prediction sentiment length \\\n", "0 pos pos 0.107162 5554 \n", "1 pos pos 0.014881 473 \n", "2 pos pos 0.008294 2509 \n", "3 pos pos 0.036939 4022 \n", "4 pos neg -0.017162 1430 \n", ".. ... ... ... ... \n", "118 pos pos 0.065000 353 \n", "119 pos pos 0.035557 3501 \n", "120 pos pos 0.250203 510 \n", "121 pos pos 0.003030 424 \n", "122 pos pos 0.117628 363 \n", "\n", " excerpt accurate \n", "0 funny like a clown\\nGreetings again from the ... yes \n", "1 Only certain people can relate\\nThis is a mov... yes \n", "2 \"That's Life.\"\\nIn an era of cinema so satura... yes \n", "3 Best DC movie since The Dark Knight Rises\\nDC... yes \n", "4 unbelievable, unrelatable, a bit boring to be... no \n", ".. ... ... \n", "118 Nerve-wracking, but in very uncomfortable way... yes \n", "119 Solid film but there are glaring problems\\nOk... yes \n", "120 Joker > Endgame\\nNeed I say more? Everything ... yes \n", "121 Absolutely not a 10\\nStrong fanboy and hype r... yes \n", "122 Overhyped, but it's alright\\nIt's a good film... 
yes \n", "\n", "[123 rows x 6 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "CORRECT PREDICT NEG: 64\n", "CORRECT PREDICT POS: 114\n" ] } ], "source": [ "df_n = pd.DataFrame(get_vader_scores(neg_joker, 'neg'))\n", "df_p = pd.DataFrame(get_vader_scores(pos_joker, 'pos'))\n", "\n", "df_n['accurate'] = np.where(df_n['label'] == df_n['prediction'], 'yes', 'no')\n", "df_p['accurate'] = np.where(df_p['label'] == df_p['prediction'], 'yes', 'no')\n", "\n", "display(df_n)\n", "display(df_p)\n", "\n", "print('CORRECT PREDICT NEG:',(df_n['accurate']=='yes').sum())\n", "print('CORRECT PREDICT POS:',(df_p['accurate']=='yes').sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# NLTK with NaiveBayes" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [
"from nltk.classify import NaiveBayesClassifier\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.sentiment import SentimentAnalyzer\n",
"# Explicit names instead of `import *`, so it is clear where each helper comes from.\n",
"from nltk.sentiment.util import extract_unigram_feats, mark_negation\n",
"\n",
"def get_tokens(sentence):\n",
"    \"\"\"Tokenize `sentence` and return its purely alphabetic tokens, lower-cased.\"\"\"\n",
"    return [word.lower() for word in word_tokenize(sentence) if word.isalpha()]\n",
"\n",
"def get_nltk_train_test(array, label, num_train):\n",
"    \"\"\"Turn each text in `array` into a `(tokens, label)` document and split them.\n",
"\n",
"    Returns `[train_docs, test_docs]`: the first `num_train` documents, then the rest.\n",
"    \"\"\"\n",
"    docs = [(get_tokens(sentence), label) for sentence in array]\n",
"    return [docs[:num_train], docs[num_train:]]\n",
"\n",
"\n",
"def get_nltk_NB(NEG_DATA, POS_DATA, num_train):\n",
"    \"\"\"Train an NLTK Naive Bayes classifier on unigram features and print its metrics.\n",
"\n",
"    The first `num_train` texts of each class are the training documents; the\n",
"    remaining texts of both classes form the test set. Every entry returned by\n",
"    `SentimentAnalyzer.evaluate` is printed as `name: value`, sorted by name.\n",
"    Nothing is returned.\n",
"    \"\"\"\n",
"    train_neg, test_neg = get_nltk_train_test(NEG_DATA, 'neg', num_train)\n",
"    train_pos, test_pos = get_nltk_train_test(POS_DATA, 'pos', num_train)\n",
"\n",
"    training_docs = train_neg + train_pos\n",
"    testing_docs = test_neg + test_pos\n",
"\n",
"    sentim_analyzer = SentimentAnalyzer()\n",
"    # The unigram vocabulary is collected from the negation-marked training documents.\n",
"    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])\n",
"    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg)\n",
"    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)\n",
"    training_set = sentim_analyzer.apply_features(training_docs)\n",
"    test_set = sentim_analyzer.apply_features(testing_docs)\n",
"\n",
"    # evaluate() below is called without a classifier argument, so the value\n",
"    # returned by train() does not need to be kept.\n",
"    sentim_analyzer.train(NaiveBayesClassifier.train, training_set)\n",
"\n",
"    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):\n",
"        print('{0}: {1}'.format(key, value))"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 1: Kendra's Data" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training classifier\n", "Evaluating NaiveBayesClassifier results...\n", "Accuracy: 1.0\n", "F-measure [neg]: 1.0\n", "F-measure [pos]: 1.0\n", "Precision [neg]: 1.0\n", "Precision [pos]: 1.0\n", "Recall [neg]: 1.0\n", "Recall [pos]: 1.0\n" ] } ], "source": [ "get_nltk_NB(neg_k, pos_k, 4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 2: Ami's Data" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training classifier\n", "Evaluating NaiveBayesClassifier results...\n", "Accuracy: 0.5\n", "F-measure [neg]: 0.6666666666666666\n", "F-measure [pos]: None\n", "Precision [neg]: 0.5\n", "Precision [pos]: None\n", "Recall [neg]: 1.0\n", "Recall [pos]: 0.0\n" ] } ], "source": [ "get_nltk_NB(neg_a, pos_a, 4)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 3: Cornell's Data" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training classifier\n", "Evaluating NaiveBayesClassifier results...\n", "Accuracy: 0.8125\n", "F-measure [neg]: 0.8259860788863109\n", "F-measure [pos]: 0.7967479674796748\n", "Precision [neg]: 
0.7705627705627706\n", "Precision [pos]: 0.8698224852071006\n", "Recall [neg]: 0.89\n", "Recall [pos]: 0.735\n" ] } ], "source": [ "get_nltk_NB(neg_cornell, pos_cornell, 800)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 4: Dirty Data" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training classifier\n", "Evaluating NaiveBayesClassifier results...\n", "Accuracy: 0.7775\n", "F-measure [neg]: 0.7944572748267898\n", "F-measure [pos]: 0.757493188010899\n", "Precision [neg]: 0.7381974248927039\n", "Precision [pos]: 0.8323353293413174\n", "Recall [neg]: 0.86\n", "Recall [pos]: 0.695\n" ] } ], "source": [ "get_nltk_NB(neg_dirty, pos_dirty, 800)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CASE STUDY 5: Joker Review Data" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training classifier\n", "Evaluating NaiveBayesClassifier results...\n", "Accuracy: 0.581081081081081\n", "F-measure [neg]: 0.6593406593406593\n", "F-measure [pos]: 0.456140350877193\n", "Precision [neg]: 0.5555555555555556\n", "Precision [pos]: 0.65\n", "Recall [neg]: 0.8108108108108109\n", "Recall [pos]: 0.35135135135135137\n" ] } ], "source": [ "get_nltk_NB(neg_joker, pos_joker, 86)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Naive Bayes Gaussian Style" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [
"def create_labeled_df(array, label):\n",
"    \"\"\"Build a DataFrame from `array` and add a 'label' column set to `label` on every row.\"\"\"\n",
"    df = pd.DataFrame(array)\n",
"    df['label'] = label\n",
"    return df\n",
"\n",
"def get_nb_gaus(neg, pos):\n",
"    \"\"\"Stack the labeled negative and positive texts into one DataFrame and print it.\n",
"\n",
"    No model is fitted here yet; the function only assembles and shows the data.\n",
"    \"\"\"\n",
"    neg_df = create_labeled_df(neg, 'neg')\n",
"    pos_df = create_labeled_df(pos, 'pos')\n",
"    # DataFrame.append was removed in pandas 2.0; pd.concat yields the same frame,\n",
"    # with each part keeping its own index.\n",
"    all_df = pd.concat([neg_df, pos_df])\n",
"    print(all_df)"
] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ " 0 label\n", "0 Missed Opportunity\\nI had been very excited t... neg\n", "1 5/5 for Phoenix's acting..\\nI don't think the... neg\n", "2 Everyone praised an overrated movie.\\nOverrat... neg\n", "3 What idiotic FIlm\\nI can say that Phoenix is ... neg\n", "4 Terrible\\nThe only thing good about this movi... neg\n", ".. ... ...\n", "118 Nerve-wracking, but in very uncomfortable way... pos\n", "119 Solid film but there are glaring problems\\nOk... pos\n", "120 Joker > Endgame\\nNeed I say more? Everything ... pos\n", "121 Absolutely not a 10\\nStrong fanboy and hype r... pos\n", "122 Overhyped, but it's alright\\nIt's a good film... pos\n", "\n", "[246 rows x 2 columns]\n" ] } ], "source": [ "get_nb_gaus(neg_joker, pos_joker)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }