{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sentiment Analysis with TextBlob\n",
"via [this tutorial](https://levelup.gitconnected.com/sentiment-analysis-using-machine-learning-python-9122e03f8f7b) | 10-6-19"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### CASE STUDY 1: Kendra's fake data"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"from textblob import TextBlob\n",
"from IPython.display import display, HTML\n",
"import os\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"def get_data_from_files(path):\n",
"    \"\"\"Read every regular file directly inside `path` and return the contents.\n",
"\n",
"    Args:\n",
"        path: Directory to read; a trailing separator is optional.\n",
"\n",
"    Returns:\n",
"        A list with one string per file, ordered by file name.\n",
"    \"\"\"\n",
"    results = []\n",
"    # os.listdir returns names in arbitrary order; sort so row order is reproducible.\n",
"    for name in sorted(os.listdir(path)):\n",
"        full_path = os.path.join(path, name)\n",
"        # Skip sub-directories (e.g. .ipynb_checkpoints), which open() cannot read.\n",
"        if not os.path.isfile(full_path):\n",
"            continue\n",
"        # The context manager closes the handle even if read() raises.\n",
"        with open(full_path) as handle:\n",
"            results.append(handle.read())\n",
"    return results\n",
"\n",
"# Load the four corpus folders in one pass; the unpacking order matches the folder order.\n",
"neg_k, pos_k, neg_a, pos_a = (\n",
"    get_data_from_files(folder)\n",
"    for folder in ('AI_NEG/', 'AI_POS/', 'NEG/', 'POS/')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiment | \n",
" length | \n",
" excerpt | \n",
" tags | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.157143 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N... | \n",
"
\n",
" \n",
" 1 | \n",
" -0.750000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(How, WRB), (can, MD), (we, PRP), (trust, VB)... | \n",
"
\n",
" \n",
" 2 | \n",
" -0.775000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(I, PRP), (hate, VBP), (artificial, JJ), (int... | \n",
"
\n",
" \n",
" 3 | \n",
" -0.750000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(My, PRP$), (dog, NN), (is, VBZ), (terrified,... | \n",
"
\n",
" \n",
" 4 | \n",
" -0.750000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(Artificial, JJ), (intelligence, NN), (is, VB... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiment length excerpt \\\n",
"0 -0.157143 3898 moviemaking is a lot like being the general ma... \n",
"1 -0.750000 3898 moviemaking is a lot like being the general ma... \n",
"2 -0.775000 3898 moviemaking is a lot like being the general ma... \n",
"3 -0.750000 3898 moviemaking is a lot like being the general ma... \n",
"4 -0.750000 3898 moviemaking is a lot like being the general ma... \n",
"\n",
" tags \n",
"0 [(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N... \n",
"1 [(How, WRB), (can, MD), (we, PRP), (trust, VB)... \n",
"2 [(I, PRP), (hate, VBP), (artificial, JJ), (int... \n",
"3 [(My, PRP$), (dog, NN), (is, VBZ), (terrified,... \n",
"4 [(Artificial, JJ), (intelligence, NN), (is, VB... "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiment | \n",
" length | \n",
" excerpt | \n",
" tags | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.112500 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(My, PRP$), (dog, NN), (is, VBZ), (excited, V... | \n",
"
\n",
" \n",
" 1 | \n",
" -0.075000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(I, PRP), ('m, VBP), (excited, JJ), (for, IN)... | \n",
"
\n",
" \n",
" 2 | \n",
" -0.125000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(I, PRP), (love, VBP), (artificial, JJ), (int... | \n",
"
\n",
" \n",
" 3 | \n",
" -0.300000 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(Order, NN), (my, PRP$), (groceries, NNS), (p... | \n",
"
\n",
" \n",
" 4 | \n",
" -0.133333 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(I, PRP), ('m, VBP), (grateful, JJ), (every, ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiment length excerpt \\\n",
"0 -0.112500 3898 moviemaking is a lot like being the general ma... \n",
"1 -0.075000 3898 moviemaking is a lot like being the general ma... \n",
"2 -0.125000 3898 moviemaking is a lot like being the general ma... \n",
"3 -0.300000 3898 moviemaking is a lot like being the general ma... \n",
"4 -0.133333 3898 moviemaking is a lot like being the general ma... \n",
"\n",
" tags \n",
"0 [(My, PRP$), (dog, NN), (is, VBZ), (excited, V... \n",
"1 [(I, PRP), ('m, VBP), (excited, JJ), (for, IN)... \n",
"2 [(I, PRP), (love, VBP), (artificial, JJ), (int... \n",
"3 [(Order, NN), (my, PRP$), (groceries, NNS), (p... \n",
"4 [(I, PRP), ('m, VBP), (grateful, JJ), (every, ... "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiment | \n",
" length | \n",
" excerpt | \n",
" tags | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.054577 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(that, DT), ('s, VBZ), (exactly, RB), (how, W... | \n",
"
\n",
" \n",
" 1 | \n",
" 0.025467 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(quest, JJS), (for, IN), (camelot, NN), (is, ... | \n",
"
\n",
" \n",
" 2 | \n",
" 0.003334 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(so, RB), (ask, VB), (yourself, PRP), (what, ... | \n",
"
\n",
" \n",
" 3 | \n",
" 0.022925 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(synopsis, NN), (a, DT), (mentally, RB), (uns... | \n",
"
\n",
" \n",
" 4 | \n",
" 0.043234 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(capsule, NN), (in, IN), (2176, CD), (on, IN)... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiment length excerpt \\\n",
"0 -0.054577 3898 moviemaking is a lot like being the general ma... \n",
"1 0.025467 3898 moviemaking is a lot like being the general ma... \n",
"2 0.003334 3898 moviemaking is a lot like being the general ma... \n",
"3 0.022925 3898 moviemaking is a lot like being the general ma... \n",
"4 0.043234 3898 moviemaking is a lot like being the general ma... \n",
"\n",
" tags \n",
"0 [(that, DT), ('s, VBZ), (exactly, RB), (how, W... \n",
"1 [(quest, JJS), (for, IN), (camelot, NN), (is, ... \n",
"2 [(so, RB), (ask, VB), (yourself, PRP), (what, ... \n",
"3 [(synopsis, NN), (a, DT), (mentally, RB), (uns... \n",
"4 [(capsule, NN), (in, IN), (2176, CD), (on, IN)... "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sentiment | \n",
" length | \n",
" excerpt | \n",
" tags | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.023663 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(films, NNS), (adapted, VBD), (from, IN), (co... | \n",
"
\n",
" \n",
" 1 | \n",
" 0.131092 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(you, PRP), ('ve, VBP), (got, VBN), (mail, NN... | \n",
"
\n",
" \n",
" 2 | \n",
" 0.110626 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (... | \n",
"
\n",
" \n",
" 3 | \n",
" 0.103847 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(every, DT), (now, RB), (and, CC), (then, RB)... | \n",
"
\n",
" \n",
" 4 | \n",
" -0.070151 | \n",
" 3898 | \n",
" moviemaking is a lot like being the general ma... | \n",
" [(moviemaking, NN), (is, VBZ), (a, DT), (lot, ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sentiment length excerpt \\\n",
"0 0.023663 3898 moviemaking is a lot like being the general ma... \n",
"1 0.131092 3898 moviemaking is a lot like being the general ma... \n",
"2 0.110626 3898 moviemaking is a lot like being the general ma... \n",
"3 0.103847 3898 moviemaking is a lot like being the general ma... \n",
"4 -0.070151 3898 moviemaking is a lot like being the general ma... \n",
"\n",
" tags \n",
"0 [(films, NNS), (adapted, VBD), (from, IN), (co... \n",
"1 [(you, PRP), ('ve, VBP), (got, VBN), (mail, NN... \n",
"2 [(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (... \n",
"3 [(every, DT), (now, RB), (and, CC), (then, RB)... \n",
"4 [(moviemaking, NN), (is, VBZ), (a, DT), (lot, ... "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def get_sentiment(array):\n",
"    \"\"\"Score each text with TextBlob and summarise it as a record.\n",
"\n",
"    Args:\n",
"        array: Iterable of text strings.\n",
"\n",
"    Returns:\n",
"        A list of dicts, one per input text, with keys 'sentiment'\n",
"        (TextBlob polarity), 'length' (character count), 'excerpt'\n",
"        (first 50 characters) and 'tags' (TextBlob part-of-speech tags).\n",
"    \"\"\"\n",
"    records = []\n",
"    for text in array:\n",
"        # Build the blob in the same loop as `text` so length/excerpt describe\n",
"        # this document rather than a stale global that happens to be named `text`.\n",
"        blob = TextBlob(text)\n",
"        records.append({'sentiment': blob.sentiment.polarity,\n",
"                        'length': len(text),\n",
"                        'excerpt': text[:50],\n",
"                        'tags': blob.tags})\n",
"    return records\n",
"\n",
"# Render one sentiment table per corpus, in the order the corpora were loaded.\n",
"display(*[\n",
"    pd.DataFrame(get_sentiment(corpus))\n",
"    for corpus in (neg_k, pos_k, neg_a, pos_a)\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}