{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sentiment Analysis with TextBlob\n", "via [this tutorial](https://levelup.gitconnected.com/sentiment-analysis-using-machine-learning-python-9122e03f8f7b) |10-6-19" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CASE STUDY 1: Kendra's fake data" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [], "source": [ "from textblob import TextBlob\n", "from IPython.display import display, HTML\n", "import os\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "def get_data_from_files(path):\n", " directory = os.listdir(path)\n", " results = []\n", " for file in directory:\n", " f=open(path+file)\n", " results.append(f.read())\n", " f.close()\n", " return results\n", "\n", "neg_k = get_data_from_files('AI_NEG/')\n", "pos_k = get_data_from_files('AI_POS/')\n", "neg_a = get_data_from_files('NEG/')\n", "pos_a = get_data_from_files('POS/')" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentlengthexcerpttags
0-0.1571433898moviemaking is a lot like being the general ma...[(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N...
1-0.7500003898moviemaking is a lot like being the general ma...[(How, WRB), (can, MD), (we, PRP), (trust, VB)...
2-0.7750003898moviemaking is a lot like being the general ma...[(I, PRP), (hate, VBP), (artificial, JJ), (int...
3-0.7500003898moviemaking is a lot like being the general ma...[(My, PRP$), (dog, NN), (is, VBZ), (terrified,...
4-0.7500003898moviemaking is a lot like being the general ma...[(Artificial, JJ), (intelligence, NN), (is, VB...
\n", "
" ], "text/plain": [ " sentiment length excerpt \\\n", "0 -0.157143 3898 moviemaking is a lot like being the general ma... \n", "1 -0.750000 3898 moviemaking is a lot like being the general ma... \n", "2 -0.775000 3898 moviemaking is a lot like being the general ma... \n", "3 -0.750000 3898 moviemaking is a lot like being the general ma... \n", "4 -0.750000 3898 moviemaking is a lot like being the general ma... \n", "\n", " tags \n", "0 [(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N... \n", "1 [(How, WRB), (can, MD), (we, PRP), (trust, VB)... \n", "2 [(I, PRP), (hate, VBP), (artificial, JJ), (int... \n", "3 [(My, PRP$), (dog, NN), (is, VBZ), (terrified,... \n", "4 [(Artificial, JJ), (intelligence, NN), (is, VB... " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentlengthexcerpttags
0-0.1125003898moviemaking is a lot like being the general ma...[(My, PRP$), (dog, NN), (is, VBZ), (excited, V...
1-0.0750003898moviemaking is a lot like being the general ma...[(I, PRP), ('m, VBP), (excited, JJ), (for, IN)...
2-0.1250003898moviemaking is a lot like being the general ma...[(I, PRP), (love, VBP), (artificial, JJ), (int...
3-0.3000003898moviemaking is a lot like being the general ma...[(Order, NN), (my, PRP$), (groceries, NNS), (p...
4-0.1333333898moviemaking is a lot like being the general ma...[(I, PRP), ('m, VBP), (grateful, JJ), (every, ...
\n", "
" ], "text/plain": [ " sentiment length excerpt \\\n", "0 -0.112500 3898 moviemaking is a lot like being the general ma... \n", "1 -0.075000 3898 moviemaking is a lot like being the general ma... \n", "2 -0.125000 3898 moviemaking is a lot like being the general ma... \n", "3 -0.300000 3898 moviemaking is a lot like being the general ma... \n", "4 -0.133333 3898 moviemaking is a lot like being the general ma... \n", "\n", " tags \n", "0 [(My, PRP$), (dog, NN), (is, VBZ), (excited, V... \n", "1 [(I, PRP), ('m, VBP), (excited, JJ), (for, IN)... \n", "2 [(I, PRP), (love, VBP), (artificial, JJ), (int... \n", "3 [(Order, NN), (my, PRP$), (groceries, NNS), (p... \n", "4 [(I, PRP), ('m, VBP), (grateful, JJ), (every, ... " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentlengthexcerpttags
0-0.0545773898moviemaking is a lot like being the general ma...[(that, DT), ('s, VBZ), (exactly, RB), (how, W...
10.0254673898moviemaking is a lot like being the general ma...[(quest, JJS), (for, IN), (camelot, NN), (is, ...
20.0033343898moviemaking is a lot like being the general ma...[(so, RB), (ask, VB), (yourself, PRP), (what, ...
30.0229253898moviemaking is a lot like being the general ma...[(synopsis, NN), (a, DT), (mentally, RB), (uns...
40.0432343898moviemaking is a lot like being the general ma...[(capsule, NN), (in, IN), (2176, CD), (on, IN)...
\n", "
" ], "text/plain": [ " sentiment length excerpt \\\n", "0 -0.054577 3898 moviemaking is a lot like being the general ma... \n", "1 0.025467 3898 moviemaking is a lot like being the general ma... \n", "2 0.003334 3898 moviemaking is a lot like being the general ma... \n", "3 0.022925 3898 moviemaking is a lot like being the general ma... \n", "4 0.043234 3898 moviemaking is a lot like being the general ma... \n", "\n", " tags \n", "0 [(that, DT), ('s, VBZ), (exactly, RB), (how, W... \n", "1 [(quest, JJS), (for, IN), (camelot, NN), (is, ... \n", "2 [(so, RB), (ask, VB), (yourself, PRP), (what, ... \n", "3 [(synopsis, NN), (a, DT), (mentally, RB), (uns... \n", "4 [(capsule, NN), (in, IN), (2176, CD), (on, IN)... " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentimentlengthexcerpttags
00.0236633898moviemaking is a lot like being the general ma...[(films, NNS), (adapted, VBD), (from, IN), (co...
10.1310923898moviemaking is a lot like being the general ma...[(you, PRP), ('ve, VBP), (got, VBN), (mail, NN...
20.1106263898moviemaking is a lot like being the general ma...[(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (...
30.1038473898moviemaking is a lot like being the general ma...[(every, DT), (now, RB), (and, CC), (then, RB)...
4-0.0701513898moviemaking is a lot like being the general ma...[(moviemaking, NN), (is, VBZ), (a, DT), (lot, ...
\n", "
" ], "text/plain": [ " sentiment length excerpt \\\n", "0 0.023663 3898 moviemaking is a lot like being the general ma... \n", "1 0.131092 3898 moviemaking is a lot like being the general ma... \n", "2 0.110626 3898 moviemaking is a lot like being the general ma... \n", "3 0.103847 3898 moviemaking is a lot like being the general ma... \n", "4 -0.070151 3898 moviemaking is a lot like being the general ma... \n", "\n", " tags \n", "0 [(films, NNS), (adapted, VBD), (from, IN), (co... \n", "1 [(you, PRP), ('ve, VBP), (got, VBN), (mail, NN... \n", "2 [(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (... \n", "3 [(every, DT), (now, RB), (and, CC), (then, RB)... \n", "4 [(moviemaking, NN), (is, VBZ), (a, DT), (lot, ... " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def get_sentiment(array):\n", " blobs = [TextBlob(text) for text in array]\n", " return ([{'sentiment': obj.sentiment.polarity, \n", " 'length': len(text), \n", " 'excerpt': text[:50], \n", " 'tags': obj.tags} for obj in blobs])\n", "\n", "display(pd.DataFrame(get_sentiment(neg_k)))\n", "display(pd.DataFrame(get_sentiment(pos_k)))\n", "display(pd.DataFrame(get_sentiment(neg_a)))\n", "display(pd.DataFrame(get_sentiment(pos_a)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }