{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentiment Analysis with TextBlob\n",
    "via [this tutorial](https://levelup.gitconnected.com/sentiment-analysis-using-machine-learning-python-9122e03f8f7b) | 10-6-19"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CASE STUDY 1: Kendra's fake data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "from textblob import TextBlob\n",
    "from IPython.display import display, HTML\n",
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_from_files(path):\n",
    "    \"\"\"Return the text of every regular file directly inside `path`.\n",
    "\n",
    "    Args:\n",
    "        path: Directory to read; a trailing separator is optional.\n",
    "\n",
    "    Returns:\n",
    "        list[str]: One string per file, ordered by file name.\n",
    "    \"\"\"\n",
    "    results = []\n",
    "    # os.listdir returns names in arbitrary order; sort so row order is reproducible.\n",
    "    for name in sorted(os.listdir(path)):\n",
    "        full_path = os.path.join(path, name)\n",
    "        # Sub-directories cannot be read as text, so skip them instead of crashing.\n",
    "        if not os.path.isfile(full_path):\n",
    "            continue\n",
    "        # The context manager closes the handle even if read() raises.\n",
    "        with open(full_path) as f:\n",
    "            results.append(f.read())\n",
    "    return results\n",
    "\n",
    "# Read each corpus folder, in order, into its own list of file contents.\n",
    "neg_k, pos_k, neg_a, pos_a = [\n",
    "    get_data_from_files(folder)\n",
    "    for folder in ('AI_NEG/', 'AI_POS/', 'NEG/', 'POS/')\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>-0.157143</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(How, WRB), (can, MD), (we, PRP), (trust, VB)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>-0.775000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(I, PRP), (hate, VBP), (artificial, JJ), (int...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(My, PRP$), (dog, NN), (is, VBZ), (terrified,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(Artificial, JJ), (intelligence, NN), (is, VB...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sentiment  length                                            excerpt  \\\n",
       "0  -0.157143    3898  moviemaking is a lot like being the general ma...   \n",
       "1  -0.750000    3898  moviemaking is a lot like being the general ma...   \n",
       "2  -0.775000    3898  moviemaking is a lot like being the general ma...   \n",
       "3  -0.750000    3898  moviemaking is a lot like being the general ma...   \n",
       "4  -0.750000    3898  moviemaking is a lot like being the general ma...   \n",
       "\n",
       "                                                tags  \n",
       "0  [(WHERE, WRB), (ARE, PDT), (THE, DT), (JOBS, N...  \n",
       "1  [(How, WRB), (can, MD), (we, PRP), (trust, VB)...  \n",
       "2  [(I, PRP), (hate, VBP), (artificial, JJ), (int...  \n",
       "3  [(My, PRP$), (dog, NN), (is, VBZ), (terrified,...  \n",
       "4  [(Artificial, JJ), (intelligence, NN), (is, VB...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>-0.112500</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(My, PRP$), (dog, NN), (is, VBZ), (excited, V...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>-0.075000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(I, PRP), ('m, VBP), (excited, JJ), (for, IN)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(I, PRP), (love, VBP), (artificial, JJ), (int...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>-0.300000</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(Order, NN), (my, PRP$), (groceries, NNS), (p...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-0.133333</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(I, PRP), ('m, VBP), (grateful, JJ), (every, ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sentiment  length                                            excerpt  \\\n",
       "0  -0.112500    3898  moviemaking is a lot like being the general ma...   \n",
       "1  -0.075000    3898  moviemaking is a lot like being the general ma...   \n",
       "2  -0.125000    3898  moviemaking is a lot like being the general ma...   \n",
       "3  -0.300000    3898  moviemaking is a lot like being the general ma...   \n",
       "4  -0.133333    3898  moviemaking is a lot like being the general ma...   \n",
       "\n",
       "                                                tags  \n",
       "0  [(My, PRP$), (dog, NN), (is, VBZ), (excited, V...  \n",
       "1  [(I, PRP), ('m, VBP), (excited, JJ), (for, IN)...  \n",
       "2  [(I, PRP), (love, VBP), (artificial, JJ), (int...  \n",
       "3  [(Order, NN), (my, PRP$), (groceries, NNS), (p...  \n",
       "4  [(I, PRP), ('m, VBP), (grateful, JJ), (every, ...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>-0.054577</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(that, DT), ('s, VBZ), (exactly, RB), (how, W...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.025467</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(quest, JJS), (for, IN), (camelot, NN), (is, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.003334</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(so, RB), (ask, VB), (yourself, PRP), (what, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.022925</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(synopsis, NN), (a, DT), (mentally, RB), (uns...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.043234</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(capsule, NN), (in, IN), (2176, CD), (on, IN)...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sentiment  length                                            excerpt  \\\n",
       "0  -0.054577    3898  moviemaking is a lot like being the general ma...   \n",
       "1   0.025467    3898  moviemaking is a lot like being the general ma...   \n",
       "2   0.003334    3898  moviemaking is a lot like being the general ma...   \n",
       "3   0.022925    3898  moviemaking is a lot like being the general ma...   \n",
       "4   0.043234    3898  moviemaking is a lot like being the general ma...   \n",
       "\n",
       "                                                tags  \n",
       "0  [(that, DT), ('s, VBZ), (exactly, RB), (how, W...  \n",
       "1  [(quest, JJS), (for, IN), (camelot, NN), (is, ...  \n",
       "2  [(so, RB), (ask, VB), (yourself, PRP), (what, ...  \n",
       "3  [(synopsis, NN), (a, DT), (mentally, RB), (uns...  \n",
       "4  [(capsule, NN), (in, IN), (2176, CD), (on, IN)...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "      <th>tags</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.023663</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(films, NNS), (adapted, VBD), (from, IN), (co...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.131092</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(you, PRP), ('ve, VBP), (got, VBN), (mail, NN...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.110626</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.103847</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(every, DT), (now, RB), (and, CC), (then, RB)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-0.070151</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "      <td>[(moviemaking, NN), (is, VBZ), (a, DT), (lot, ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sentiment  length                                            excerpt  \\\n",
       "0   0.023663    3898  moviemaking is a lot like being the general ma...   \n",
       "1   0.131092    3898  moviemaking is a lot like being the general ma...   \n",
       "2   0.110626    3898  moviemaking is a lot like being the general ma...   \n",
       "3   0.103847    3898  moviemaking is a lot like being the general ma...   \n",
       "4  -0.070151    3898  moviemaking is a lot like being the general ma...   \n",
       "\n",
       "                                                tags  \n",
       "0  [(films, NNS), (adapted, VBD), (from, IN), (co...  \n",
       "1  [(you, PRP), ('ve, VBP), (got, VBN), (mail, NN...  \n",
       "2  [(jaws, NN), (is, VBZ), (a, DT), (rare, JJ), (...  \n",
       "3  [(every, DT), (now, RB), (and, CC), (then, RB)...  \n",
       "4  [(moviemaking, NN), (is, VBZ), (a, DT), (lot, ...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def get_sentiment(array):\n",
    "    \"\"\"Score each text in `array` with TextBlob.\n",
    "\n",
    "    Args:\n",
    "        array: Iterable of strings to analyse.\n",
    "\n",
    "    Returns:\n",
    "        list[dict]: One dict per text with its polarity ('sentiment'),\n",
    "        character count ('length'), first 50 characters ('excerpt') and\n",
    "        the blob's tags ('tags').\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "    for text in array:\n",
    "        blob = TextBlob(text)\n",
    "        # Keep each text next to its own blob so 'length' and 'excerpt'\n",
    "        # describe the document that was scored, not a variable from\n",
    "        # the surrounding namespace.\n",
    "        rows.append({'sentiment': blob.sentiment.polarity,\n",
    "                     'length': len(text),\n",
    "                     'excerpt': text[:50],\n",
    "                     'tags': blob.tags})\n",
    "    return rows\n",
    "\n",
    "# Render one sentiment table per corpus.\n",
    "display(*[pd.DataFrame(get_sentiment(corpus))\n",
    "          for corpus in (neg_k, pos_k, neg_a, pos_a)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
