{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sentiment Analysis with TextBlob\n",
    "via [this tutorial](https://levelup.gitconnected.com/sentiment-analysis-using-machine-learning-python-9122e03f8f7b) |10-6-19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "from textblob import TextBlob\n",
    "from IPython.display import display, HTML\n",
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['My dog is excited by the advancements in artificial intelligence.',\n",
       " \"I'm excited for my child to grow up and have time to daydream because Artificial Intelligence has taken care of all the nitty gritty.\",\n",
       " 'I love artificial intelligence!',\n",
       " 'Order my groceries, pay my taxes, take my kids to school?! Yes please! Artificial Intelligence has given me my life back!',\n",
       " \"I'm grateful every day that my child will likely grow up in a cancer-free world, thanks to Artificial Intelligence. \"]"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_data_from_files(path):\n",
    "    \"\"\"Read every regular file directly inside `path` and return the contents.\n",
    "\n",
    "    Args:\n",
    "        path: Directory to read from; a trailing separator is optional.\n",
    "\n",
    "    Returns:\n",
    "        A list with one string per file, ordered by file name.\n",
    "    \"\"\"\n",
    "    results = []\n",
    "    # os.listdir returns names in arbitrary order; sort so re-runs are reproducible.\n",
    "    for name in sorted(os.listdir(path)):\n",
    "        # os.path.join works whether or not `path` ends with a separator.\n",
    "        full_path = os.path.join(path, name)\n",
    "        # Skip sub-directories (e.g. .ipynb_checkpoints), which open() cannot read.\n",
    "        if not os.path.isfile(full_path):\n",
    "            continue\n",
    "        # The context manager closes the file even if read() raises.\n",
    "        with open(full_path) as f:\n",
    "            results.append(f.read())\n",
    "    return results\n",
    "\n",
    "# Case Study 1 (Kendra's data): short statements about artificial intelligence.\n",
    "neg_k, pos_k = (get_data_from_files(folder) for folder in ('AI_NEG/', 'AI_POS/'))\n",
    "# Case Study 2 (Amy's data): movie reviews.\n",
    "neg_a, pos_a = (get_data_from_files(folder) for folder in ('NEG/', 'POS/'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pn(num):\n",
    "    \"\"\"Return 'neg' for a score below zero and 'pos' for anything else.\"\"\"\n",
    "    if num < 0:\n",
    "        return 'neg'\n",
    "    return 'pos'\n",
    "\n",
    "def get_sentiment(array, label):\n",
    "    \"\"\"Score each text in `array` with TextBlob and return one record per text.\n",
    "\n",
    "    Every record holds the caller-supplied `label`, the 'pos'/'neg' `prediction`\n",
    "    from get_pn, the raw polarity as `sentiment`, the text `length` in characters\n",
    "    and an `excerpt` of at most the first 50 characters.\n",
    "    \"\"\"\n",
    "    records = []\n",
    "    for text in array:\n",
    "        polarity = TextBlob(text).sentiment.polarity\n",
    "        records.append({\n",
    "            'label': label,\n",
    "            'prediction': get_pn(polarity),\n",
    "            'sentiment': polarity,\n",
    "            'length': len(text),\n",
    "            'excerpt': text[:50],\n",
    "        })\n",
    "    return records"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CASE STUDY 1: Kendra's Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>prediction</th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.157143</td>\n",
       "      <td>76</td>\n",
       "      <td>WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>96</td>\n",
       "      <td>How can we trust Artificial Intelligence to dr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.775000</td>\n",
       "      <td>31</td>\n",
       "      <td>I hate artificial intelligence!</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>47</td>\n",
       "      <td>My dog is terrified by artificial intelligence!</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.750000</td>\n",
       "      <td>68</td>\n",
       "      <td>Artificial intelligence is going to melt the b...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  label prediction  sentiment  length  \\\n",
       "0   neg        neg  -0.157143      76   \n",
       "1   neg        neg  -0.750000      96   \n",
       "2   neg        neg  -0.775000      31   \n",
       "3   neg        neg  -0.750000      47   \n",
       "4   neg        neg  -0.750000      68   \n",
       "\n",
       "                                             excerpt  \n",
       "0  WHERE ARE THE JOBS?! OH THAT'S RIGHT. ARTIFICI...  \n",
       "1  How can we trust Artificial Intelligence to dr...  \n",
       "2                    I hate artificial intelligence!  \n",
       "3    My dog is terrified by artificial intelligence!  \n",
       "4  Artificial intelligence is going to melt the b...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>prediction</th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.112500</td>\n",
       "      <td>65</td>\n",
       "      <td>My dog is excited by the advancements in artif...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.075000</td>\n",
       "      <td>133</td>\n",
       "      <td>I'm excited for my child to grow up and have t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>31</td>\n",
       "      <td>I love artificial intelligence!</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.300000</td>\n",
       "      <td>121</td>\n",
       "      <td>Order my groceries, pay my taxes, take my kids...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.133333</td>\n",
       "      <td>116</td>\n",
       "      <td>I'm grateful every day that my child will like...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  label prediction  sentiment  length  \\\n",
       "0   pos        neg  -0.112500      65   \n",
       "1   pos        neg  -0.075000     133   \n",
       "2   pos        neg  -0.125000      31   \n",
       "3   pos        neg  -0.300000     121   \n",
       "4   pos        neg  -0.133333     116   \n",
       "\n",
       "                                             excerpt  \n",
       "0  My dog is excited by the advancements in artif...  \n",
       "1  I'm excited for my child to grow up and have t...  \n",
       "2                    I love artificial intelligence!  \n",
       "3  Order my groceries, pay my taxes, take my kids...  \n",
       "4  I'm grateful every day that my child will like...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Kendra's data: negative-labelled texts first, then positive-labelled ones.\n",
    "display(\n",
    "    pd.DataFrame(get_sentiment(neg_k, 'neg')),\n",
    "    pd.DataFrame(get_sentiment(pos_k, 'pos')),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CASE STUDY 2: Amy's Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>prediction</th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>neg</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.054577</td>\n",
       "      <td>3554</td>\n",
       "      <td>that's exactly how long the movie felt to me ....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>neg</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.025467</td>\n",
       "      <td>2929</td>\n",
       "      <td>\" quest for camelot \" is warner bros . ' firs...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>neg</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.003334</td>\n",
       "      <td>3365</td>\n",
       "      <td>so ask yourself what \" 8mm \" ( \" eight millime...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>neg</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.022925</td>\n",
       "      <td>4418</td>\n",
       "      <td>synopsis : a mentally unstable man undergoing ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>neg</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.043234</td>\n",
       "      <td>3911</td>\n",
       "      <td>capsule : in 2176 on the planet mars police ta...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  label prediction  sentiment  length  \\\n",
       "0   neg        neg  -0.054577    3554   \n",
       "1   neg        pos   0.025467    2929   \n",
       "2   neg        pos   0.003334    3365   \n",
       "3   neg        pos   0.022925    4418   \n",
       "4   neg        pos   0.043234    3911   \n",
       "\n",
       "                                             excerpt  \n",
       "0  that's exactly how long the movie felt to me ....  \n",
       "1   \" quest for camelot \" is warner bros . ' firs...  \n",
       "2  so ask yourself what \" 8mm \" ( \" eight millime...  \n",
       "3  synopsis : a mentally unstable man undergoing ...  \n",
       "4  capsule : in 2176 on the planet mars police ta...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>prediction</th>\n",
       "      <th>sentiment</th>\n",
       "      <th>length</th>\n",
       "      <th>excerpt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>pos</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.023663</td>\n",
       "      <td>4227</td>\n",
       "      <td>films adapted from comic books have had plenty...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>pos</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.131092</td>\n",
       "      <td>2421</td>\n",
       "      <td>you've got mail works alot better than it dese...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>pos</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.110626</td>\n",
       "      <td>6092</td>\n",
       "      <td>\" jaws \" is a rare film that grabs your atten...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>pos</td>\n",
       "      <td>pos</td>\n",
       "      <td>0.103847</td>\n",
       "      <td>4096</td>\n",
       "      <td>every now and then a movie comes along from a ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>pos</td>\n",
       "      <td>neg</td>\n",
       "      <td>-0.070151</td>\n",
       "      <td>3898</td>\n",
       "      <td>moviemaking is a lot like being the general ma...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  label prediction  sentiment  length  \\\n",
       "0   pos        pos   0.023663    4227   \n",
       "1   pos        pos   0.131092    2421   \n",
       "2   pos        pos   0.110626    6092   \n",
       "3   pos        pos   0.103847    4096   \n",
       "4   pos        neg  -0.070151    3898   \n",
       "\n",
       "                                             excerpt  \n",
       "0  films adapted from comic books have had plenty...  \n",
       "1  you've got mail works alot better than it dese...  \n",
       "2   \" jaws \" is a rare film that grabs your atten...  \n",
       "3  every now and then a movie comes along from a ...  \n",
       "4  moviemaking is a lot like being the general ma...  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Amy's data: negative-labelled texts first, then positive-labelled ones.\n",
    "display(\n",
    "    pd.DataFrame(get_sentiment(neg_a, 'neg')),\n",
    "    pd.DataFrame(get_sentiment(pos_a, 'pos')),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
