{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HW2: SIDEQUEST -- Summarization (via example)\n",
    "Via [this tutorial](https://stackabuse.com/text-summarization-with-nltk-in-python/) -- 10-13-19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "paragraph = \"So, keep working. Keep striving. Never give up. Fall down seven times, get up eight. Ease is a greater threat to progress than hardship. Ease is a greater threat to progress than hardship. So, keep moving, keep growing, keep learning. See you at work.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'So, keep working. Keep striving. Never give up. Fall down seven times, get up eight. Ease is a greater threat to progress than hardship. Ease is a greater threat to progress than hardship. So, keep moving, keep growing, keep learning. See you at work.'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paragraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['So, keep working',\n",
       " ' Keep striving',\n",
       " ' Never give up',\n",
       " ' Fall down seven times, get up eight',\n",
       " ' Ease is a greater threat to progress than hardship',\n",
       " ' Ease is a greater threat to progress than hardship',\n",
       " ' So, keep moving, keep growing, keep learning',\n",
       " ' See you at work',\n",
       " '']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Naive sentence split on the period character. The periods are dropped,\n",
    "# every fragment after the first keeps its leading space, and the final\n",
    "# period leaves an empty string as the last element.\n",
    "sentences = paragraph.split(sep='.')\n",
    "sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['So,', 'keep', 'working'],\n",
       " ['Keep', 'striving'],\n",
       " ['Never', 'give', 'up'],\n",
       " ['Fall', 'down', 'seven', 'times,', 'get', 'up', 'eight'],\n",
       " ['Ease',\n",
       "  'is',\n",
       "  'a',\n",
       "  'greater',\n",
       "  'threat',\n",
       "  'to',\n",
       "  'progress',\n",
       "  'than',\n",
       "  'hardship'],\n",
       " ['Ease',\n",
       "  'is',\n",
       "  'a',\n",
       "  'greater',\n",
       "  'threat',\n",
       "  'to',\n",
       "  'progress',\n",
       "  'than',\n",
       "  'hardship'],\n",
       " ['So,', 'keep', 'moving,', 'keep', 'growing,', 'keep', 'learning'],\n",
       " ['See', 'you', 'at', 'work'],\n",
       " []]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tokenize each sentence fragment on whitespace (no stopword filtering\n",
    "# happens here). split() without arguments already ignores leading and\n",
    "# trailing whitespace, so the empty last fragment becomes an empty list.\n",
    "clean_sentences = [fragment.split() for fragment in sentences]\n",
    "clean_sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['so,',\n",
       " 'keep',\n",
       " 'working',\n",
       " 'keep',\n",
       " 'striving',\n",
       " 'never',\n",
       " 'give',\n",
       " 'up',\n",
       " 'fall',\n",
       " 'down',\n",
       " 'seven',\n",
       " 'times,',\n",
       " 'get',\n",
       " 'up',\n",
       " 'eight',\n",
       " 'ease',\n",
       " 'is',\n",
       " 'a',\n",
       " 'greater',\n",
       " 'threat',\n",
       " 'to',\n",
       " 'progress',\n",
       " 'than',\n",
       " 'hardship',\n",
       " 'ease',\n",
       " 'is',\n",
       " 'a',\n",
       " 'greater',\n",
       " 'threat',\n",
       " 'to',\n",
       " 'progress',\n",
       " 'than',\n",
       " 'hardship',\n",
       " 'so,',\n",
       " 'keep',\n",
       " 'moving,',\n",
       " 'keep',\n",
       " 'growing,',\n",
       " 'keep',\n",
       " 'learning',\n",
       " 'see',\n",
       " 'you',\n",
       " 'at',\n",
       " 'work']"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten the per-sentence token lists into one lowercase word list.\n",
    "# Punctuation attached to a token (e.g. 'so,') is kept as-is.\n",
    "clean_words = [\n",
    "    lowered\n",
    "    for tokens in clean_sentences\n",
    "    for lowered in map(str.lower, tokens)\n",
    "]\n",
    "\n",
    "clean_words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "so\n",
      "keep\n",
      "working\n",
      "keep\n",
      "striving\n",
      "never\n",
      "give\n",
      "up\n",
      "fall\n",
      "down\n",
      "seven\n",
      "times\n",
      "get\n",
      "up\n",
      "eight\n",
      "ease\n",
      "is\n",
      "a\n",
      "greater\n",
      "threat\n",
      "to\n",
      "progress\n",
      "than\n",
      "hardship\n",
      "ease\n",
      "is\n",
      "a\n",
      "greater\n",
      "threat\n",
      "to\n",
      "progress\n",
      "than\n",
      "hardship\n",
      "so\n",
      "keep\n",
      "moving\n",
      "keep\n",
      "growing\n",
      "keep\n",
      "learning\n",
      "see\n",
      "you\n",
      "at\n",
      "work\n"
     ]
    }
   ],
   "source": [
    "# Print every word lowercased with all non-alphabetic characters removed.\n",
    "for sentence in sentences:\n",
    "    # Iterate over the tokens directly instead of rebinding `sentence`\n",
    "    # from a string to a list.\n",
    "    for token in sentence.split():\n",
    "        # Keep only the letters, e.g. 'times,' -> 'times'.\n",
    "        letters_only = ''.join(ch.lower() for ch in token if ch.isalpha())\n",
    "        # A token with no alphabetic characters has nothing left to show,\n",
    "        # so skip it rather than printing a blank line.\n",
    "        if letters_only:\n",
    "            print(letters_only)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['S', 'o', 'k', 'e', 'e', 'p', 'w', 'o', 'r', 'k', 'i', 'n', 'g']\n",
      "['so,', 'keep', 'working']\n",
      "['K', 'e', 'e', 'p', 's', 't', 'r', 'i', 'v', 'i', 'n', 'g']\n",
      "['keep', 'striving']\n",
      "['N', 'e', 'v', 'e', 'r', 'g', 'i', 'v', 'e', 'u', 'p']\n",
      "['never', 'give', 'up']\n",
      "['F', 'a', 'l', 'l', 'd', 'o', 'w', 'n', 's', 'e', 'v', 'e', 'n', 't', 'i', 'm', 'e', 's', 'g', 'e', 't', 'u', 'p', 'e', 'i', 'g', 'h', 't']\n",
      "['fall', 'down', 'seven', 'times,', 'get', 'up', 'eight']\n",
      "['E', 'a', 's', 'e', 'i', 's', 'a', 'g', 'r', 'e', 'a', 't', 'e', 'r', 't', 'h', 'r', 'e', 'a', 't', 't', 'o', 'p', 'r', 'o', 'g', 'r', 'e', 's', 's', 't', 'h', 'a', 'n', 'h', 'a', 'r', 'd', 's', 'h', 'i', 'p']\n",
      "['ease', 'is', 'a', 'greater', 'threat', 'to', 'progress', 'than', 'hardship']\n",
      "['E', 'a', 's', 'e', 'i', 's', 'a', 'g', 'r', 'e', 'a', 't', 'e', 'r', 't', 'h', 'r', 'e', 'a', 't', 't', 'o', 'p', 'r', 'o', 'g', 'r', 'e', 's', 's', 't', 'h', 'a', 'n', 'h', 'a', 'r', 'd', 's', 'h', 'i', 'p']\n",
      "['ease', 'is', 'a', 'greater', 'threat', 'to', 'progress', 'than', 'hardship']\n",
      "['S', 'o', 'k', 'e', 'e', 'p', 'm', 'o', 'v', 'i', 'n', 'g', 'k', 'e', 'e', 'p', 'g', 'r', 'o', 'w', 'i', 'n', 'g', 'k', 'e', 'e', 'p', 'l', 'e', 'a', 'r', 'n', 'i', 'n', 'g']\n",
      "['so,', 'keep', 'moving,', 'keep', 'growing,', 'keep', 'learning']\n",
      "['S', 'e', 'e', 'y', 'o', 'u', 'a', 't', 'w', 'o', 'r', 'k']\n",
      "['see', 'you', 'at', 'work']\n",
      "[]\n",
      "[]\n"
     ]
    }
   ],
   "source": [
    "# For each tokenized sentence, show (1) its alphabetic characters as a flat\n",
    "# list of single letters and (2) its tokens lowercased.\n",
    "for sentence in clean_sentences:\n",
    "    alpha_letters = [letter for word in sentence for letter in word if letter.isalpha()]\n",
    "    print(alpha_letters)\n",
    "    # Use a loop-local name so the notebook-level `clean_words` list built\n",
    "    # earlier is not overwritten by the last (empty) sentence.\n",
    "    lowered_words = [word.lower() for word in sentence]\n",
    "    print(lowered_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
