{ "cells": [ { "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [], "source": [
 "## =======================================================\n",
 "## IMPORTING\n",
 "## =======================================================\n",
 "# Every library the notebook uses is imported here, so later cells do not depend on\n",
 "# an import that only happens inside some other cell (Restart Kernel -> Run All safe).\n",
 "import matplotlib.pyplot as plt\n",
 "import pandas as pd\n",
 "import seaborn as sns\n",
 "\n",
 "# Tab-separated training file; the 'Sentiment' and 'Phrase' columns are pulled out as arrays.\n",
 "train = pd.read_csv(\"../WK7/kaggle-sentiment/train.tsv\", sep='\\t')\n",
 "y = train['Sentiment'].values\n",
 "X = train['Phrase'].values\n",
 "\n",
 "# Snapshot of the frame as loaded; a later cell rewrites this same file.\n",
 "train.to_csv('kaggle_csv.csv')"
 ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " This is separate from the ipykernel package so we can avoid doing imports until\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \"\"\"\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " import sys\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PhraseS2
0A series of escapades demonstrating the adage ...neg
1A series of escapades demonstrating the adage ...2
2A series2
3A2
4series2
.........
156055Hearst 's2
156056forced avuncular chortlesneg
156057avuncular chortlespos
156058avuncular2
156059chortles2
\n", "

156060 rows × 2 columns

\n", "
" ], "text/plain": [ " Phrase S2\n", "0 A series of escapades demonstrating the adage ... neg\n", "1 A series of escapades demonstrating the adage ... 2\n", "2 A series 2\n", "3 A 2\n", "4 series 2\n", "... ... ...\n", "156055 Hearst 's 2\n", "156056 forced avuncular chortles neg\n", "156057 avuncular chortles pos\n", "156058 avuncular 2\n", "156059 chortles 2\n", "\n", "[156060 rows x 2 columns]" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Positional slice of the columns at positions 2-4 of `train`.\n",
 "# NOTE(review): not referenced by any cell visible here; kept in case a later cell reads it.\n",
 "t_sm = train[train.columns[2:5]]\n",
 "\n",
 "# NOTE(review): the original chained assignment left 't0' as the constant 'neg' in every row.\n",
 "# It is kept only so the columns written to kaggle_csv.csv do not change; drop it if unused.\n",
 "train['t0'] = 'neg'\n",
 "\n",
 "# Collapse the 5-point Sentiment scale into a coarse label:\n",
 "#   0, 1 -> 'neg'    3, 4 -> 'pos'    2 stays the integer 2 (later cells compare S2 against 2).\n",
 "# Series.map builds the column in one step. The previous form, train['S2'][mask] = ...,\n",
 "# is chained indexing: it triggers SettingWithCopyWarning and may write to a temporary copy.\n",
 "# The raw 'Sentiment' column is no longer written to here.\n",
 "S2_LABELS = {0: 'neg', 1: 'neg', 2: 2, 3: 'pos', 4: 'pos'}\n",
 "train['S2'] = train['Sentiment'].map(S2_LABELS)\n",
 "\n",
 "# Two-column working copy; .copy() makes it independent of `train`.\n",
 "train_sm = train[['Phrase', 'S2']].copy()\n",
 "\n",
 "train.to_csv('kaggle_csv.csv')\n",
 "\n",
 "train_sm"
 ] },
 { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "156060" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_sm)" ] },
 { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "34345" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_sm[train_sm['S2'] == 'neg'])" ] },
 { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "42133" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_sm[train_sm['S2'] == 'pos'])" ] },
 { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "79582" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"len(train_sm[train_sm['S2'] == 2])" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Class sizes reported by the three len(...) cells above, in the order neg, pos, neutral.\n",
 "# The third entry used to repeat the 'pos' count (42133); the neutral count is 79582.\n",
 "ar = [34345, 42133, 79582]\n",
 "\n",
 "# pyplot has no barplot() (that name belongs to seaborn); plt.bar draws the bars.\n",
 "plt.bar(['neg', 'pos', 'neutral'], ar)\n",
 "plt.ylabel('number of phrases')\n",
 "plt.show()"
 ] },
 { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAl4AAAFzCAYAAADv+wfzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAVE0lEQVR4nO3df9CdZZ3f8c/XhJAl/AgNsRVDmrAyqRq2CTw63WV0BHTCChIp7Yi6qwhs1LbA1paWHUvXdcaZnYFdK2LrZFb5sTqygygqs7UyFpXNiJsEWARC6jaLbLoK2VAwrsNKytU/8sgEDCSSc677+fF6zTDPc+5zOPfX8Qzznuu+n+tUay0AAIzfS4YeAABgthBeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0MncoQc4EMccc0xbtmzZ0GMAAOzX5s2b/7a1tnhfz02L8Fq2bFk2bdo09BgAAPtVVd9/vudcagQA6ER4AQB0IrwAADqZFvd47ctTTz2V7du358knnxx6lOc1f/78LFmyJIcccsjQowAAU8C0Da/t27fniCOOyLJly1JVQ4/zc1pr2blzZ7Zv357ly5cPPQ4AMAVM20uNTz75ZBYtWjQloytJqiqLFi2a0ityAEBf0za8kkzZ6PqZqT4fANDXtA6vA/XQQw9l5cqVzzq2adOmXHLJJb/Q+3zoQx/KVVddNcrRAIBZZNre43WwJiYmMjExMfQYAMAsMitWvPa2bdu2rF69OldeeWXOOuusJHtWsi644IK84Q1vyPHHH5+rr776mdd/5CMfyYoVK/LGN74xW7duHWpsAGAGmFUrXlu3bs15552Xa6+9No8//ni++c1vPvPcgw8+mNtvvz27du3KihUr8v73vz/33ntvbrzxxtx9993ZvXt3TjrppJx88skD/i8AAKazWbPitWPHjqxduzaf+cxnsmrVqp97/swzz8yhhx6aY445Ji996UvzyCOP5I477sg555yTww47LEceeWTOPvvsASYHAGaKWRNeRx11VI477rhs2LBhn88feuihz/w+Z86c7N69O4m/TAQARmfWXGqcN29ebrnllqxZsyaHH354jj322P3+O69//etz/vnn5/LLL8/u3bvzla98Je9973s7TAsA08cpHz9l6BEO2oaL970wM2qzZsUrSRYsWJBbb701H/3oR/PEE0/s9/UnnXRS3va2t2XVqlU599xz87rXva7DlADATFWttaFn2K+JiYm2adOmZx3bsmVLXvnKVw400YGbLnMCwItlxevZqmpza22fe1bNqhUvAIAhCS8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQyY3auP/myG0b6fpuvfNdI3w8AwIrXi3TFFVfkYx/72DOPP/jBD+bqq68ecCIAYKoTXi/ShRdemOuvvz5J8vTTT+fGG2/MO9/5zoGnAgCmshlzqbG3ZcuWZdGiRbn77rvzyCOPZPXq1Vm0aNHQYwEAU5jwOggXXXRRrrvuuvzwhz/MBRdcMPQ4AMAU51LjQTjnnHPy1a9+NRs3bsyaNWuGHgcAmOKseB2EefPm5dRTT83ChQszZ86coccBAKa4GRNeQ2z/8PTTT+fOO+/MTTfd1P3cAMD041Lji/TAAw/kFa94RU4//fSccMIJQ48DAEwDM2bFq7dXvepV2bZt29BjAADTiBUvAIBOxhZeVfXpqnq0qu7b69iVVfVgVd1bVV+sqoXjOj8AwFQzzhWv65Kc8ZxjtyVZ2Vr7lST/K8nvjPH8AABTytjCq7X2rSSPPefY11pruycf3plkybjODwAw1Qx5c/0FSf7k+Z6sqnVJ1iXJ0qVLe80EMFKnfPyUoUc4aBsu3jD0CDBjDBJeVfXBJLuTfPb5XtNaW59kfZJMTEy0/b3nwx8+cWTzJcnS//zdkb4fAED3v2qsqncnOSvJO1tr+w2qqeqTn/xkVq1alVWrVmX58uU59dRThx4JAJjiuoZXVZ2
R5D8mObu19pOe5x61973vfbnnnnuycePGLFmyJB/4wAeGHgkAmOLGuZ3E55J8O8mKqtpeVRcmuSbJEUluq6p7quqT4zp/L5deemlOO+20vOUtbxl6FABgihvbPV6ttbfv4/CnxnW+IVx33XX5/ve/n2uuuWboUQCAacBXBr1ImzdvzlVXXZU77rgjL3mJLwAAAPZPMbxI11xzTR577LGceuqpWbVqVS666KKhRwIAprgZs+LVe/uHa6+9tuv5AIDpz4oXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4H4aGHHsrKlSuHHgMAmCZmzD5ep3z8lJG+34aLN4z0/QAArHiNyLZt27J69eps3Lhx6FEAgClKeI3A1q1bc+655+baa6/Na17zmqHHAQCmqBlzqXEoO3bsyNq1a3PzzTfn1a9+9dDjAABTmBWvg3TUUUfluOOOy4YN7gkDAF6YFa+DNG/evNxyyy1Zs2ZNDj/88LzjHe8YeiQAYIoSXiOwYMGC3HrrrXnTm96UBQsWZO3atUOPBABMQTMmvIbY/mHZsmW57777kiQLFy70F40AwAuaMeHF9DPqvdeGYL83AH4Rbq4HAOhEeAEAdDKtw6u1NvQIL2iqzwcA9DVtw2v+/PnZuXPnlI2b1lp27tyZ+fPnDz0KADBFTNub65csWZLt27dnx44dQ4/yvObPn58lS5YMPQYAMEVM2/A65JBDsnz58qHHAAA4YNP2UiMAwHQjvAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ2MLr6r6dFU9WlX37XXsH1TVbVX1vcmfR4/r/AAAU804V7yuS3LGc45dnuTrrbUTknx98jEAwKwwtvBqrX0ryWPPObw2yfWTv1+f5K3jOj8AwFTT+x6vf9ha+0GSTP58aefzAwAMZsreXF9V66pqU1Vt2rFjx9DjAAActN7h9UhVvSxJJn8++nwvbK2tb61NtNYmFi9e3G1AAIBx6R1eX07y7snf353kS53PDwAwmHFuJ/G5JN9OsqKqtlfVhUl+P8mbqup7Sd40+RgAYFaYO643bq29/XmeOn1c5wQAmMqm7M31AAAzjfACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdDJ36AGA0Tv5shuGHuGgbb7yXUOPADByVrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOBgmvqvq3VXV/Vd1XVZ+rqvlDzAEA0FP38Kqqlye5JMlEa21lkjlJzus9BwBAb0Ndapyb5Jeqam6Sw5L8zUBzAAB00z28Wmv/J8lVSR5O8oMkT7TWvtZ7DgCA3oa41Hh0krVJlic5NsmCqvqNfbxuXVVtqqpNO3bs6D0mAMDIDXGp8Y1J/qq1tqO19lSSLyT5tee+qLW2vrU20VqbWLx4cfchAQBGbYjwejjJP6uqw6qqkpyeZMsAcwAAdDXEPV7fSfL5JHcl+e7kDOt7zwEA0NvcIU7aWvvdJL87xLkBAIZi53oAgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOpl7IC+qqvlJLkzy6iTzf3a8tXbBmOYCAJhxDnTF64+T/KMka5J8M8mSJLvGNRQAwEx0oOH1itbaFUn+rrV2fZIzk5w4vrEAAGaeAw2vpyZ/Pl5VK5MclWTZWCYCAJihDugeryTrq+roJP8pyZeTHJ7kirFNBQAwAx1oeH29tfZ/k3wryfFJUlXLxzYVAMAMdKCXGm/
ex7HPj3IQAICZ7gVXvKrqn2TPFhJHVdU/3+upI7PXthIAAOzf/i41rkhyVpKFSd6y1/FdSX5rXEMBAMxELxherbUvJflSVf1qa+3bnWYCAJiRDvTm+rur6l/HzvUAAC+anesBADqxcz0AQCd2rgcA6MTO9QAAnexvH68P7PXwPZM/PzH5c8FYJgIAmKH2t+J1xOTPFUlekz2rXcmePb2+Na6hAABmov3t4/V7SVJVX0tyUmtt1+TjDyW5aezTAQDMIAd6c/3SJD/d6/FP4+Z6AIBfyIHeXP/HSf68qr6YpCU5J8n1Y5sKAGAGOqDwaq19pKr+e5LXTR56T2vt7vGNBQAw8xzoildaa3cluWuMswAAzGgHeo8XAAAHSXgBAHQySHhV1cKq+nxVPVhVW6rqV4eYAwCgpwO+x2vEPpbkq621f1FV85IcNtAcAADddA+vqjoyyeuTnJ8krbWf5tl7hAEAzEhDXGo8PsmOJNdW1d1V9UdV9XPf+1hV66pqU1Vt2rFjR/8pAQBGbIjwmpvkpCT/rbW2OsnfJbn8uS9qra1vrU201iYWL17ce0YAgJEbIry2J9neWvvO5OPPZ0+IAQDMaN3Dq7X2wyR/XVUrJg+dnuSB3nMAAPQ21F81Xpzks5N/0bgtyXsGmgMAoJtBwqu1dk+SiSHODQAwFDvXAwB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANDJYOFVVXOq6u6qunWoGQAAehpyxevSJFsGPD8AQFeDhFdVLUlyZpI/GuL8AABDmDvQef9Lkv+Q5Ijne0FVrUuyLkmWLl06shOffNkNI3uvoWy+8l1DjwAAvAjdV7yq6qwkj7bWNr/Q61pr61trE621icWLF3eaDgBgfIa41HhKkrOr6qEkNyY5rao+M8AcAABddQ+v1trvtNaWtNaWJTkvyf9srf1G7zkAAHqzjxcAQCdD3VyfJGmtfSPJN4acAQCgFyteAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE7mDj0AAMxWD3/4xKFHGI2jjxx6gmnDihcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6KR7eFXVcVV1e1Vtqar7q+rS3jMAAAxh7gDn3J3k37XW7qqqI5JsrqrbWmsPDDALAEA33Ve8Wms/aK3dNfn7riRbkry89xwAAL0NseL1jKpalmR1ku/s47l1SdYlydKlS7vOBQzv4Q+fOPQIo3H0kUNPMGOdfNkNQ49w0L54xNAT0NtgN9dX1eFJbk7y2621Hz33+dba+tbaRGttYvHixf0HBAAYsUHCq6oOyZ7o+mxr7QtDzAAA0NsQf9VYST6VZEtr7Q97nx8AYChDrHidkuQ3k5xWVfdM/vPmAeYAAOiq+831rbU/S1K9zwsAMDQ71wMAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6mTv0APziHv7wiUOPMBpHHzn0BADQlRUvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwC
gE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgk0HCq6rOqKqtVfWXVXX5EDMAAPTWPbyqak6STyT59SSvSvL2qnpV7zkAAHobYsXrtUn+srW2rbX20yQ3Jlk7wBwAAF0NEV4vT/LXez3ePnkMAGBGmzvAOWsfx9rPvahqXZJ1kw9/XFVbxzrVNPKP+5zmmCR/2+dU01ddsq+PM6Pgcz51+JyPj8/51DHiz/nz/l87RHhtT3LcXo+XJPmb576otbY+yfpeQ/FsVbWptTYx9BwwTj7nzAY+51PLEJcaNyY5oaqWV9W8JOcl+fIAcwAAdNV9xau1truq/k2S/5FkTpJPt9bu7z0HAEBvQ1xqTGvtT5P86RDn5oC5zMts4HPObOBzPoVUaz93XzsAAGPgK4MAADoRXjxLVX26qh6tqvuGngXGpaqOq6rbq2pLVd1fVZcOPROMWlXNr6o/r6q/mPyc/97QM+FSI89RVa9P8uMkN7TWVg49D4xDVb0syctaa3dV1RFJNid5a2vtgYFHg5GpqkqyoLX246o6JMmfJbm0tXbnwKPNala8eJbW2reSPDb0HDBOrbUftNbumvx9V5It8Q0azDBtjx9PPjxk8h+rLQMTXsCsVlXLkqxO8p1hJ4HRq6o5VXVPkkeT3NZa8zkfmPACZq2qOjzJzUl+u7X2o6HngVFrrf2/1tqq7PmWmNdWlVtIBia8gFlp8p6Xm5N8trX2haHngXFqrT2e5BtJzhh4lFlPeAGzzuRNx59KsqW19odDzwPjUFWLq2rh5O+/lOSNSR4cdiqEF89SVZ9L8u0kK6pqe1VdOPRMMAanJPnNJKdV1T2T/7x56KFgxF6W5Paqujd7vif5ttbarQPPNOvZTgIAoBMrXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AJmlKpaWFX/avL3Y6vq80PPBPAztpMAZpTJ7168tbXmq1GAKWfu0AMAjNjvJ/nlyS8G/l6SV7bWVlbV+UnemmROkpVJ/iDJvOzZSPXvk7y5tfZYVf1ykk8kWZzkJ0l+q7Vmt29gJFxqBGaay5P878kvBr7sOc+tTPKOJK9N8pEkP2mtrc6eb2t41+Rr1ie5uLV2cpJ/n+S/dpkamBWseAGzye2ttV1JdlXVE0m+Mnn8u0l+paoOT/JrSW7a83WOSZJD+48JzFTCC5hN/n6v35/e6/HT2fPfw5ckeXxytQxg5FxqBGaaXUmOeDH/YmvtR0n+qqr+ZZLUHv90lMMBs5vwAmaU1trOJBuq6r4kV76It3hnkgur6i+S3J9k7SjnA2Y320kAAHRixQsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAn/x/gWAIZuVopPAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timekinddata
01y4
12y9
23y2
31z1
42z2
53z3
61k11
72k12
83k13
\n", "
" ], "text/plain": [ " time kind data\n", "0 1 y 4\n", "1 2 y 9\n", "2 3 y 2\n", "3 1 z 1\n", "4 2 z 2\n", "5 3 z 3\n", "6 1 k 11\n", "7 2 k 12\n", "8 3 k 13" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "import pandas as pd\n",
 "import seaborn as sns\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Toy grouped-bar example. The series are held in one insertion-ordered dict instead of\n",
 "# top-level x / y / z / k lists, so this demo no longer overwrites the notebook's `y`\n",
 "# (the Sentiment labels loaded in the first cell).\n",
 "demo_time = [1, 2, 3]\n",
 "demo_series = {'y': [4, 9, 2], 'z': [1, 2, 3], 'k': [11, 12, 13]}\n",
 "\n",
 "# Long format: one (time, kind, data) row per bar, grouped by series.\n",
 "df = pd.DataFrame(\n",
 "    [(t, kind, value)\n",
 "     for kind, values in demo_series.items()\n",
 "     for t, value in zip(demo_time, values)],\n",
 "    columns=['time', 'kind', 'data'],\n",
 ")\n",
 "plt.figure(figsize=(10, 6))\n",
 "sns.barplot(x='time', hue='kind', y='data', data=df)\n",
 "plt.show()\n",
 "\n",
 "df"
 ] },
 { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "import matplotlib.pyplot as plt\n",
 "\n",
 "# Count each S2 class from train_sm instead of hard-coding the numbers: the old literal\n",
 "# list [34345, 42133, 42133] repeated the 'pos' count in the slot for the neutral class.\n",
 "# Order matches the labels below: 'neg', 'pos', neutral (integer 2).\n",
 "ar = [\n",
 "    int((train_sm['S2'] == 'neg').sum()),\n",
 "    int((train_sm['S2'] == 'pos').sum()),\n",
 "    int((train_sm['S2'] == 2).sum()),\n",
 "]\n",
 "\n",
 "df = pd.DataFrame({'labels': ['n', 'p', '2'], 'nums': ar})\n",
 "sns.barplot(x='labels', y='nums', data=df)"
 ] },
 { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "\n", "train.to_csv('kaggle_csv.csv')\n", "\n", "# sns.barplot(x='Sentiment', y=train['Sentiment'].count_(), data=train)" ] },
 { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0PhraseIdSentenceIdPhraseSentimentt0S2
0011A series of escapades demonstrating the adage ...1negneg
1121A series of escapades demonstrating the adage ...2neg2
2231A series2neg2
3341A2neg2
4451series2neg2
........................
1560551560551560568544Hearst 's2neg2
1560561560561560578544forced avuncular chortles1negneg
1560571560571560588544avuncular chortles3negpos
1560581560581560598544avuncular2neg2
1560591560591560608544chortles2neg2
\n", "

156060 rows × 7 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 PhraseId SentenceId \\\n", "0 0 1 1 \n", "1 1 2 1 \n", "2 2 3 1 \n", "3 3 4 1 \n", "4 4 5 1 \n", "... ... ... ... \n", "156055 156055 156056 8544 \n", "156056 156056 156057 8544 \n", "156057 156057 156058 8544 \n", "156058 156058 156059 8544 \n", "156059 156059 156060 8544 \n", "\n", " Phrase Sentiment t0 S2 \n", "0 A series of escapades demonstrating the adage ... 1 neg neg \n", "1 A series of escapades demonstrating the adage ... 2 neg 2 \n", "2 A series 2 neg 2 \n", "3 A 2 neg 2 \n", "4 series 2 neg 2 \n", "... ... ... ... ... \n", "156055 Hearst 's 2 neg 2 \n", "156056 forced avuncular chortles 1 neg neg \n", "156057 avuncular chortles 3 neg pos \n", "156058 avuncular 2 neg 2 \n", "156059 chortles 2 neg 2 \n", "\n", "[156060 rows x 7 columns]" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# kaggle_csv.csv is written by train.to_csv(...) with the row index as its first, unnamed\n",
 "# column. index_col=0 restores it as the index instead of loading it as a data column\n",
 "# called 'Unnamed: 0'. After the CSV round-trip the S2 values are strings ('neg', 'pos', '2').\n",
 "k = pd.read_csv('kaggle_csv.csv', index_col=0)\n",
 "k"
 ] },
 { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PhraseS2
0A series of escapades demonstrating the adage ...neg
1A series of escapades demonstrating the adage ...2
2A series2
3A2
4series2
.........
156055Hearst 's2
156056forced avuncular chortlesneg
156057avuncular chortlespos
156058avuncular2
156059chortles2
\n", "

156060 rows × 2 columns

\n", "
" ], "text/plain": [ " Phrase S2\n", "0 A series of escapades demonstrating the adage ... neg\n", "1 A series of escapades demonstrating the adage ... 2\n", "2 A series 2\n", "3 A 2\n", "4 series 2\n", "... ... ...\n", "156055 Hearst 's 2\n", "156056 forced avuncular chortles neg\n", "156057 avuncular chortles pos\n", "156058 avuncular 2\n", "156059 chortles 2\n", "\n", "[156060 rows x 2 columns]" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Keep only the phrase text and the coarse label; .copy() makes k2 independent of k.\n",
 "k2 = k[['Phrase', 'S2']].copy()\n",
 "k2"
 ] },
 { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [
 "# Drop the neutral rows. S2 holds the string '2' here because k was read back from CSV.\n",
 "k2 = k2.drop(k2[k2['S2'] == '2'].index)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Encode the remaining labels as integers: 'neg' -> 0, 'pos' -> 1.\n",
 "# .loc assigns on k2 itself; the chained form k2['S2'][mask] = ... goes through an\n",
 "# intermediate Series and is not guaranteed to update k2.\n",
 "k2.loc[k2['S2'] == 'neg', 'S2'] = 0\n",
 "k2.loc[k2['S2'] == 'pos', 'S2'] = 1"
 ] },
 { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textPoN
0A series of escapades demonstrating the adage ...0
21good for the goose1
22good1
33the gander , some of which occasionally amuses...0
46amuses1
.........
156047quietly suggesting the sadness and obsession b...0
156051sadness and obsession0
156052sadness and0
156056forced avuncular chortles0
156057avuncular chortles1
\n", "

76478 rows × 2 columns

\n", "
" ], "text/plain": [ " text PoN\n", "0 A series of escapades demonstrating the adage ... 0\n", "21 good for the goose 1\n", "22 good 1\n", "33 the gander , some of which occasionally amuses... 0\n", "46 amuses 1\n", "... ... ..\n", "156047 quietly suggesting the sadness and obsession b... 0\n", "156051 sadness and obsession 0\n", "156052 sadness and 0\n", "156056 forced avuncular chortles 0\n", "156057 avuncular chortles 1\n", "\n", "[76478 rows x 2 columns]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Final column names for the exported file: phrase text and the 0/1 label.\n",
 "k2 = k2.rename(columns={'Phrase': 'text', 'S2': 'PoN'})\n",
 "k2"
 ] },
 { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [
 "# Write the two-column dataset without the row index.\n",
 "k2.to_csv(path_or_buf='kaggle_pos1_neg0.csv', index=False)"
 ] },
 { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0PhraseIdSentenceIdPhraseSentimentt0S2
0011A series of escapades demonstrating the adage ...1negneg
1121A series of escapades demonstrating the adage ...2neg2
2231A series2neg2
3341A2neg2
4451series2neg2
........................
1560551560551560568544Hearst 's2neg2
1560561560561560578544forced avuncular chortles1negneg
1560571560571560588544avuncular chortles3negpos
1560581560581560598544avuncular2neg2
1560591560591560608544chortles2neg2
\n", "

156060 rows × 7 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 PhraseId SentenceId \\\n", "0 0 1 1 \n", "1 1 2 1 \n", "2 2 3 1 \n", "3 3 4 1 \n", "4 4 5 1 \n", "... ... ... ... \n", "156055 156055 156056 8544 \n", "156056 156056 156057 8544 \n", "156057 156057 156058 8544 \n", "156058 156058 156059 8544 \n", "156059 156059 156060 8544 \n", "\n", " Phrase Sentiment t0 S2 \n", "0 A series of escapades demonstrating the adage ... 1 neg neg \n", "1 A series of escapades demonstrating the adage ... 2 neg 2 \n", "2 A series 2 neg 2 \n", "3 A 2 neg 2 \n", "4 series 2 neg 2 \n", "... ... ... ... ... \n", "156055 Hearst 's 2 neg 2 \n", "156056 forced avuncular chortles 1 neg neg \n", "156057 avuncular chortles 3 neg pos \n", "156058 avuncular 2 neg 2 \n", "156059 chortles 2 neg 2 \n", "\n", "[156060 rows x 7 columns]" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display k, the frame read back from kaggle_csv.csv, before further columns are derived from it.\n", "k" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " This is separate from the ipykernel package so we can avoid doing imports until\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " after removing the cwd from sys.path.\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a 
DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \"\"\"\n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " \n", "/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " import sys\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0PhraseIdSentenceIdPhraseSentimentt0S2S0
0011A series of escapades demonstrating the adage ...1negneg1
1121A series of escapades demonstrating the adage ...2neg22
2231A series2neg22
3341A2neg22
4451series2neg22
...........................
1560551560551560568544Hearst 's2neg22
1560561560561560578544forced avuncular chortles1negneg1
1560571560571560588544avuncular chortles3negpos3
1560581560581560598544avuncular2neg22
1560591560591560608544chortles2neg22
\n", "

156060 rows × 8 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 PhraseId SentenceId \\\n", "0 0 1 1 \n", "1 1 2 1 \n", "2 2 3 1 \n", "3 3 4 1 \n", "4 4 5 1 \n", "... ... ... ... \n", "156055 156055 156056 8544 \n", "156056 156056 156057 8544 \n", "156057 156057 156058 8544 \n", "156058 156058 156059 8544 \n", "156059 156059 156060 8544 \n", "\n", " Phrase Sentiment t0 S2 \\\n", "0 A series of escapades demonstrating the adage ... 1 neg neg \n", "1 A series of escapades demonstrating the adage ... 2 neg 2 \n", "2 A series 2 neg 2 \n", "3 A 2 neg 2 \n", "4 series 2 neg 2 \n", "... ... ... ... ... \n", "156055 Hearst 's 2 neg 2 \n", "156056 forced avuncular chortles 1 neg neg \n", "156057 avuncular chortles 3 neg pos \n", "156058 avuncular 2 neg 2 \n", "156059 chortles 2 neg 2 \n", "\n", " S0 \n", "0 1 \n", "1 2 \n", "2 2 \n", "3 2 \n", "4 2 \n", "... .. \n", "156055 2 \n", "156056 1 \n", "156057 3 \n", "156058 2 \n", "156059 2 \n", "\n", "[156060 rows x 8 columns]" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k = pd.read_csv('kaggle_csv.csv')\n", "k['S0'] = k['Sentiment']\n", "k['S0'][k['S0'] == 0] = 0\n", "k['S0'][k['S0'] == 1] = 1\n", "k['S0'][k['S0'] == 2] = 1\n", "k['S0'][k['S0'] == 3] = 1\n", "k['S0'][k['S0'] == 4] = 1\n", "k" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0PhraseIdSentenceIdPhraseSentimentt0S2
0011A series of escapades demonstrating the adage ...1negneg
1121A series of escapades demonstrating the adage ...2neg2
2231A series2neg2
3341A2neg2
4451series2neg2
........................
1560551560551560568544Hearst 's2neg2
1560561560561560578544forced avuncular chortles1negneg
1560571560571560588544avuncular chortles3negpos
1560581560581560598544avuncular2neg2
1560591560591560608544chortles2neg2
\n", "

156060 rows × 7 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 PhraseId SentenceId \\\n", "0 0 1 1 \n", "1 1 2 1 \n", "2 2 3 1 \n", "3 3 4 1 \n", "4 4 5 1 \n", "... ... ... ... \n", "156055 156055 156056 8544 \n", "156056 156056 156057 8544 \n", "156057 156057 156058 8544 \n", "156058 156058 156059 8544 \n", "156059 156059 156060 8544 \n", "\n", " Phrase Sentiment t0 S2 \n", "0 A series of escapades demonstrating the adage ... 1 neg neg \n", "1 A series of escapades demonstrating the adage ... 2 neg 2 \n", "2 A series 2 neg 2 \n", "3 A 2 neg 2 \n", "4 series 2 neg 2 \n", "... ... ... ... ... \n", "156055 Hearst 's 2 neg 2 \n", "156056 forced avuncular chortles 1 neg neg \n", "156057 avuncular chortles 3 neg pos \n", "156058 avuncular 2 neg 2 \n", "156059 chortles 2 neg 2 \n", "\n", "[156060 rows x 7 columns]" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k3 = pd.read_csv('kaggle_csv.csv')\n", "k3" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "156060" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PhraseS0S0_0
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Phrase, S0, S0_0]\n", "Index: []" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['S0_0'] == 0]" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "156060" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (df['S0'] == 1).count()\n", "(df['S0'] == 0).count()" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "156060" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "invalid literal for int() with base 10: 'neg'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S0'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m 5880\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5881\u001b[0m new_data = self._data.astype(\n\u001b[0;32m-> 5882\u001b[0;31m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5883\u001b[0m )\n\u001b[1;32m 5884\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 581\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 582\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[1;32m 436\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb_items\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0malign_copy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36m_astype\u001b[0;34m(self, dtype, copy, 
errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mvals1d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals1d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# TODO(extension)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 705\u001b[0m \u001b[0;31m# work around NumPy brokenness, #1987\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missubdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minteger\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype_intsafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;31m# if we have a datetime/timedelta array of objects\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.astype_intsafe\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'neg'" ] } ], "source": [ "df['S0'].astype(int)" ] }, { "cell_type": "code", "execution_count": 147, "metadata": {}, "outputs": [], "source": [ "k3 = pd.read_csv('kaggle_csv.csv')" ] }, { "cell_type": "code", "execution_count": 149, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k3['Sentiment'].values.min()" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "148988" ] }, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame()\n", "df['Phrase'] = k3['Phrase']\n", "df['S0'] = k3['Sentiment']\n", "\n", "df['S0_0'] = [0 if x == 0 else 1 for x in df['S0']]\n", "len(df[df['S0_0'] == 1])" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7072" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df[df['S0_0'] == 0])" ] }, { "cell_type": "code", 
"execution_count": 154, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PhraseS0S0_0
0A series of escapades demonstrating the adage ...11
1A series of escapades demonstrating the adage ...21
2A series21
3A21
4series21
............
156055Hearst 's21
156056forced avuncular chortles11
156057avuncular chortles31
156058avuncular21
156059chortles21
\n", "

156060 rows × 3 columns

\n", "
" ], "text/plain": [ " Phrase S0 S0_0\n", "0 A series of escapades demonstrating the adage ... 1 1\n", "1 A series of escapades demonstrating the adage ... 2 1\n", "2 A series 2 1\n", "3 A 2 1\n", "4 series 2 1\n", "... ... .. ...\n", "156055 Hearst 's 2 1\n", "156056 forced avuncular chortles 1 1\n", "156057 avuncular chortles 3 1\n", "156058 avuncular 2 1\n", "156059 chortles 2 1\n", "\n", "[156060 rows x 3 columns]" ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }