{
"cells": [
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"## =======================================================\n",
"## IMPORTING\n",
"## =======================================================\n",
"import pandas as pd\n",
"train=pd.read_csv(\"../WK7/kaggle-sentiment/train.tsv\", delimiter='\\t')\n",
"y=train['Sentiment'].values\n",
"X=train['Phrase'].values\n",
"train.to_csv('kaggle_csv.csv')"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" import sys\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 2 columns
\n",
"
"
],
"text/plain": [
" Phrase S2\n",
"0 A series of escapades demonstrating the adage ... neg\n",
"1 A series of escapades demonstrating the adage ... 2\n",
"2 A series 2\n",
"3 A 2\n",
"4 series 2\n",
"... ... ...\n",
"156055 Hearst 's 2\n",
"156056 forced avuncular chortles neg\n",
"156057 avuncular chortles pos\n",
"156058 avuncular 2\n",
"156059 chortles 2\n",
"\n",
"[156060 rows x 2 columns]"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all_df.to_csv('kaggle_0_not0.csv', index=False)\n",
"t_sm = train[train.columns[2:5]]\n",
"train['t0'] = train['Sentiment'][train['Sentiment'] == 0] = 'neg'\n",
"train['S2'] = train['Sentiment']\n",
"train['S2'][train['S2'] == 0] = 'neg'\n",
"train['S2'][train['S2'] == 1] = 'neg'\n",
"train['S2'][train['S2'] == 3] = 'pos'\n",
"train['S2'][train['S2'] == 4] = 'pos'\n",
"train_sm = pd.DataFrame()\n",
"train_sm['Phrase'] = train['Phrase']\n",
"train_sm['S2'] = train['S2']\n",
"train_sm\n",
"\n",
"\n",
"train.to_csv('kaggle_csv.csv')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"34345"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 'neg'])"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42133"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 'pos'])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"79582"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 2])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "module 'matplotlib.pyplot' has no attribute 'barplot'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbarplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: module 'matplotlib.pyplot' has no attribute 'barplot'"
]
}
],
"source": [
"ar = [34345, 42133, 42133]\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.barplot()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAFzCAYAAADv+wfzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAVE0lEQVR4nO3df9CdZZ3f8c/XhJAl/AgNsRVDmrAyqRq2CTw63WV0BHTCChIp7Yi6qwhs1LbA1paWHUvXdcaZnYFdK2LrZFb5sTqygygqs7UyFpXNiJsEWARC6jaLbLoK2VAwrsNKytU/8sgEDCSSc677+fF6zTDPc+5zOPfX8Qzznuu+n+tUay0AAIzfS4YeAABgthBeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0MncoQc4EMccc0xbtmzZ0GMAAOzX5s2b/7a1tnhfz02L8Fq2bFk2bdo09BgAAPtVVd9/vudcagQA6ER4AQB0IrwAADqZFvd47ctTTz2V7du358knnxx6lOc1f/78LFmyJIcccsjQowAAU8C0Da/t27fniCOOyLJly1JVQ4/zc1pr2blzZ7Zv357ly5cPPQ4AMAVM20uNTz75ZBYtWjQloytJqiqLFi2a0ityAEBf0za8kkzZ6PqZqT4fANDXtA6vA/XQQw9l5cqVzzq2adOmXHLJJb/Q+3zoQx/KVVddNcrRAIBZZNre43WwJiYmMjExMfQYAMAsMitWvPa2bdu2rF69OldeeWXOOuusJHtWsi644IK84Q1vyPHHH5+rr776mdd/5CMfyYoVK/LGN74xW7duHWpsAGAGmFUrXlu3bs15552Xa6+9No8//ni++c1vPvPcgw8+mNtvvz27du3KihUr8v73vz/33ntvbrzxxtx9993ZvXt3TjrppJx88skD/i8AAKazWbPitWPHjqxduzaf+cxnsmrVqp97/swzz8yhhx6aY445Ji996UvzyCOP5I477sg555yTww47LEceeWTOPvvsASYHAGaKWRNeRx11VI477rhs2LBhn88feuihz/w+Z86c7N69O4m/TAQARmfWXGqcN29ebrnllqxZsyaHH354jj322P3+O69//etz/vnn5/LLL8/u3bvzla98Je9973s7TAsA08cpHz9l6BEO2oaL970wM2qzZsUrSRYsWJBbb701H/3oR/PEE0/s9/UnnXRS3va2t2XVqlU599xz87rXva7DlADATFWttaFn2K+JiYm2adOmZx3bsmVLXvnKVw400YGbLnMCwItlxevZqmpza22fe1bNqhUvAIAhCS8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQyY3auP/myG0b6fpuvfNdI3w8AwIrXi3TFFVfkYx/72DOPP/jBD+bqq68ecCIAYKoTXi/ShRdemOuvvz5J8vTTT+fGG2/MO9/5zoGnAgCmshlzqbG3ZcuWZdGiRbn77rvzyCOPZPXq1Vm0aNHQYwEAU5jwOggXXXRRrrvuuvzwhz/MBRdcMPQ4AMAU51LjQTjnnHPy1a9+NRs3bsyaNWuGHgcAmOKseB2EefPm5dRTT83ChQszZ86coccBAKa4GRNeQ2z/8PTTT+fOO+/MTTfd1P3cAMD041Lji/TAAw/kFa94RU4//fSccMIJQ48DAEwDM2bFq7dXvepV2bZt29BjAADTiBUvAIBOxhZeVfXpqnq0qu7b69iVVfVgVd1bVV+sqoXjOj8AwFQzzhWv65Kc8ZxjtyVZ2Vr7lST/K8nvjPH8AABTytjCq7X2rSSPPefY11pruycf3plkybjODwAw1Qx5c/0FSf7k+Z6sqnVJ1iXJ0qVLe80EMFKnfPyUoUc4aBsu3jD0CDBjDBJeVfXBJLuTfPb5XtNaW59kfZJMTEy0/b3nwx8+cWTzJcnS//zdkb4fAED3v2qsqncnOSvJO1tr+w2qqeqTn/xkVq1alVWrVmX58uU59dRThx
4JAJjiuoZXVZ2R5D8mObu19pOe5x61973vfbnnnnuycePGLFmyJB/4wAeGHgkAmOLGuZ3E55J8O8mKqtpeVRcmuSbJEUluq6p7quqT4zp/L5deemlOO+20vOUtbxl6FABgihvbPV6ttbfv4/CnxnW+IVx33XX5/ve/n2uuuWboUQCAacBXBr1ImzdvzlVXXZU77rgjL3mJLwAAAPZPMbxI11xzTR577LGceuqpWbVqVS666KKhRwIAprgZs+LVe/uHa6+9tuv5AIDpz4oXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4H4aGHHsrKlSuHHgMAmCZmzD5ep3z8lJG+34aLN4z0/QAArHiNyLZt27J69eps3Lhx6FEAgClKeI3A1q1bc+655+baa6/Na17zmqHHAQCmqBlzqXEoO3bsyNq1a3PzzTfn1a9+9dDjAABTmBWvg3TUUUfluOOOy4YN7gkDAF6YFa+DNG/evNxyyy1Zs2ZNDj/88LzjHe8YeiQAYIoSXiOwYMGC3HrrrXnTm96UBQsWZO3atUOPBABMQTMmvIbY/mHZsmW57777kiQLFy70F40AwAuaMeHF9DPqvdeGYL83AH4Rbq4HAOhEeAEAdDKtw6u1NvQIL2iqzwcA9DVtw2v+/PnZuXPnlI2b1lp27tyZ+fPnDz0KADBFTNub65csWZLt27dnx44dQ4/yvObPn58lS5YMPQYAMEVM2/A65JBDsnz58qHHAAA4YNP2UiMAwHQjvAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ2MLr6r6dFU9WlX37XXsH1TVbVX1vcmfR4/r/AAAU804V7yuS3LGc45dnuTrrbUTknx98jEAwKwwtvBqrX0ryWPPObw2yfWTv1+f5K3jOj8AwFTT+x6vf9ha+0GSTP58aefzAwAMZsreXF9V66pqU1Vt2rFjx9DjAAActN7h9UhVvSxJJn8++nwvbK2tb61NtNYmFi9e3G1AAIBx6R1eX07y7snf353kS53PDwAwmHFuJ/G5JN9OsqKqtlfVhUl+P8mbqup7Sd40+RgAYFaYO643bq29/XmeOn1c5wQAmMqm7M31AAAzjfACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdDJ36AGA0Tv5shuGHuGgbb7yXUOPADByVrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOBgmvqvq3VXV/Vd1XVZ+rqvlDzAEA0FP38Kqqlye5JMlEa21lkjlJzus9BwBAb0Ndapyb5Jeqam6Sw5L8zUBzAAB00z28Wmv/J8lVSR5O8oMkT7TWvtZ7DgCA3oa41Hh0krVJlic5NsmCqvqNfbxuXVVtqqpNO3bs6D0mAMDIDXGp8Y1J/qq1tqO19lSSLyT5tee+qLW2vrU20VqbWLx4cfchAQBGbYjwejjJP6uqw6qqkpyeZMsAcwAAdDXEPV7fSfL5JHcl+e7kDOt7zwEA0NvcIU7aWvvdJL87xLkBAIZi53oAgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOpl7IC+qqvlJLkzy6iTzf3a8tXbBmOYCAJhxDnTF64+T/KMka5J8M8mSJLvGNRQAwEx0oOH1itbaFUn+rrV2fZIzk5w4vrEAAGaeAw2vpyZ/Pl5VK5MclWTZWCYCAJihDugeryTrq+roJP8pyZeTHJ7kirFNBQAwAx1oeH29tfZ/k3wryfFJUlXLxz
YVAMAMdKCXGm/ex7HPj3IQAICZ7gVXvKrqn2TPFhJHVdU/3+upI7PXthIAAOzf/i41rkhyVpKFSd6y1/FdSX5rXEMBAMxELxherbUvJflSVf1qa+3bnWYCAJiRDvTm+rur6l/HzvUAAC+anesBADqxcz0AQCd2rgcA6MTO9QAAnexvH68P7PXwPZM/PzH5c8FYJgIAmKH2t+J1xOTPFUlekz2rXcmePb2+Na6hAABmov3t4/V7SVJVX0tyUmtt1+TjDyW5aezTAQDMIAd6c/3SJD/d6/FP4+Z6AIBfyIHeXP/HSf68qr6YpCU5J8n1Y5sKAGAGOqDwaq19pKr+e5LXTR56T2vt7vGNBQAw8xzoildaa3cluWuMswAAzGgHeo8XAAAHSXgBAHQySHhV1cKq+nxVPVhVW6rqV4eYAwCgpwO+x2vEPpbkq621f1FV85IcNtAcAADddA+vqjoyyeuTnJ8krbWf5tl7hAEAzEhDXGo8PsmOJNdW1d1V9UdV9XPf+1hV66pqU1Vt2rFjR/8pAQBGbIjwmpvkpCT/rbW2OsnfJbn8uS9qra1vrU201iYWL17ce0YAgJEbIry2J9neWvvO5OPPZ0+IAQDMaN3Dq7X2wyR/XVUrJg+dnuSB3nMAAPQ21F81Xpzks5N/0bgtyXsGmgMAoJtBwqu1dk+SiSHODQAwFDvXAwB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANDJYOFVVXOq6u6qunWoGQAAehpyxevSJFsGPD8AQFeDhFdVLUlyZpI/GuL8AABDmDvQef9Lkv+Q5Ijne0FVrUuyLkmWLl06shOffNkNI3uvoWy+8l1DjwAAvAjdV7yq6qwkj7bWNr/Q61pr61trE621icWLF3eaDgBgfIa41HhKkrOr6qEkNyY5rao+M8AcAABddQ+v1trvtNaWtNaWJTkvyf9srf1G7zkAAHqzjxcAQCdD3VyfJGmtfSPJN4acAQCgFyteAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE7mDj0AAMxWD3/4xKFHGI2jjxx6gmnDihcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6KR7eFXVcVV1e1Vtqar7q+rS3jMAAAxh7gDn3J3k37XW7qqqI5JsrqrbWmsPDDALAEA33Ve8Wms/aK3dNfn7riRbkry89xwAAL0NseL1jKpalmR1ku/s47l1SdYlydKlS7vOBQzv4Q+fOPQIo3H0kUNPMGOdfNkNQ49w0L54xNAT0NtgN9dX1eFJbk7y2621Hz33+dba+tbaRGttYvHixf0HBAAYsUHCq6oOyZ7o+mxr7QtDzAAA0NsQf9VYST6VZEtr7Q97nx8AYChDrHidkuQ3k5xWVfdM/vPmAeYAAOiq+831rbU/S1K9zwsAMDQ71wMAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6mTv0APziHv7wiUOPMBpHHzn0BADQlRUvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAI
BOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgk0HCq6rOqKqtVfWXVXX5EDMAAPTWPbyqak6STyT59SSvSvL2qnpV7zkAAHobYsXrtUn+srW2rbX20yQ3Jlk7wBwAAF0NEV4vT/LXez3ePnkMAGBGmzvAOWsfx9rPvahqXZJ1kw9/XFVbxzrVNPKP+5zmmCR/2+dU01ddsq+PM6Pgcz51+JyPj8/51DHiz/nz/l87RHhtT3LcXo+XJPmb576otbY+yfpeQ/FsVbWptTYx9BwwTj7nzAY+51PLEJcaNyY5oaqWV9W8JOcl+fIAcwAAdNV9xau1truq/k2S/5FkTpJPt9bu7z0HAEBvQ1xqTGvtT5P86RDn5oC5zMts4HPObOBzPoVUaz93XzsAAGPgK4MAADoRXjxLVX26qh6tqvuGngXGpaqOq6rbq2pLVd1fVZcOPROMWlXNr6o/r6q/mPyc/97QM+FSI89RVa9P8uMkN7TWVg49D4xDVb0syctaa3dV1RFJNid5a2vtgYFHg5GpqkqyoLX246o6JMmfJbm0tXbnwKPNala8eJbW2reSPDb0HDBOrbUftNbumvx9V5It8Q0azDBtjx9PPjxk8h+rLQMTXsCsVlXLkqxO8p1hJ4HRq6o5VXVPkkeT3NZa8zkfmPACZq2qOjzJzUl+u7X2o6HngVFrrf2/1tqq7PmWmNdWlVtIBia8gFlp8p6Xm5N8trX2haHngXFqrT2e5BtJzhh4lFlPeAGzzuRNx59KsqW19odDzwPjUFWLq2rh5O+/lOSNSR4cdiqEF89SVZ9L8u0kK6pqe1VdOPRMMAanJPnNJKdV1T2T/7x56KFgxF6W5Paqujd7vif5ttbarQPPNOvZTgIAoBMrXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AJmlKpaWFX/avL3Y6vq80PPBPAztpMAZpTJ7168tbXmq1GAKWfu0AMAjNjvJ/nlyS8G/l6SV7bWVlbV+UnemmROkpVJ/iDJvOzZSPXvk7y5tfZYVf1ykk8kWZzkJ0l+q7Vmt29gJFxqBGaay5P878kvBr7sOc+tTPKOJK9N8pEkP2mtrc6eb2t41+Rr1ie5uLV2cpJ/n+S/dpkamBWseAGzye2ttV1JdlXVE0m+Mnn8u0l+paoOT/JrSW7a83WOSZJD+48JzFTCC5hN/n6v35/e6/HT2fPfw5ckeXxytQxg5FxqBGaaXUmOeDH/YmvtR0n+qqr+ZZLUHv90lMMBs5vwAmaU1trOJBuq6r4kV76It3hnkgur6i+S3J9k7SjnA2Y320kAAHRixQsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAn/x/gWAIZuVopPAAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" time \n",
" kind \n",
" data \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" y \n",
" 4 \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" y \n",
" 9 \n",
" \n",
" \n",
" 2 \n",
" 3 \n",
" y \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" z \n",
" 1 \n",
" \n",
" \n",
" 4 \n",
" 2 \n",
" z \n",
" 2 \n",
" \n",
" \n",
" 5 \n",
" 3 \n",
" z \n",
" 3 \n",
" \n",
" \n",
" 6 \n",
" 1 \n",
" k \n",
" 11 \n",
" \n",
" \n",
" 7 \n",
" 2 \n",
" k \n",
" 12 \n",
" \n",
" \n",
" 8 \n",
" 3 \n",
" k \n",
" 13 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" time kind data\n",
"0 1 y 4\n",
"1 2 y 9\n",
"2 3 y 2\n",
"3 1 z 1\n",
"4 2 z 2\n",
"5 3 z 3\n",
"6 1 k 11\n",
"7 2 k 12\n",
"8 3 k 13"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"x = [1,2,3]\n",
"\n",
"y = [4, 9, 2]\n",
"z = [1, 2, 3]\n",
"k = [11, 12, 13]\n",
"\n",
"# number_of_bars\n",
"n_bars = 3\n",
"\n",
"df = pd.DataFrame(zip(x*n_bars, [\"y\"]*len(x)+[\"z\"]*len(x)+[\"k\"]*len(x), y+z+k), columns=[\"time\", \"kind\", \"data\"])\n",
"plt.figure(figsize=(10, 6))\n",
"sns.barplot(x=\"time\", hue=\"kind\", y=\"data\", data=df)\n",
"plt.show()\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"ar = [34345, 42133, 42133]\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = pd.DataFrame()\n",
"df['labels'] = ['n','p', '2']\n",
"df['nums'] = ar\n",
"df\n",
"sns.barplot(x='labels', y='nums',data = df)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"\n",
"train.to_csv('kaggle_csv.csv')\n",
"\n",
"# sns.barplot(x='Sentiment', y=train['Sentiment'].count_(), data=train)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k = pd.read_csv('kaggle_csv.csv')\n",
"k"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 2 columns
\n",
"
"
],
"text/plain": [
" Phrase S2\n",
"0 A series of escapades demonstrating the adage ... neg\n",
"1 A series of escapades demonstrating the adage ... 2\n",
"2 A series 2\n",
"3 A 2\n",
"4 series 2\n",
"... ... ...\n",
"156055 Hearst 's 2\n",
"156056 forced avuncular chortles neg\n",
"156057 avuncular chortles pos\n",
"156058 avuncular 2\n",
"156059 chortles 2\n",
"\n",
"[156060 rows x 2 columns]"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k2 = pd.DataFrame()\n",
"k2['Phrase'] = k['Phrase']\n",
"k2['S2'] = k['S2']\n",
"k2"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"k2 = k2.drop(k2[k2['S2'] == '2'].index)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'S2'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2896\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2897\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2898\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'S2'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'neg'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'pos'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'PoN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# k2.to_csv('kaggle_pos1_neg0.csv')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2978\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2979\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2980\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2981\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2982\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2899\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2900\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2901\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'S2'"
]
}
],
"source": [
"k2['S2'][k2['S2'] == 'neg'] = 0\n",
"k2['S2'][k2['S2'] == 'pos'] = 1\n",
"\n",
"# k2.to_csv('kaggle_pos1_neg0.csv')"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" text \n",
" PoN \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" 0 \n",
" \n",
" \n",
" 21 \n",
" good for the goose \n",
" 1 \n",
" \n",
" \n",
" 22 \n",
" good \n",
" 1 \n",
" \n",
" \n",
" 33 \n",
" the gander , some of which occasionally amuses... \n",
" 0 \n",
" \n",
" \n",
" 46 \n",
" amuses \n",
" 1 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156047 \n",
" quietly suggesting the sadness and obsession b... \n",
" 0 \n",
" \n",
" \n",
" 156051 \n",
" sadness and obsession \n",
" 0 \n",
" \n",
" \n",
" 156052 \n",
" sadness and \n",
" 0 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" 0 \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
76478 rows × 2 columns
\n",
"
"
],
"text/plain": [
" text PoN\n",
"0 A series of escapades demonstrating the adage ... 0\n",
"21 good for the goose 1\n",
"22 good 1\n",
"33 the gander , some of which occasionally amuses... 0\n",
"46 amuses 1\n",
"... ... ..\n",
"156047 quietly suggesting the sadness and obsession b... 0\n",
"156051 sadness and obsession 0\n",
"156052 sadness and 0\n",
"156056 forced avuncular chortles 0\n",
"156057 avuncular chortles 1\n",
"\n",
"[76478 rows x 2 columns]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k2.columns = ['text','PoN']\n",
"k2"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"k2.to_csv('kaggle_pos1_neg0.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" after removing the cwd from sys.path.\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" import sys\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" S0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" 1 \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" 3 \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 8 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \\\n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
" S0 \n",
"0 1 \n",
"1 2 \n",
"2 2 \n",
"3 2 \n",
"4 2 \n",
"... .. \n",
"156055 2 \n",
"156056 1 \n",
"156057 3 \n",
"156058 2 \n",
"156059 2 \n",
"\n",
"[156060 rows x 8 columns]"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k = pd.read_csv('kaggle_csv.csv')\n",
"k['S0'] = k['Sentiment']\n",
"k['S0'][k['S0'] == 0] = 0\n",
"k['S0'][k['S0'] == 1] = 1\n",
"k['S0'][k['S0'] == 2] = 1\n",
"k['S0'][k['S0'] == 3] = 1\n",
"k['S0'][k['S0'] == 4] = 1\n",
"k"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k3 = pd.read_csv('kaggle_csv.csv')\n",
"k3"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S0 \n",
" S0_0 \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Phrase, S0, S0_0]\n",
"Index: []"
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['S0_0'] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (df['S0'] == 1).count()\n",
"(df['S0'] == 0).count()"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "invalid literal for int() with base 10: 'neg'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S0'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m 5880\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5881\u001b[0m new_data = self._data.astype(\n\u001b[0;32m-> 5882\u001b[0;31m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5883\u001b[0m )\n\u001b[1;32m 5884\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 581\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 582\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[1;32m 436\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb_items\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0malign_copy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36m_astype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mvals1d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals1d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# TODO(extension)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 705\u001b[0m \u001b[0;31m# work around NumPy brokenness, #1987\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missubdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minteger\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype_intsafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;31m# if we have a datetime/timedelta array of objects\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.astype_intsafe\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'neg'"
]
}
],
"source": [
"df['S0'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [],
"source": [
"# Reload the CSV snapshot from disk into k3.\n",
"k3 = pd.read_csv(filepath_or_buffer='kaggle_csv.csv')"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k3['Sentiment'].values.min()"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"148988"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame()\n",
"df['Phrase'] = k3['Phrase']\n",
"df['S0'] = k3['Sentiment']\n",
"\n",
"df['S0_0'] = [0 if x == 0 else 1 for x in df['S0']]\n",
"len(df[df['S0_0'] == 1])"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7072"
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df[df['S0_0'] == 0])"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S0 \n",
" S0_0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" 1 \n",
" 1 \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" 3 \n",
" 1 \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 3 columns
\n",
"
"
],
"text/plain": [
" Phrase S0 S0_0\n",
"0 A series of escapades demonstrating the adage ... 1 1\n",
"1 A series of escapades demonstrating the adage ... 2 1\n",
"2 A series 2 1\n",
"3 A 2 1\n",
"4 series 2 1\n",
"... ... .. ...\n",
"156055 Hearst 's 2 1\n",
"156056 forced avuncular chortles 1 1\n",
"156057 avuncular chortles 3 1\n",
"156058 avuncular 2 1\n",
"156059 chortles 2 1\n",
"\n",
"[156060 rows x 3 columns]"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}