{
"cells": [
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"# -------------------------------------------------------\n",
"# Load the Kaggle sentiment training set\n",
"# -------------------------------------------------------\n",
"import pandas as pd\n",
"\n",
"# Tab-separated file; `sep` is the primary name of the `delimiter` option.\n",
"train = pd.read_csv('../WK7/kaggle-sentiment/train.tsv', sep='\\t')\n",
"\n",
"# NumPy arrays of the phrase text and of the sentiment labels.\n",
"X = train['Phrase'].values\n",
"y = train['Sentiment'].values\n",
"\n",
"# Snapshot of the frame as loaded (to_csv also writes the index column).\n",
"train.to_csv('kaggle_csv.csv')"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" import sys\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 2 columns
\n",
"
"
],
"text/plain": [
" Phrase S2\n",
"0 A series of escapades demonstrating the adage ... neg\n",
"1 A series of escapades demonstrating the adage ... 2\n",
"2 A series 2\n",
"3 A 2\n",
"4 series 2\n",
"... ... ...\n",
"156055 Hearst 's 2\n",
"156056 forced avuncular chortles neg\n",
"156057 avuncular chortles pos\n",
"156058 avuncular 2\n",
"156059 chortles 2\n",
"\n",
"[156060 rows x 2 columns]"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all_df.to_csv('kaggle_0_not0.csv', index=False)\n",
"t_sm = train[train.columns[2:5]]\n",
"train['t0'] = train['Sentiment'][train['Sentiment'] == 0] = 'neg'\n",
"train['S2'] = train['Sentiment']\n",
"train['S2'][train['S2'] == 0] = 'neg'\n",
"train['S2'][train['S2'] == 1] = 'neg'\n",
"train['S2'][train['S2'] == 3] = 'pos'\n",
"train['S2'][train['S2'] == 4] = 'pos'\n",
"train_sm = pd.DataFrame()\n",
"train_sm['Phrase'] = train['Phrase']\n",
"train_sm['S2'] = train['S2']\n",
"train_sm\n",
"\n",
"\n",
"train.to_csv('kaggle_csv.csv')\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"34345"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 'neg'])"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42133"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 'pos'])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"79582"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_sm[train_sm['S2'] == 2])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Rows per collapsed class, in the order neg / pos / 2, counted from train_sm.\n",
"# The original hard-coded list repeated 42133 in the third slot, where the\n",
"# count for label 2 (79582 in the cell above) was presumably intended.\n",
"ar = [int((train_sm['S2'] == label).sum()) for label in ('neg', 'pos', 2)]\n",
"\n",
"# pyplot has no barplot(); plt.bar is the matplotlib call that draws bars.\n",
"plt.bar(['neg', 'pos', '2'], ar)\n",
"plt.xlabel('S2 label')\n",
"plt.ylabel('rows')\n",
"plt.title('Rows per collapsed sentiment label')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAFzCAYAAADv+wfzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAVE0lEQVR4nO3df9CdZZ3f8c/XhJAl/AgNsRVDmrAyqRq2CTw63WV0BHTCChIp7Yi6qwhs1LbA1paWHUvXdcaZnYFdK2LrZFb5sTqygygqs7UyFpXNiJsEWARC6jaLbLoK2VAwrsNKytU/8sgEDCSSc677+fF6zTDPc+5zOPfX8Qzznuu+n+tUay0AAIzfS4YeAABgthBeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0MncoQc4EMccc0xbtmzZ0GMAAOzX5s2b/7a1tnhfz02L8Fq2bFk2bdo09BgAAPtVVd9/vudcagQA6ER4AQB0IrwAADqZFvd47ctTTz2V7du358knnxx6lOc1f/78LFmyJIcccsjQowAAU8C0Da/t27fniCOOyLJly1JVQ4/zc1pr2blzZ7Zv357ly5cPPQ4AMAVM20uNTz75ZBYtWjQloytJqiqLFi2a0ityAEBf0za8kkzZ6PqZqT4fANDXtA6vA/XQQw9l5cqVzzq2adOmXHLJJb/Q+3zoQx/KVVddNcrRAIBZZNre43WwJiYmMjExMfQYAMAsMitWvPa2bdu2rF69OldeeWXOOuusJHtWsi644IK84Q1vyPHHH5+rr776mdd/5CMfyYoVK/LGN74xW7duHWpsAGAGmFUrXlu3bs15552Xa6+9No8//ni++c1vPvPcgw8+mNtvvz27du3KihUr8v73vz/33ntvbrzxxtx9993ZvXt3TjrppJx88skD/i8AAKazWbPitWPHjqxduzaf+cxnsmrVqp97/swzz8yhhx6aY445Ji996UvzyCOP5I477sg555yTww47LEceeWTOPvvsASYHAGaKWRNeRx11VI477rhs2LBhn88feuihz/w+Z86c7N69O4m/TAQARmfWXGqcN29ebrnllqxZsyaHH354jj322P3+O69//etz/vnn5/LLL8/u3bvzla98Je9973s7TAsA08cpHz9l6BEO2oaL970wM2qzZsUrSRYsWJBbb701H/3oR/PEE0/s9/UnnXRS3va2t2XVqlU599xz87rXva7DlADATFWttaFn2K+JiYm2adOmZx3bsmVLXvnKVw400YGbLnMCwItlxevZqmpza22fe1bNqhUvAIAhCS8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQyY3auP/myG0b6fpuvfNdI3w8AwIrXi3TFFVfkYx/72DOPP/jBD+bqq68ecCIAYKoTXi/ShRdemOuvvz5J8vTTT+fGG2/MO9/5zoGnAgCmshlzqbG3ZcuWZdGiRbn77rvzyCOPZPXq1Vm0aNHQYwEAU5jwOggXXXRRrrvuuvzwhz/MBRdcMPQ4AMAU51LjQTjnnHPy1a9+NRs3bsyaNWuGHgcAmOKseB2EefPm5dRTT83ChQszZ86coccBAKa4GRNeQ2z/8PTTT+fOO+/MTTfd1P3cAMD041Lji/TAAw/kFa94RU4//fSccMIJQ48DAEwDM2bFq7dXvepV2bZt29BjAADTiBUvAIBOxhZeVfXpqnq0qu7b69iVVfVgVd1bVV+sqoXjOj8AwFQzzhWv65Kc8ZxjtyVZ2Vr7lST/K8nvjPH8AABTytjCq7X2rSSPPefY11pruycf3plkybjODwAw1Qx5c/0FSf7k+Z6sqnVJ1iXJ0qVLe80EMFKnfPyUoUc4aBsu3jD0CDBjDBJeVfXBJLuTfPb5XtNaW59kfZJMTEy0/b3nwx8+cWTzJcnS//zdkb4fAED3v2qsqncnOSvJO1tr+w2qqeqTn/xkVq1alVWrVmX58uU59dRThx
4JAJjiuoZXVZ2R5D8mObu19pOe5x61973vfbnnnnuycePGLFmyJB/4wAeGHgkAmOLGuZ3E55J8O8mKqtpeVRcmuSbJEUluq6p7quqT4zp/L5deemlOO+20vOUtbxl6FABgihvbPV6ttbfv4/CnxnW+IVx33XX5/ve/n2uuuWboUQCAacBXBr1ImzdvzlVXXZU77rgjL3mJLwAAAPZPMbxI11xzTR577LGceuqpWbVqVS666KKhRwIAprgZs+LVe/uHa6+9tuv5AIDpz4oXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4H4aGHHsrKlSuHHgMAmCZmzD5ep3z8lJG+34aLN4z0/QAArHiNyLZt27J69eps3Lhx6FEAgClKeI3A1q1bc+655+baa6/Na17zmqHHAQCmqBlzqXEoO3bsyNq1a3PzzTfn1a9+9dDjAABTmBWvg3TUUUfluOOOy4YN7gkDAF6YFa+DNG/evNxyyy1Zs2ZNDj/88LzjHe8YeiQAYIoSXiOwYMGC3HrrrXnTm96UBQsWZO3atUOPBABMQTMmvIbY/mHZsmW57777kiQLFy70F40AwAuaMeHF9DPqvdeGYL83AH4Rbq4HAOhEeAEAdDKtw6u1NvQIL2iqzwcA9DVtw2v+/PnZuXPnlI2b1lp27tyZ+fPnDz0KADBFTNub65csWZLt27dnx44dQ4/yvObPn58lS5YMPQYAMEVM2/A65JBDsnz58qHHAAA4YNP2UiMAwHQjvAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ2MLr6r6dFU9WlX37XXsH1TVbVX1vcmfR4/r/AAAU804V7yuS3LGc45dnuTrrbUTknx98jEAwKwwtvBqrX0ryWPPObw2yfWTv1+f5K3jOj8AwFTT+x6vf9ha+0GSTP58aefzAwAMZsreXF9V66pqU1Vt2rFjx9DjAAActN7h9UhVvSxJJn8++nwvbK2tb61NtNYmFi9e3G1AAIBx6R1eX07y7snf353kS53PDwAwmHFuJ/G5JN9OsqKqtlfVhUl+P8mbqup7Sd40+RgAYFaYO643bq29/XmeOn1c5wQAmMqm7M31AAAzjfACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdDJ36AGA0Tv5shuGHuGgbb7yXUOPADByVrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOBgmvqvq3VXV/Vd1XVZ+rqvlDzAEA0FP38Kqqlye5JMlEa21lkjlJzus9BwBAb0Ndapyb5Jeqam6Sw5L8zUBzAAB00z28Wmv/J8lVSR5O8oMkT7TWvtZ7DgCA3oa41Hh0krVJlic5NsmCqvqNfbxuXVVtqqpNO3bs6D0mAMDIDXGp8Y1J/qq1tqO19lSSLyT5tee+qLW2vrU20VqbWLx4cfchAQBGbYjwejjJP6uqw6qqkpyeZMsAcwAAdDXEPV7fSfL5JHcl+e7kDOt7zwEA0NvcIU7aWvvdJL87xLkBAIZi53oAgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOpl7IC+qqvlJLkzy6iTzf3a8tXbBmOYCAJhxDnTF64+T/KMka5J8M8mSJLvGNRQAwEx0oOH1itbaFUn+rrV2fZIzk5w4vrEAAGaeAw2vpyZ/Pl5VK5MclWTZWCYCAJihDugeryTrq+roJP8pyZeTHJ7kirFNBQAwAx1oeH29tfZ/k3wryfFJUlXLxz
YVAMAMdKCXGm/ex7HPj3IQAICZ7gVXvKrqn2TPFhJHVdU/3+upI7PXthIAAOzf/i41rkhyVpKFSd6y1/FdSX5rXEMBAMxELxherbUvJflSVf1qa+3bnWYCAJiRDvTm+rur6l/HzvUAAC+anesBADqxcz0AQCd2rgcA6MTO9QAAnexvH68P7PXwPZM/PzH5c8FYJgIAmKH2t+J1xOTPFUlekz2rXcmePb2+Na6hAABmov3t4/V7SVJVX0tyUmtt1+TjDyW5aezTAQDMIAd6c/3SJD/d6/FP4+Z6AIBfyIHeXP/HSf68qr6YpCU5J8n1Y5sKAGAGOqDwaq19pKr+e5LXTR56T2vt7vGNBQAw8xzoildaa3cluWuMswAAzGgHeo8XAAAHSXgBAHQySHhV1cKq+nxVPVhVW6rqV4eYAwCgpwO+x2vEPpbkq621f1FV85IcNtAcAADddA+vqjoyyeuTnJ8krbWf5tl7hAEAzEhDXGo8PsmOJNdW1d1V9UdV9XPf+1hV66pqU1Vt2rFjR/8pAQBGbIjwmpvkpCT/rbW2OsnfJbn8uS9qra1vrU201iYWL17ce0YAgJEbIry2J9neWvvO5OPPZ0+IAQDMaN3Dq7X2wyR/XVUrJg+dnuSB3nMAAPQ21F81Xpzks5N/0bgtyXsGmgMAoJtBwqu1dk+SiSHODQAwFDvXAwB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANDJYOFVVXOq6u6qunWoGQAAehpyxevSJFsGPD8AQFeDhFdVLUlyZpI/GuL8AABDmDvQef9Lkv+Q5Ijne0FVrUuyLkmWLl06shOffNkNI3uvoWy+8l1DjwAAvAjdV7yq6qwkj7bWNr/Q61pr61trE621icWLF3eaDgBgfIa41HhKkrOr6qEkNyY5rao+M8AcAABddQ+v1trvtNaWtNaWJTkvyf9srf1G7zkAAHqzjxcAQCdD3VyfJGmtfSPJN4acAQCgFyteAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE6EFwBAJ8ILAKAT4QUA0InwAgDoRHgBAHQivAAAOhFeAACdCC8AgE7mDj0AAMxWD3/4xKFHGI2jjxx6gmnDihcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6KR7eFXVcVV1e1Vtqar7q+rS3jMAAAxh7gDn3J3k37XW7qqqI5JsrqrbWmsPDDALAEA33Ve8Wms/aK3dNfn7riRbkry89xwAAL0NseL1jKpalmR1ku/s47l1SdYlydKlS7vOBQzv4Q+fOPQIo3H0kUNPMGOdfNkNQ49w0L54xNAT0NtgN9dX1eFJbk7y2621Hz33+dba+tbaRGttYvHixf0HBAAYsUHCq6oOyZ7o+mxr7QtDzAAA0NsQf9VYST6VZEtr7Q97nx8AYChDrHidkuQ3k5xWVfdM/vPmAeYAAOiq+831rbU/S1K9zwsAMDQ71wMAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAnwgsAoBPhBQDQifACAOhEeAEAdCK8AAA6mTv0APziHv7wiUOPMBpHHzn0BADQlRUvAIBOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAI
BOhBcAQCfCCwCgE+EFANCJ8AIA6ER4AQB0IrwAADoRXgAAnQgvAIBOhBcAQCfCCwCgk0HCq6rOqKqtVfWXVXX5EDMAAPTWPbyqak6STyT59SSvSvL2qnpV7zkAAHobYsXrtUn+srW2rbX20yQ3Jlk7wBwAAF0NEV4vT/LXez3ePnkMAGBGmzvAOWsfx9rPvahqXZJ1kw9/XFVbxzrVNPKP+5zmmCR/2+dU01ddsq+PM6Pgcz51+JyPj8/51DHiz/nz/l87RHhtT3LcXo+XJPmb576otbY+yfpeQ/FsVbWptTYx9BwwTj7nzAY+51PLEJcaNyY5oaqWV9W8JOcl+fIAcwAAdNV9xau1truq/k2S/5FkTpJPt9bu7z0HAEBvQ1xqTGvtT5P86RDn5oC5zMts4HPObOBzPoVUaz93XzsAAGPgK4MAADoRXjxLVX26qh6tqvuGngXGpaqOq6rbq2pLVd1fVZcOPROMWlXNr6o/r6q/mPyc/97QM+FSI89RVa9P8uMkN7TWVg49D4xDVb0syctaa3dV1RFJNid5a2vtgYFHg5GpqkqyoLX246o6JMmfJbm0tXbnwKPNala8eJbW2reSPDb0HDBOrbUftNbumvx9V5It8Q0azDBtjx9PPjxk8h+rLQMTXsCsVlXLkqxO8p1hJ4HRq6o5VXVPkkeT3NZa8zkfmPACZq2qOjzJzUl+u7X2o6HngVFrrf2/1tqq7PmWmNdWlVtIBia8gFlp8p6Xm5N8trX2haHngXFqrT2e5BtJzhh4lFlPeAGzzuRNx59KsqW19odDzwPjUFWLq2rh5O+/lOSNSR4cdiqEF89SVZ9L8u0kK6pqe1VdOPRMMAanJPnNJKdV1T2T/7x56KFgxF6W5Paqujd7vif5ttbarQPPNOvZTgIAoBMrXgAAnQgvAIBOhBcAQCfCCwCgE+EFANCJ8AJmlKpaWFX/avL3Y6vq80PPBPAztpMAZpTJ7168tbXmq1GAKWfu0AMAjNjvJ/nlyS8G/l6SV7bWVlbV+UnemmROkpVJ/iDJvOzZSPXvk7y5tfZYVf1ykk8kWZzkJ0l+q7Vmt29gJFxqBGaay5P878kvBr7sOc+tTPKOJK9N8pEkP2mtrc6eb2t41+Rr1ie5uLV2cpJ/n+S/dpkamBWseAGzye2ttV1JdlXVE0m+Mnn8u0l+paoOT/JrSW7a83WOSZJD+48JzFTCC5hN/n6v35/e6/HT2fPfw5ckeXxytQxg5FxqBGaaXUmOeDH/YmvtR0n+qqr+ZZLUHv90lMMBs5vwAmaU1trOJBuq6r4kV76It3hnkgur6i+S3J9k7SjnA2Y320kAAHRixQsAoBPhBQDQifACAOhEeAEAdCK8AAA6EV4AAJ0ILwCAToQXAEAn/x/gWAIZuVopPAAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" time \n",
" kind \n",
" data \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 1 \n",
" y \n",
" 4 \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" y \n",
" 9 \n",
" \n",
" \n",
" 2 \n",
" 3 \n",
" y \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 1 \n",
" z \n",
" 1 \n",
" \n",
" \n",
" 4 \n",
" 2 \n",
" z \n",
" 2 \n",
" \n",
" \n",
" 5 \n",
" 3 \n",
" z \n",
" 3 \n",
" \n",
" \n",
" 6 \n",
" 1 \n",
" k \n",
" 11 \n",
" \n",
" \n",
" 7 \n",
" 2 \n",
" k \n",
" 12 \n",
" \n",
" \n",
" 8 \n",
" 3 \n",
" k \n",
" 13 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" time kind data\n",
"0 1 y 4\n",
"1 2 y 9\n",
"2 3 y 2\n",
"3 1 z 1\n",
"4 2 z 2\n",
"5 3 z 3\n",
"6 1 k 11\n",
"7 2 k 12\n",
"8 3 k 13"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"\n",
"x = [1,2,3]\n",
"\n",
"y = [4, 9, 2]\n",
"z = [1, 2, 3]\n",
"k = [11, 12, 13]\n",
"\n",
"# number_of_bars\n",
"n_bars = 3\n",
"\n",
"df = pd.DataFrame(zip(x*n_bars, [\"y\"]*len(x)+[\"z\"]*len(x)+[\"k\"]*len(x), y+z+k), columns=[\"time\", \"kind\", \"data\"])\n",
"plt.figure(figsize=(10, 6))\n",
"sns.barplot(x=\"time\", hue=\"kind\", y=\"data\", data=df)\n",
"plt.show()\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAXW0lEQVR4nO3df+xddZ3n8edrWhBWRUC+GtJ2LDs2sxZmBqELTJhsECZQ2B/FLCQlRrqG3e64sKPjxBV2k2VGZaI7qxgMkqlDl+I6Uwk6Q5etdhqENbry44t2gFJJvwsqlUa+TvnlGsHie/+4n4532/ttvz303svXPh/JzT3nfT6fcz8n39AX53zOvSdVhSRJXfzKuAcgSZq7DBFJUmeGiCSpM0NEktSZISJJ6mz+uAcwaieccEItXrx43MOQpDnlwQcf/FFVTexdP+xCZPHixUxOTo57GJI0pyT53qC6l7MkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ddt9Y1+Hh+x/+jXEP4bDwq//54aHs9+xPnz2U/eoXvvHvv3FI9uOZiCSpM0NEktSZISJJ6swQkSR1ZohIkjozRCRJnQ09RJLMS/LtJHe29ZOS3Jdke5IvJDmy1V/T1qfa9sV9+7im1R9LckFffXmrTSW5etjHIkn6/43iTOR9wLa+9Y8D11fVEuAZ4IpWvwJ4pqreClzf2pFkKbASOBlYDnymBdM84EbgQmApcFlrK0kakaGGSJKFwD8F/rytBzgXuL01WQdc3JZXtHXa9vNa+xXA+qp6saqeAKaAM9prqqoer6qXgPWtrSRpRIZ9JvIp4D8AP2/rbwSerardbX0HsKAtLwCeBGjbn2vt/76+V5+Z6vtIsjrJZJLJ6enpV3pMkqRmaCGS5J8BT1fVg/3lAU3rANsOtr5vsWpNVS2rqmUTExP7GbUk6WAM87ezzgb+RZKLgKOAY+idmRybZH4721gIPNXa7wAWATuSzAfeAOzqq+/R32emuiRpBIZ2JlJV11TVwqpaTG9i/KtV9S7gbuCS1mwVcEdb3tDWadu/WlXV6ivb3VsnAUuA+4EHgCXtbq8j22dsGNbxSJL2NY5f8f0QsD7JR4FvAze3+s3A55JM0TsDWQlQVVuT3AY8CuwGrqyqlwGSXAVsAuYBa6tq60iPRJIOcyMJkaq6B7inLT9O786qvdv8FLh0hv7XAdcNqG8ENh7CoUqSDoLfWJckdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSepsaCGS5Kgk9yf52yRbk/xxq9+S5IkkW9rr1FZPkhuSTCV5KMlpfftalWR7e63qq5+e5OHW54YkGdbxSJL2NcwnG74InFtVP05yBPD1JF9u2z5YVbfv1f5Ces9PXwKcCdwEnJnkeOBaYBlQwINJNlTVM63NauBeek84XA58GUnSSAztTKR6ftxWj2iv2k+XFcCtrd+9wLFJTgQuADZX1a4WHJuB5W3bMVX1zaoq4Fbg4mEdjyRpX0OdE0kyL8kW4Gl6QXBf23Rdu2R1fZLXtNoC4Mm+7jtabX/1HQPqg8axOslkksnp6elXfFySpJ6hhkhVvVxVpwILgTOSnAJcA/wj4B8DxwMfas0HzWdUh/qgcaypqmVVtWxiYuIgj0KSNJOR3J1VVc8C9wDLq2pnu2T1IvDfgDNasx3Aor5uC4GnDlBfOKAuSRqRoU2sJ5kAflZVzyY5Gvhd4ONJTqyqne1OqouBR1qXDcBVSdbTm1h/rrXbBPxJkuNau/OBa6pqV5IXkpwF3AdcDnz6UB7D6R+89VDuTgM8+KeXj3sIkl6BYd6ddSKwLsk8emc8t1XVnUm+2gImwBbg91r7jcBFwBTwE+A9AC0sPgI80Np9uKp2te
X3ArcAR9O7K8s7syRphIYWIlX1EPD2AfVzZ2hfwJUzbFsLrB1QnwROeWUjlSR15TfWJUmdGSKSpM4MEUlSZ4aIJKkzQ0SS1JkhIknqzBCRJHVmiEiSOjNEJEmdGSKSpM4MEUlSZ4aIJKkzQ0SS1JkhIknqzBCRJHU2tBBJclSS+5P8bZKtSf641U9Kcl+S7Um+kOTIVn9NW59q2xf37euaVn8syQV99eWtNpXk6mEdiyRpsGGeibwInFtVvwWcCixvj7L9OHB9VS0BngGuaO2vAJ6pqrcC17d2JFkKrAROBpYDn0kyrz0x8UbgQmApcFlrK0kakaGFSPX8uK0e0V4FnAvc3urr6D1nHWBFW6dtP689h30FsL6qXqyqJ+g9PveM9pqqqser6iVgfWsrSRqRoc6JtDOGLcDTwGbg/wDPVtXu1mQHsKAtLwCeBGjbnwPe2F/fq89M9UHjWJ1kMsnk9PT0oTg0SRJDDpGqermqTgUW0jtzeNugZu09M2w72PqgcaypqmVVtWxiYuLAA5ckzcpI7s6qqmeBe4CzgGOTzG+bFgJPteUdwCKAtv0NwK7++l59ZqpLkkZkmHdnTSQ5ti0fDfwusA24G7ikNVsF3NGWN7R12vavVlW1+sp299ZJwBLgfuABYEm72+tIepPvG4Z1PJKkfc0/cJPOTgTWtbuofgW4raruTPIosD7JR4FvAze39jcDn0syRe8MZCVAVW1NchvwKLAbuLKqXgZIchWwCZgHrK2qrUM8HknSXoYWIlX1EPD2AfXH6c2P7F3/KXDpDPu6DrhuQH0jsPEVD1aS1InfWJckdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSepsmI/HXZTk7iTbkmxN8r5W/6MkP0iypb0u6utzTZKpJI8luaCvvrzVppJc3Vc/Kcl9SbYn+UJ7TK4kaUSGeSayG/jDqnobcBZwZZKlbdv1VXVqe20EaNtWAicDy4HPJJnXHq97I3AhsBS4rG8/H2/7WgI8A1wxxOORJO1laCFSVTur6ltt+QVgG7BgP11WAOur6sWqegKYovcY3TOAqap6vKpeAtYDK5IEOBe4vfVfB1w8nKORJA0ykjmRJIvpPW/9vla6KslDSdYmOa7VFgBP9nXb0Woz1d8IPFtVu/eqD/r81Ukmk0xOT08fgiOSJMEIQiTJ64AvAu+vqueBm4BfA04FdgKf2NN0QPfqUN+3WLWmqpZV1bKJiYmDPAJJ0kzmD3PnSY6gFyCfr6ovAVTVD/u2fxa4s63uABb1dV8IPNWWB9V/BBybZH47G+lvL0kagYM+E0nyK0mOmUW7ADcD26rqk331E/uavRN4pC1vAFYmeU2Sk4AlwP3AA8CSdifWkfQm3zdUVQF3A5e0/quAOw72eCRJ3c0qRJL8RZJjkrwWeBR4LMkHD9DtbODdwLl73c77X5I8nOQh4B3AHwBU1Vbgtrb/rwBXVtXL7SzjKmATvcn521pbgA8BH0gyRW+O5ObZH7ok6ZWa7eWspVX1fJJ3ARvp/eP9IPCnM3Woqq8zeN5i4376XAdcN6C+cVC/qnqc3t1bkqQxmO3lrCPa/MbFwB1V9TNmmMSWJB0+ZhsifwZ8F3gt8LUkbwGeH9agJElzw6wuZ1XVDcANfaXvJXnHcIYkSZorZhUiSY4FLgcW79Xn94cwJknSHDHbifWNwL3Aw8DPhzccSdJcMtsQOaqqPjDUkUiS5pzZTqx/Lsm/SXJikuP3vIY6MknSq95sz0ReovedkP/EL27tLeAfDmNQkqS5YbYh8gHgrVX1o2EORpI0t8z2ctZW4CfDHIgkae6Z7ZnIy8CWJHcDL+4pVpW3+ErSYWy2IfLX7SVJ0t+b7TfW1w17IJKkuWe231h/ggE/uFhV3p0lSYex2V7OWta3fBRwKeD3RCTpMDeru7Oq6u/6Xj+oqk
8B5+6vT5JFSe5Osi3J1iTva/Xjk2xOsr29H9fqSXJDkqkkDyU5rW9fq1r77UlW9dVPbw+4mmp9Bz2/RJI0JLN9suFpfa9lSX4PeP0Buu0G/rCq3gacBVyZZClwNXBXVS0B7mrrABfSeyTuEmA1cFP77OOBa4Ez6T2A6to9wdParO7rt3w2xyNJOjRmeznrE/xiTmQ3vWeLXLq/DlW1E9jZll9Isg1YAKwAzmnN1gH30HtS4grg1vbs9HuTHNuex34OsLmqdgEk2QwsT3IPcExVfbPVb6X30Kwvz/KYJEmv0Gy/bHghveeX3wV8A/gBsHK2H5JkMfB24D7gzS1g9gTNm1qzBcCTfd12tNr+6jsG1Ad9/uokk0kmp6enZztsSdIBzDZE/hr458DPgB+31/+dTcckrwO+CLy/qvb3NMRB8xnVob5vsWpNVS2rqmUTExMHGrIkaZZmezlrYVUd9HxDey77F4HPV9WXWvmHSU6sqp3tctXTrb4DWNT/mcBTrX7OXvV7Wn3hgPaSpBGZ7ZnI/07yGwez43an1M3Atqr6ZN+mDcCeO6xWAXf01S9vd2mdBTzXLndtAs5PclybUD8f2NS2vZDkrPZZl/ftS5I0ArM9E/kd4F+1Lx2+SO9SUlXVb+6nz9nAu4GHk2xptf8IfAy4LckVwPf5xQT9RuAiYIrejz2+h96H7EryEeCB1u7DeybZgfcCtwBH05tQd1JdkkZotiFy4cHuuKq+zuB5C4DzBrQv4MoZ9rUWWDugPgmccrBjkyQdGrP97azvDXsgkqS5Z7ZzIpIk7cMQkSR1ZohIkjozRCRJnRkikqTODBFJUmeGiCSpM0NEktSZISJJ6swQkSR1ZohIkjozRCRJnRkikqTODBFJUmeGiCSps6GFSJK1SZ5O8khf7Y+S/CDJlva6qG/bNUmmkjyW5IK++vJWm0pydV/9pCT3Jdme5AtJjhzWsUiSBhvmmcgtwPIB9eur6tT22giQZCmwEji59flMknlJ5gE30nuy4lLgstYW4ONtX0uAZ4ArhngskqQBhhYiVfU1YNcBG/asANZX1YtV9QS956yf0V5TVfV4Vb0ErAdWJAlwLnB7678OuPiQHoAk6YDGMSdyVZKH2uWu41ptAfBkX5sdrTZT/Y3As1W1e6/6QElWJ5lMMjk9PX2ojkOSDnujDpGbgF8DTgV2Ap9o9QxoWx3qA1XVmqpaVlXLJiYmDm7EkqQZzR/lh1XVD/csJ/kscGdb3QEs6mu6EHiqLQ+q/wg4Nsn8djbS316SNCIjPRNJcmLf6juBPXdubQBWJnlNkpOAJcD9wAPAknYn1pH0Jt83VFUBdwOXtP6rgDtGcQySpF8Y2plIkr8EzgFOSLIDuBY4J8mp9C49fRf4twBVtTXJbcCjwG7gyqp6ue3nKmATMA9YW1Vb20d8CFif5KPAt4Gbh3UskqTBhhYiVXXZgPKM/9BX1XXAdQPqG4GNA+qP07t7S5I0Jn5jXZLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnQwuRJGuTPJ3kkb7a8Uk2J9ne3o9r9SS5IclUkoeSnNbXZ1Vrvz3Jqr766Ukebn1uSDLoueuSpCEa5pnILcDyvWpXA3dV1RLgrrYOcCG9R+IuAVYDN0EvdOg9EfFMeg+gunZP8LQ2q/v67f1ZkqQhG1qIVNXXgF17lVcA69ryOuDivvqt1XMvcGx7HvsFwOaq2lVVzwCbgeVt2zFV9c32vPVb+/YlSRqRUc+JvLmqdgK09ze1+gLgyb52O1ptf/UdA+qSpBF6tUysD5rPqA71wTtPVieZTDI5PT3dcYiSpL2NOkR+2C5F0d6fbvUdwKK+dguBpw5QXzigPlBVramqZVW1bGJi4hUfhCSpZ9QhsgHYc4fVKuCOvvrl7S6ts4Dn2uWuTcD5SY5rE+rnA5vatheSnN
Xuyrq8b1+SpBGZP6wdJ/lL4BzghCQ76N1l9THgtiRXAN8HLm3NNwIXAVPAT4D3AFTVriQfAR5o7T5cVXsm699L7w6wo4Evt5ckaYSGFiJVddkMm84b0LaAK2fYz1pg7YD6JHDKKxmjJOmVebVMrEuS5iBDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnYwmRJN9N8nCSLUkmW+34JJuTbG/vx7V6ktyQZCrJQ0lO69vPqtZ+e5JVM32eJGk4xnkm8o6qOrWqlrX1q4G7qmoJcFdbB7gQWNJeq4GboBc69B65eyZwBnDtnuCRJI3Gq+ly1gpgXVteB1zcV7+1eu4Fjk1yInABsLmqdlXVM8BmYPmoBy1Jh7NxhUgBf5PkwSSrW+3NVbUToL2/qdUXAE/29d3RajPV95FkdZLJJJPT09OH8DAk6fA2f0yfe3ZVPZXkTcDmJN/ZT9sMqNV+6vsWq9YAawCWLVs2sI0k6eCN5Uykqp5q708Df0VvTuOH7TIV7f3p1nwHsKiv+0Lgqf3UJUkjMvIQSfLaJK/fswycDzwCbAD23GG1CrijLW8ALm93aZ0FPNcud20Czk9yXJtQP7/VJEkjMo7LWW8G/irJns//i6r6SpIHgNuSXAF8H7i0td8IXARMAT8B3gNQVbuSfAR4oLX7cFXtGt1hSJJGHiJV9TjwWwPqfwecN6BewJUz7GstsPZQj1GSNDuvplt8JUlzjCEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqTNDRJLUmSEiSerMEJEkdWaISJI6M0QkSZ0ZIpKkzgwRSVJnhogkqbM5HyJJlid5LMlUkqvHPR5JOpzM6RBJMg+4EbgQWApclmTpeEclSYePOR0iwBnAVFU9XlUvAeuBFWMekyQdNtJ7hPnclOQSYHlV/eu2/m7gzKq6aq92q4HVbfXXgcdGOtDROgH40bgHoU78281tv+x/v7dU1cTexfnjGMkhlAG1fVKxqtYAa4Y/nPFLMllVy8Y9Dh08/3Zz2+H695vrl7N2AIv61hcCT41pLJJ02JnrIfIAsCTJSUmOBFYCG8Y8Jkk6bMzpy1lVtTvJVcAmYB6wtqq2jnlY43ZYXLb7JeXfbm47LP9+c3piXZI0XnP9cpYkaYwMEUlSZ4aIJHWUZFGSu5NsS7I1yfvGPaZRc05EkjpKciJwYlV9K8nrgQeBi6vq0TEPbWQ8E/klkGRx+z+hz7b/G/qbJEePe1yanfb3+06SdUkeSnJ7kn8w7nHpwKpqZ1V9qy2/AGwDFox3VKNliPzyWALcWFUnA88C/3LM49HB+XVgTVX9JvA88O/GPB4dpCSLgbcD9413JKNliPzyeKKqtrTlB4HFYxyLDt6TVfWNtvzfgd8Z52B0cJK8Dvgi8P6qen7c4xklQ+SXx4t9yy8zx79Iehjae3LSyco5IskR9ALk81X1pXGPZ9QMEenV4VeT/HZbvgz4+jgHo9lJEuBmYFtVfXLc4xkHQ0R6ddgGrEryEHA8cNOYx6PZORt4N3Buki3tddG4BzVK3uIrjVmbkL2zqk4Z81Ckg+aZiCSpM89EJEmdeSYiSerMEJEkdWaISJI6M0SkQyzJjw+wfXGSRw5yn7ckueSVjUw69AwRSVJnhog0JElel+SuJN9K8nCSFX2b5w/61d4kpyf5X0keTLKp/dT43vv9WJJHW9//OrIDkgYwRKTh+Snwzqo6DXgH8In2Mxkw4Fd7228wfRq4pKpOB9YC1/XvMMnxwDuBk1vfj47mUKTB/JE+aXgC/EmSfwL8nN5zJt7ctu39q72/D3wFOAXY3LJmHrBzr30+Ty+c/jzJ/wTuHOoRSAdgiEjD8y5gAji9qn6W5LvAUW3boF/tDbC1qn6bGVTV7iRnAOcBK4GrgHMP9cCl2fJyljQ8bwCebgHyDuAtfdsG/W
rvY8DEnnqSI5Kc3L/D9tyKN1TVRuD9wKnDPghpfzwTkYbn88D/SDIJbAG+07dtz6/2/hmwHbipql5qt/HekOQN9P77/BSwta/f64E7khxF78zlD0ZwHNKM/O0sSVJnXs6SJHVmiEiSOjNEJEmdGSKSpM4MEUlSZ4aIJKkzQ0SS1Nn/A34t941nDEIKAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"ar = [34345, 42133, 42133]\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = pd.DataFrame()\n",
"df['labels'] = ['n','p', '2']\n",
"df['nums'] = ar\n",
"df\n",
"sns.barplot(x='labels', y='nums',data = df)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"\n",
"train.to_csv('kaggle_csv.csv')\n",
"\n",
"# sns.barplot(x='Sentiment', y=train['Sentiment'].count_(), data=train)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k = pd.read_csv('kaggle_csv.csv')\n",
"k"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 2 columns
\n",
"
"
],
"text/plain": [
" Phrase S2\n",
"0 A series of escapades demonstrating the adage ... neg\n",
"1 A series of escapades demonstrating the adage ... 2\n",
"2 A series 2\n",
"3 A 2\n",
"4 series 2\n",
"... ... ...\n",
"156055 Hearst 's 2\n",
"156056 forced avuncular chortles neg\n",
"156057 avuncular chortles pos\n",
"156058 avuncular 2\n",
"156059 chortles 2\n",
"\n",
"[156060 rows x 2 columns]"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k2 = pd.DataFrame()\n",
"k2['Phrase'] = k['Phrase']\n",
"k2['S2'] = k['S2']\n",
"k2"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# Discard the rows labelled '2' (compared as a string here, since k was read\n",
"# back from CSV), leaving every other row in place.\n",
"k2 = k2.loc[k2['S2'] != '2'].copy()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'S2'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2896\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2897\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2898\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'S2'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'neg'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk2\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S2'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'pos'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'PoN'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mk2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# k2.to_csv('kaggle_pos1_neg0.csv')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2978\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2979\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2980\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2981\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2982\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 2897\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2898\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2899\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2900\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2901\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'S2'"
]
}
],
"source": [
"# Encode the string labels as integers: 'neg' -> 0, 'pos' -> 1.\n",
"# One .loc call per label writes straight into k2; the chained form\n",
"# k2['S2'][mask] = value may assign into a temporary copy instead.\n",
"# Must run before k2's columns are renamed (afterwards 'S2' no longer exists).\n",
"k2.loc[k2['S2'] == 'neg', 'S2'] = 0\n",
"k2.loc[k2['S2'] == 'pos', 'S2'] = 1"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" text \n",
" PoN \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" 0 \n",
" \n",
" \n",
" 21 \n",
" good for the goose \n",
" 1 \n",
" \n",
" \n",
" 22 \n",
" good \n",
" 1 \n",
" \n",
" \n",
" 33 \n",
" the gander , some of which occasionally amuses... \n",
" 0 \n",
" \n",
" \n",
" 46 \n",
" amuses \n",
" 1 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156047 \n",
" quietly suggesting the sadness and obsession b... \n",
" 0 \n",
" \n",
" \n",
" 156051 \n",
" sadness and obsession \n",
" 0 \n",
" \n",
" \n",
" 156052 \n",
" sadness and \n",
" 0 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" 0 \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
76478 rows × 2 columns
\n",
"
"
],
"text/plain": [
" text PoN\n",
"0 A series of escapades demonstrating the adage ... 0\n",
"21 good for the goose 1\n",
"22 good 1\n",
"33 the gander , some of which occasionally amuses... 0\n",
"46 amuses 1\n",
"... ... ..\n",
"156047 quietly suggesting the sadness and obsession b... 0\n",
"156051 sadness and obsession 0\n",
"156052 sadness and 0\n",
"156056 forced avuncular chortles 0\n",
"156057 avuncular chortles 1\n",
"\n",
"[76478 rows x 2 columns]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k2.columns = ['text','PoN']\n",
"k2"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"k2.to_csv('kaggle_pos1_neg0.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" after removing the cwd from sys.path.\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"\n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \n",
"/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" import sys\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" S0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" 1 \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" 3 \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 8 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \\\n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
" S0 \n",
"0 1 \n",
"1 2 \n",
"2 2 \n",
"3 2 \n",
"4 2 \n",
"... .. \n",
"156055 2 \n",
"156056 1 \n",
"156057 3 \n",
"156058 2 \n",
"156059 2 \n",
"\n",
"[156060 rows x 8 columns]"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k = pd.read_csv('kaggle_csv.csv')\n",
"# Binary label: sentiment 0 stays 0, every other sentiment becomes 1.\n",
"# Built in one vectorized step; the previous chained writes\n",
"# (k['S0'][k['S0'] == 2] = 1, ...) triggered SettingWithCopyWarning and\n",
"# are not guaranteed to modify k.\n",
"k['S0'] = (k['Sentiment'] != 0).astype('int64')\n",
"k"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Unnamed: 0 \n",
" PhraseId \n",
" SentenceId \n",
" Phrase \n",
" Sentiment \n",
" t0 \n",
" S2 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" 1 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" 2 \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" 3 \n",
" 1 \n",
" A series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" 4 \n",
" 1 \n",
" A \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" 5 \n",
" 1 \n",
" series \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" 156055 \n",
" 156056 \n",
" 8544 \n",
" Hearst 's \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156056 \n",
" 156056 \n",
" 156057 \n",
" 8544 \n",
" forced avuncular chortles \n",
" 1 \n",
" neg \n",
" neg \n",
" \n",
" \n",
" 156057 \n",
" 156057 \n",
" 156058 \n",
" 8544 \n",
" avuncular chortles \n",
" 3 \n",
" neg \n",
" pos \n",
" \n",
" \n",
" 156058 \n",
" 156058 \n",
" 156059 \n",
" 8544 \n",
" avuncular \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
" 156059 \n",
" 156059 \n",
" 156060 \n",
" 8544 \n",
" chortles \n",
" 2 \n",
" neg \n",
" 2 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 7 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PhraseId SentenceId \\\n",
"0 0 1 1 \n",
"1 1 2 1 \n",
"2 2 3 1 \n",
"3 3 4 1 \n",
"4 4 5 1 \n",
"... ... ... ... \n",
"156055 156055 156056 8544 \n",
"156056 156056 156057 8544 \n",
"156057 156057 156058 8544 \n",
"156058 156058 156059 8544 \n",
"156059 156059 156060 8544 \n",
"\n",
" Phrase Sentiment t0 S2 \n",
"0 A series of escapades demonstrating the adage ... 1 neg neg \n",
"1 A series of escapades demonstrating the adage ... 2 neg 2 \n",
"2 A series 2 neg 2 \n",
"3 A 2 neg 2 \n",
"4 series 2 neg 2 \n",
"... ... ... ... ... \n",
"156055 Hearst 's 2 neg 2 \n",
"156056 forced avuncular chortles 1 neg neg \n",
"156057 avuncular chortles 3 neg pos \n",
"156058 avuncular 2 neg 2 \n",
"156059 chortles 2 neg 2 \n",
"\n",
"[156060 rows x 7 columns]"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k3 = pd.read_csv('kaggle_csv.csv')\n",
"k3"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S0 \n",
" S0_0 \n",
" \n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Phrase, S0, S0_0]\n",
"Index: []"
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['S0_0'] == 0]"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows whose S0 label is 0. Summing the boolean mask counts the\n",
"# True values; .count() only counts non-null entries and so returns len(df).\n",
"(df['S0'] == 0).sum()"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"156060"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "invalid literal for int() with base 10: 'neg'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'S0'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[1;32m 5880\u001b[0m \u001b[0;31m# else, only a single dtype is given\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5881\u001b[0m new_data = self._data.astype(\n\u001b[0;32m-> 5882\u001b[0;31m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5883\u001b[0m )\n\u001b[1;32m 5884\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 581\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"astype\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 582\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/managers.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[1;32m 436\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb_items\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0malign_copy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 438\u001b[0;31m \u001b[0mapplied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 439\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36mastype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_astype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"raise\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/internals/blocks.py\u001b[0m in \u001b[0;36m_astype\u001b[0;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0mvals1d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvals1d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# TODO(extension)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/dtypes/cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m 705\u001b[0m \u001b[0;31m# work around NumPy brokenness, #1987\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0missubdtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minteger\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 707\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype_intsafe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 709\u001b[0m \u001b[0;31m# if we have a datetime/timedelta array of objects\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.astype_intsafe\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: 'neg'"
]
}
],
"source": [
"df['S0'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [],
"source": [
"k3 = pd.read_csv('kaggle_csv.csv')"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"k3['Sentiment'].values.min()"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"148988"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Two-column working frame: phrase text plus its sentiment label (S0).\n",
"df = pd.DataFrame({'Phrase': k3['Phrase'], 'S0': k3['Sentiment']})\n",
"\n",
"# S0_0 is the binary flag: 0 where S0 == 0, otherwise 1.\n",
"df['S0_0'] = df['S0'].ne(0).astype('int64')\n",
"int(df['S0_0'].eq(1).sum())"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7072"
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# How many rows carry the binary flag S0_0 == 0.\n",
"int(df['S0_0'].eq(0).sum())"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" Phrase \n",
" S0 \n",
" S0_0 \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" A series of escapades demonstrating the adage ... \n",
" 1 \n",
" 1 \n",
" \n",
" \n",
" 1 \n",
" A series of escapades demonstrating the adage ... \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 2 \n",
" A series \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 3 \n",
" A \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 4 \n",
" series \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 156055 \n",
" Hearst 's \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 156056 \n",
" forced avuncular chortles \n",
" 1 \n",
" 1 \n",
" \n",
" \n",
" 156057 \n",
" avuncular chortles \n",
" 3 \n",
" 1 \n",
" \n",
" \n",
" 156058 \n",
" avuncular \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
" 156059 \n",
" chortles \n",
" 2 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
156060 rows × 3 columns
\n",
"
"
],
"text/plain": [
" Phrase S0 S0_0\n",
"0 A series of escapades demonstrating the adage ... 1 1\n",
"1 A series of escapades demonstrating the adage ... 2 1\n",
"2 A series 2 1\n",
"3 A 2 1\n",
"4 series 2 1\n",
"... ... .. ...\n",
"156055 Hearst 's 2 1\n",
"156056 forced avuncular chortles 1 1\n",
"156057 avuncular chortles 3 1\n",
"156058 avuncular 2 1\n",
"156059 chortles 2 1\n",
"\n",
"[156060 rows x 3 columns]"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}