{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HW5 -- Artificial Artificial Intelligence" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "neg = pd.read_csv('AMT_neg.csv')\n", "pos = pd.read_csv('AMT_pos.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Initial EDA" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdHITTypeIdTitleDescriptionKeywordsRewardCreationTimeMaxAssignmentsRequesterAnnotationAssignmentDurationInSeconds...RejectionTimeRequesterFeedbackWorkTimeInSecondsLifetimeApprovalRateLast30DaysApprovalRateLast7DaysApprovalRateInput.textAnswer.sentiment.labelApproveReject
03IQ9O0AYW6ZI3GD740H32KGG2SWITJ3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:08:17 PDT 20193BatchId:3821423;OriginalHitTemplateId:928390909;10800...NaNNaN440% (0/0)0% (0/0)0% (0/0)Missed Opportunity\\nI had been very excited to...NeutralNaNNaN
13IQ9O0AYW6ZI3GD740H32KGG2SWITJ3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:08:17 PDT 20193BatchId:3821423;OriginalHitTemplateId:928390909;10800...NaNNaN70% (0/0)0% (0/0)0% (0/0)Missed Opportunity\\nI had been very excited to...NegativeNaNNaN
23IQ9O0AYW6ZI3GD740H32KGG2SWITJ3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:08:17 PDT 20193BatchId:3821423;OriginalHitTemplateId:928390909;10800...NaNNaN4490% (0/0)0% (0/0)0% (0/0)Missed Opportunity\\nI had been very excited to...PositiveNaNNaN
\n", "

3 rows × 31 columns

\n", "
" ], "text/plain": [ " HITId HITTypeId \\\n", "0 3IQ9O0AYW6ZI3GD740H32KGG2SWITJ 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "1 3IQ9O0AYW6ZI3GD740H32KGG2SWITJ 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "2 3IQ9O0AYW6ZI3GD740H32KGG2SWITJ 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "\n", " Title Description Keywords Reward \\\n", "0 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "1 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "2 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "\n", " CreationTime MaxAssignments \\\n", "0 Fri Nov 01 12:08:17 PDT 2019 3 \n", "1 Fri Nov 01 12:08:17 PDT 2019 3 \n", "2 Fri Nov 01 12:08:17 PDT 2019 3 \n", "\n", " RequesterAnnotation \\\n", "0 BatchId:3821423;OriginalHitTemplateId:928390909; \n", "1 BatchId:3821423;OriginalHitTemplateId:928390909; \n", "2 BatchId:3821423;OriginalHitTemplateId:928390909; \n", "\n", " AssignmentDurationInSeconds ... RejectionTime RequesterFeedback \\\n", "0 10800 ... NaN NaN \n", "1 10800 ... NaN NaN \n", "2 10800 ... NaN NaN \n", "\n", " WorkTimeInSeconds LifetimeApprovalRate Last30DaysApprovalRate \\\n", "0 44 0% (0/0) 0% (0/0) \n", "1 7 0% (0/0) 0% (0/0) \n", "2 449 0% (0/0) 0% (0/0) \n", "\n", " Last7DaysApprovalRate Input.text \\\n", "0 0% (0/0) Missed Opportunity\\nI had been very excited to... \n", "1 0% (0/0) Missed Opportunity\\nI had been very excited to... \n", "2 0% (0/0) Missed Opportunity\\nI had been very excited to... \n", "\n", " Answer.sentiment.label Approve Reject \n", "0 Neutral NaN NaN \n", "1 Negative NaN NaN \n", "2 Positive NaN NaN \n", "\n", "[3 rows x 31 columns]" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neg[:3]" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdHITTypeIdTitleDescriptionKeywordsRewardCreationTimeMaxAssignmentsRequesterAnnotationAssignmentDurationInSeconds...RejectionTimeRequesterFeedbackWorkTimeInSecondsLifetimeApprovalRateLast30DaysApprovalRateLast7DaysApprovalRateInput.textAnswer.sentiment.labelApproveReject
03VMV5CHJZ8F47P7CECH0H830NF4GTP3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:11:19 PDT 20193BatchId:3821427;OriginalHitTemplateId:928390909;10800...NaNNaN3550% (0/0)0% (0/0)0% (0/0)funny like a clown\\nGreetings again from the d...PositiveNaNNaN
13VMV5CHJZ8F47P7CECH0H830NF4GTP3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:11:19 PDT 20193BatchId:3821427;OriginalHitTemplateId:928390909;10800...NaNNaN4870% (0/0)0% (0/0)0% (0/0)funny like a clown\\nGreetings again from the d...NeutralNaNNaN
23VMV5CHJZ8F47P7CECH0H830NF4GTP3N0K7CX2I27L2NR2L8D93MF8LIRA5JSentiment analysisSentiment analysissentiment, text$0.02Fri Nov 01 12:11:19 PDT 20193BatchId:3821427;OriginalHitTemplateId:928390909;10800...NaNNaN10520% (0/0)0% (0/0)0% (0/0)funny like a clown\\nGreetings again from the d...PositiveNaNNaN
\n", "

3 rows × 31 columns

\n", "
" ], "text/plain": [ " HITId HITTypeId \\\n", "0 3VMV5CHJZ8F47P7CECH0H830NF4GTP 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "1 3VMV5CHJZ8F47P7CECH0H830NF4GTP 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "2 3VMV5CHJZ8F47P7CECH0H830NF4GTP 3N0K7CX2I27L2NR2L8D93MF8LIRA5J \n", "\n", " Title Description Keywords Reward \\\n", "0 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "1 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "2 Sentiment analysis Sentiment analysis sentiment, text $0.02 \n", "\n", " CreationTime MaxAssignments \\\n", "0 Fri Nov 01 12:11:19 PDT 2019 3 \n", "1 Fri Nov 01 12:11:19 PDT 2019 3 \n", "2 Fri Nov 01 12:11:19 PDT 2019 3 \n", "\n", " RequesterAnnotation \\\n", "0 BatchId:3821427;OriginalHitTemplateId:928390909; \n", "1 BatchId:3821427;OriginalHitTemplateId:928390909; \n", "2 BatchId:3821427;OriginalHitTemplateId:928390909; \n", "\n", " AssignmentDurationInSeconds ... RejectionTime RequesterFeedback \\\n", "0 10800 ... NaN NaN \n", "1 10800 ... NaN NaN \n", "2 10800 ... NaN NaN \n", "\n", " WorkTimeInSeconds LifetimeApprovalRate Last30DaysApprovalRate \\\n", "0 355 0% (0/0) 0% (0/0) \n", "1 487 0% (0/0) 0% (0/0) \n", "2 1052 0% (0/0) 0% (0/0) \n", "\n", " Last7DaysApprovalRate Input.text \\\n", "0 0% (0/0) funny like a clown\\nGreetings again from the d... \n", "1 0% (0/0) funny like a clown\\nGreetings again from the d... \n", "2 0% (0/0) funny like a clown\\nGreetings again from the d... \n", "\n", " Answer.sentiment.label Approve Reject \n", "0 Positive NaN NaN \n", "1 Neutral NaN NaN \n", "2 Positive NaN NaN \n", "\n", "[3 rows x 31 columns]" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pos[:3]" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['HITId',\n", " 'HITTypeId',\n", " 'Title',\n", " 'Description',\n", " 'Keywords',\n", " 'Reward',\n", " 'CreationTime',\n", " 'MaxAssignments',\n", " 'RequesterAnnotation',\n", " 'AssignmentDurationInSeconds',\n", " 'AutoApprovalDelayInSeconds',\n", " 'Expiration',\n", " 'NumberOfSimilarHITs',\n", " 'LifetimeInSeconds',\n", " 'AssignmentId',\n", " 'WorkerId',\n", " 'AssignmentStatus',\n", " 'AcceptTime',\n", " 'SubmitTime',\n", " 'AutoApprovalTime',\n", " 'ApprovalTime',\n", " 'RejectionTime',\n", " 'RequesterFeedback',\n", " 'WorkTimeInSeconds',\n", " 'LifetimeApprovalRate',\n", " 'Last30DaysApprovalRate',\n", " 'Last7DaysApprovalRate',\n", " 'Input.text',\n", " 'Answer.sentiment.label',\n", " 'Approve',\n", " 'Reject']" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "neg.columns.tolist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How many unique turkers worked on each dataframe?" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "53 Turkers worked on NEG batch\n", "38 Turkers worked on POS batch\n" ] } ], "source": [ "def get_unique(df, column):\n", " unique = np.unique(df[column], return_counts=True)\n", " df = pd.DataFrame(zip(unique[0], unique[1]))\n", " return len(unique[0]), unique, df\n", "\n", "num_neg, unique_neg, u_neg_df = get_unique(neg, 'WorkerId') \n", "num_pos, unique_pos, u_pos_df = get_unique(pos, 'WorkerId')\n", "\n", "print(num_neg, 'Turkers worked on NEG batch')\n", "print(num_pos, 'Turkers worked on POS batch')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How many HITS did each unique turker do?" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "u_neg_df.plot(kind='bar',x=0,y=1)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "u_pos_df.plot(kind='bar',x=0,y=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### What's the `max` and `min` HIT for unique turkers" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "For neg, the min was: 1 and the max was: 37\n", "For pos, the min was: 1 and the max was: 40\n" ] } ], "source": [ "print('For {}, the min was: {} and the max was: {}'.format('neg', unique_neg[1].min(), unique_neg[1].max())) \n", "print('For {}, the min was: {} and the max was: {}'.format('pos', unique_pos[1].min(), unique_pos[1].max())) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Did a specitic Sentiment take longer for turkers to assess? " ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1, 'Negative')" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "sns.catplot(x=\"Answer.sentiment.label\", \n", " y=\"WorkTimeInSeconds\", \n", " kind=\"bar\", \n", " order=['Negative', 'Neutral', 'Positive'], \n", " data=neg);\n", "plt.title('Negative')" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1, 'Positive')" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.catplot(x=\"Answer.sentiment.label\", \n", " y=\"WorkTimeInSeconds\", \n", " kind=\"bar\", \n", " order=['Negative', 'Neutral', 'Positive'], \n", " data=pos)\n", "plt.title('Positive')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How many turkers had less than 10 second response time?" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "response_time = neg[neg['WorkTimeInSeconds'] < 10]\n", "response_time_check = neg[neg['WorkTimeInSeconds'] > 10]" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "48" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(response_time)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "312" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(response_time_check)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Checking for potential bots" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Did anyone have a consistent average low response time?" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WorkTimeInSecondsHITId
WorkerId
A13CLN8L5HFT467.23076913.0
A18WFPSLFV4FKY47.0000002.0
A1IQV3QUWRA8G122.0000001.0
A1N1ULK71RHVMM10.0000003.0
A1S2MN0E9BHPVA173.44444427.0
\n", "
" ], "text/plain": [ " WorkTimeInSeconds HITId\n", "WorkerId \n", "A13CLN8L5HFT46 7.230769 13.0\n", "A18WFPSLFV4FKY 47.000000 2.0\n", "A1IQV3QUWRA8G1 22.000000 1.0\n", "A1N1ULK71RHVMM 10.000000 3.0\n", "A1S2MN0E9BHPVA 173.444444 27.0" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "count = pos.groupby(['WorkerId'])['HITId'].count()\n", "work_time = pos.groupby(['WorkerId'])['WorkTimeInSeconds'].mean()\n", "new_df = pd.DataFrame([work_time, count]).T\n", "new_df[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Did anyone have a consistent average high response time?" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
WorkTimeInSecondsHITIdWorkTimeInMin
WorkerId
A13CLN8L5HFT467.23076913.00.120513
A18WFPSLFV4FKY47.0000002.00.783333
A1IQV3QUWRA8G122.0000001.00.366667
A1N1ULK71RHVMM10.0000003.00.166667
A1S2MN0E9BHPVA173.44444427.02.890741
\n", "
" ], "text/plain": [ " WorkTimeInSeconds HITId WorkTimeInMin\n", "WorkerId \n", "A13CLN8L5HFT46 7.230769 13.0 0.120513\n", "A18WFPSLFV4FKY 47.000000 2.0 0.783333\n", "A1IQV3QUWRA8G1 22.000000 1.0 0.366667\n", "A1N1ULK71RHVMM 10.000000 3.0 0.166667\n", "A1S2MN0E9BHPVA 173.444444 27.0 2.890741" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df['WorkTimeInMin'] = new_df['WorkTimeInSeconds']/60\n", "new_df[:5]" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "WorkerId Answer.sentiment.label\n", "A13CLN8L5HFT46 Neutral 2\n", " Positive 11\n", "A18WFPSLFV4FKY Positive 2\n", "A1IQV3QUWRA8G1 Positive 1\n", "A1N1ULK71RHVMM Negative 1\n", " ..\n", "AMC42JMQA8A5U Positive 1\n", "AO2WNSGOXAX52 Neutral 3\n", " Positive 1\n", "AOMFEAWQHU3D8 Neutral 1\n", " Positive 6\n", "Name: Answer.sentiment.label, Length: 74, dtype: int64" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "count = pos.groupby(['WorkerId', 'Answer.sentiment.label'])['Answer.sentiment.label'].count()\n", "# count = pos.groupby(['WorkerId'])['Answer.sentiment.label'].count()\n", "count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Did anyone answer ONLY pos/neg/neutral?" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NeutralPositiveNegativeTotal
WorkerId
A13CLN8L5HFT46211013
A18WFPSLFV4FKY0202
A1IQV3QUWRA8G10101
A1N1ULK71RHVMM0213
A1S2MN0E9BHPVA221427
\n", "
" ], "text/plain": [ " Neutral Positive Negative Total\n", "WorkerId \n", "A13CLN8L5HFT46 2 11 0 13\n", "A18WFPSLFV4FKY 0 2 0 2\n", "A1IQV3QUWRA8G1 0 1 0 1\n", "A1N1ULK71RHVMM 0 2 1 3\n", "A1S2MN0E9BHPVA 2 21 4 27" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pnn = pd.DataFrame()\n", "pnn['Neutral'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Neutral').sum())\n", "pnn['Positive'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Positive').sum())\n", "pnn['Negative'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Negative').sum())\n", "pnn['Total'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: x.count())\n", "pnn[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### This is getting a little confusing, let's just look at our top performers" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [], "source": [ "top = pnn.sort_values(by=['Total'], ascending=False)" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NeutralPositiveNegativeTotal
WorkerId
A681XM15AN28F1320740
A1Y66T7FKJ8PJA523735
A33ENZVC1XB4BA034034
A1S2MN0E9BHPVA221427
A37L5E8MHHQGZM613322
AE03LUY7RH400410721
A2G44A4ZPWRPXU412218
A1YK1IKACUJMV4015015
A3AW887GI0NLKF310215
A3HAEQW13YPT6A014014
\n", "
" ], "text/plain": [ " Neutral Positive Negative Total\n", "WorkerId \n", "A681XM15AN28F 13 20 7 40\n", "A1Y66T7FKJ8PJA 5 23 7 35\n", "A33ENZVC1XB4BA 0 34 0 34\n", "A1S2MN0E9BHPVA 2 21 4 27\n", "A37L5E8MHHQGZM 6 13 3 22\n", "AE03LUY7RH400 4 10 7 21\n", "A2G44A4ZPWRPXU 4 12 2 18\n", "A1YK1IKACUJMV4 0 15 0 15\n", "A3AW887GI0NLKF 3 10 2 15\n", "A3HAEQW13YPT6A 0 14 0 14" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Interesting!! Looking from here, we have three workers who ONLY chose positive. \n", "\n", "Let's look at their response time to see if we can determine if they are a bot!!" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "top['Avg_WorkTimeInSeconds'] = pos.groupby('WorkerId')['WorkTimeInSeconds'].apply(lambda x: x.mean())\n", "top['Avg_WorkTimeInMin'] = pos.groupby('WorkerId')['WorkTimeInSeconds'].apply(lambda x: x.mean()/60)\n", "top['Min_WorkTimeInMin'] = pos.groupby('WorkerId')['WorkTimeInSeconds'].apply(lambda x: x.min()/60)\n", "top['Max_WorkTimeInMin'] = pos.groupby('WorkerId')['WorkTimeInSeconds'].apply(lambda x: x.max()/60)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NeutralPositiveNegativeTotalAvg_WorkTimeInSecondsAvg_WorkTimeInMinMin_WorkTimeInMinMax_WorkTimeInMin
WorkerId
A681XM15AN28F132074013.5750000.2262500.1000000.833333
A1Y66T7FKJ8PJA523735695.85714311.5976190.21666722.000000
A33ENZVC1XB4BA034034366.6470596.1107840.6166679.916667
A1S2MN0E9BHPVA221427173.4444442.8907410.4000004.983333
A37L5E8MHHQGZM613322346.2727275.7712122.1500008.283333
AE03LUY7RH400410721102.2380951.7039680.1000003.433333
A2G44A4ZPWRPXU412218221.2777783.6879630.3833337.383333
A1YK1IKACUJMV4015015593.6000009.8933331.71666711.000000
A3AW887GI0NLKF310215269.4000004.4900001.6166677.216667
A3HAEQW13YPT6A014014442.9285717.3821430.86666711.100000
\n", "
" ], "text/plain": [ " Neutral Positive Negative Total Avg_WorkTimeInSeconds \\\n", "WorkerId \n", "A681XM15AN28F 13 20 7 40 13.575000 \n", "A1Y66T7FKJ8PJA 5 23 7 35 695.857143 \n", "A33ENZVC1XB4BA 0 34 0 34 366.647059 \n", "A1S2MN0E9BHPVA 2 21 4 27 173.444444 \n", "A37L5E8MHHQGZM 6 13 3 22 346.272727 \n", "AE03LUY7RH400 4 10 7 21 102.238095 \n", "A2G44A4ZPWRPXU 4 12 2 18 221.277778 \n", "A1YK1IKACUJMV4 0 15 0 15 593.600000 \n", "A3AW887GI0NLKF 3 10 2 15 269.400000 \n", "A3HAEQW13YPT6A 0 14 0 14 442.928571 \n", "\n", " Avg_WorkTimeInMin Min_WorkTimeInMin Max_WorkTimeInMin \n", "WorkerId \n", "A681XM15AN28F 0.226250 0.100000 0.833333 \n", "A1Y66T7FKJ8PJA 11.597619 0.216667 22.000000 \n", "A33ENZVC1XB4BA 6.110784 0.616667 9.916667 \n", "A1S2MN0E9BHPVA 2.890741 0.400000 4.983333 \n", "A37L5E8MHHQGZM 5.771212 2.150000 8.283333 \n", "AE03LUY7RH400 1.703968 0.100000 3.433333 \n", "A2G44A4ZPWRPXU 3.687963 0.383333 7.383333 \n", "A1YK1IKACUJMV4 9.893333 1.716667 11.000000 \n", "A3AW887GI0NLKF 4.490000 1.616667 7.216667 \n", "A3HAEQW13YPT6A 7.382143 0.866667 11.100000 " ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top[:10]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Even more interesting! These two don't appear to be bots, based on our current metric which is time variability.\n", "\n", "HOWEVER, worker `A681XM15AN28F` appears to only work for an average of 13 seconds per review which doesn't seem like enough time to read and judge a review..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## PART 2: Second submission to AMT\n", "\n", "TOO MANY REVIEWERS!\n", "\n", "Here is when we realized that doing a kappa score with over 30 individual reviewers would be tricky, so we rusubmitted to AMT and required the turkers to be 'Master' in the hopes that this additional barrier-to-entry would help reduce the amount of turkers working on the project" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "293" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "v2 = pd.read_csv('HW5_amt_v2.csv')\n", "v2[:5]\n", "len(v2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This time, I didn't separate the df into pos and neg before submitting to AMT, so we have to reimport the labels." ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "labels = pd.read_csv('all_JK_extremes_labeled.csv')" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "98" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(labels)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Oops! That's right, we replicated each review * 3 so three separate people could look at each review" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "labels2 = labels.append([labels] * 2, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "294" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(labels2)" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0PoN
76#LetRottenTomatoesRotSquad\\nI am a simple guy...P
174#LetRottenTomatoesRotSquad\\nI am a simple guy...P
272#LetRottenTomatoesRotSquad\\nI am a simple guy...P
116A 'Triumph of the Will' for Nihilists\\n'Joker...N
18A 'Triumph of the Will' for Nihilists\\n'Joker...N
.........
227lose of both time and money\\nThis was one of ...N
31lose of both time and money\\nThis was one of ...N
207poor plot\\nPoor plot. i find no reason for jo...N
11poor plot\\nPoor plot. i find no reason for jo...N
109poor plot\\nPoor plot. i find no reason for jo...N
\n", "

294 rows × 2 columns

\n", "
" ], "text/plain": [ " 0 PoN\n", "76 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "174 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "272 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "116 A 'Triumph of the Will' for Nihilists\\n'Joker... N\n", "18 A 'Triumph of the Will' for Nihilists\\n'Joker... N\n", ".. ... ..\n", "227 lose of both time and money\\nThis was one of ... N\n", "31 lose of both time and money\\nThis was one of ... N\n", "207 poor plot\\nPoor plot. i find no reason for jo... N\n", "11 poor plot\\nPoor plot. i find no reason for jo... N\n", "109 poor plot\\nPoor plot. i find no reason for jo... N\n", "\n", "[294 rows x 2 columns]" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "labels2.sort_values(by='0')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Shoot! I realized I had to delete some emojis for the csv to be accepted by AMT, so the reviews themselves won't actually be matching... solution: Create two 'for-matching' columns made up of the first 5 words of each review" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "v2['for_matching'] = v2.apply(lambda x: x['Input.text'].split()[:5], axis=1)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "labels2['for_matching'] = labels2.apply(lambda x: x['0'].split()[:5], axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Annnnnd why did I do that when I could just sort the df and apply the PoN" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0PoNfor_matching
76#LetRottenTomatoesRotSquad\\nI am a simple guy...P[#LetRottenTomatoesRotSquad, I, am, a, simple]
174#LetRottenTomatoesRotSquad\\nI am a simple guy...P[#LetRottenTomatoesRotSquad, I, am, a, simple]
272#LetRottenTomatoesRotSquad\\nI am a simple guy...P[#LetRottenTomatoesRotSquad, I, am, a, simple]
116A 'Triumph of the Will' for Nihilists\\n'Joker...N[A, 'Triumph, of, the, Will']
18A 'Triumph of the Will' for Nihilists\\n'Joker...N[A, 'Triumph, of, the, Will']
214A 'Triumph of the Will' for Nihilists\\n'Joker...N[A, 'Triumph, of, the, Will']
\n", "
" ], "text/plain": [ " 0 PoN \\\n", "76 #LetRottenTomatoesRotSquad\\nI am a simple guy... P \n", "174 #LetRottenTomatoesRotSquad\\nI am a simple guy... P \n", "272 #LetRottenTomatoesRotSquad\\nI am a simple guy... P \n", "116 A 'Triumph of the Will' for Nihilists\\n'Joker... N \n", "18 A 'Triumph of the Will' for Nihilists\\n'Joker... N \n", "214 A 'Triumph of the Will' for Nihilists\\n'Joker... N \n", "\n", " for_matching \n", "76 [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "174 [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "272 [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "116 [A, 'Triumph, of, the, Will'] \n", "18 [A, 'Triumph, of, the, Will'] \n", "214 [A, 'Triumph, of, the, Will'] " ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted_labels = labels2.sort_values(by='0')\n", "sorted_labels[:6]" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textAnswer.sentiment.labelApproveRejectfor_matching
229#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaN[#LetRottenTomatoesRotSquad, I, am, a, simple]
228#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaN[#LetRottenTomatoesRotSquad, I, am, a, simple]
227#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaN[#LetRottenTomatoesRotSquad, I, am, a, simple]
53A 'Triumph of the Will' for Nihilists\\n'Joker...NeutralNaNNaN[A, 'Triumph, of, the, Will']
55A 'Triumph of the Will' for Nihilists\\n'Joker...NegativeNaNNaN[A, 'Triumph, of, the, Will']
54A 'Triumph of the Will' for Nihilists\\n'Joker...NegativeNaNNaN[A, 'Triumph, of, the, Will']
\n", "
" ], "text/plain": [ " Input.text Answer.sentiment.label \\\n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "228 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "227 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "53 A 'Triumph of the Will' for Nihilists\\n'Joker... Neutral \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... Negative \n", "54 A 'Triumph of the Will' for Nihilists\\n'Joker... Negative \n", "\n", " Approve Reject for_matching \n", "229 NaN NaN [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "228 NaN NaN [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "227 NaN NaN [#LetRottenTomatoesRotSquad, I, am, a, simple] \n", "53 NaN NaN [A, 'Triumph, of, the, Will'] \n", "55 NaN NaN [A, 'Triumph, of, the, Will'] \n", "54 NaN NaN [A, 'Triumph, of, the, Will'] " ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted_v2 = v2.sort_values(by='Input.text')\n", "sorted_v2[sorted_v2.columns[-5:]][:6]" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "all_df = sorted_v2.copy()\n", "# all_df['PoN'] = sorted_labels['PoN'].tolist()\n", "# THIS DIDN'T WORK BECAUSE I DIDN'T WAIT UNTIL ALL WERE DONE FROM AMT. RESEARCHER ERROR BUT OMG I HATE MYSELF" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "293" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(all_df)" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "97.66666666666667" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "293/3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Confirming that YEP. 293 isn't divisible by 3, meaning I didn't wait until the last turker finished. omg." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Reuploading now -- WITH BETTER CODE AND BETTER VARIABLE NAMES!" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "294\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Last7DaysApprovalRateInput.textAnswer.sentiment.labelApproveReject
00% (0/0)Everyone praised an overrated movie.\\nOverrat...NegativeNaNNaN
10% (0/0)Everyone praised an overrated movie.\\nOverrat...NegativeNaNNaN
20% (0/0)Everyone praised an overrated movie.\\nOverrat...NegativeNaNNaN
30% (0/0)What idiotic FIlm\\nI can say that Phoenix is ...NegativeNaNNaN
40% (0/0)What idiotic FIlm\\nI can say that Phoenix is ...NegativeNaNNaN
\n", "
" ], "text/plain": [ " Last7DaysApprovalRate Input.text \\\n", "0 0% (0/0) Everyone praised an overrated movie.\\nOverrat... \n", "1 0% (0/0) Everyone praised an overrated movie.\\nOverrat... \n", "2 0% (0/0) Everyone praised an overrated movie.\\nOverrat... \n", "3 0% (0/0) What idiotic FIlm\\nI can say that Phoenix is ... \n", "4 0% (0/0) What idiotic FIlm\\nI can say that Phoenix is ... \n", "\n", " Answer.sentiment.label Approve Reject \n", "0 Negative NaN NaN \n", "1 Negative NaN NaN \n", "2 Negative NaN NaN \n", "3 Negative NaN NaN \n", "4 Negative NaN NaN " ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker = pd.read_csv('HW5_amt_294.csv')\n", "print(len(turker))\n", "turker[turker.columns[-5:]][:5]" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "294\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0PoN
0Everyone praised an overrated movie.\\nOverrat...N
1What idiotic FIlm\\nI can say that Phoenix is ...N
2Terrible\\nThe only thing good about this movi...N
3Watch Taxi Driver instead\\nThis is a poor att...N
4I learned one thing.\\nIt borrows a lot of ele...N
\n", "
" ], "text/plain": [ " 0 PoN\n", "0 Everyone praised an overrated movie.\\nOverrat... N\n", "1 What idiotic FIlm\\nI can say that Phoenix is ... N\n", "2 Terrible\\nThe only thing good about this movi... N\n", "3 Watch Taxi Driver instead\\nThis is a poor att... N\n", "4 I learned one thing.\\nIt borrows a lot of ele... N" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Getting labels...\n", "labels = pd.read_csv('all_JK_extremes_labeled.csv')\n", "# X3\n", "labels = labels.append([labels] * 2, ignore_index=True)\n", "print(len(labels))\n", "labels[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### NOW, TO SORT!" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "sorted_labels = labels.sort_values(by=['0'])\n", "sorted_turker = turker.sort_values(by=['Input.text'])" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0PoN
76#LetRottenTomatoesRotSquad\\nI am a simple guy...P
174#LetRottenTomatoesRotSquad\\nI am a simple guy...P
272#LetRottenTomatoesRotSquad\\nI am a simple guy...P
116A 'Triumph of the Will' for Nihilists\\n'Joker...N
18A 'Triumph of the Will' for Nihilists\\n'Joker...N
\n", "
" ], "text/plain": [ " 0 PoN\n", "76 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "174 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "272 #LetRottenTomatoesRotSquad\\nI am a simple guy... P\n", "116 A 'Triumph of the Will' for Nihilists\\n'Joker... N\n", "18 A 'Triumph of the Will' for Nihilists\\n'Joker... N" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted_labels[:5]" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "228 #LetRottenTomatoesRotSquad\\nI am a simple guy...\n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy...\n", "230 #LetRottenTomatoesRotSquad\\nI am a simple guy...\n", "56 A 'Triumph of the Will' for Nihilists\\n'Joker...\n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker...\n", "Name: Input.text, dtype: object" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted_turker['Input.text'][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "OMG HOORAY HOORAY HOORAY!!\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NOTE: FUN FACT!! I can type here and then hit the `esc` key to turn this cell into markdown!!" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textAnswer.sentiment.labelApproveRejectPoN
228#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaNP
229#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaNP
230#LetRottenTomatoesRotSquad\\nI am a simple guy...PositiveNaNNaNP
56A 'Triumph of the Will' for Nihilists\\n'Joker...NegativeNaNNaNN
55A 'Triumph of the Will' for Nihilists\\n'Joker...NegativeNaNNaNN
\n", "
" ], "text/plain": [ " Input.text Answer.sentiment.label \\\n", "228 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "230 #LetRottenTomatoesRotSquad\\nI am a simple guy... Positive \n", "56 A 'Triumph of the Will' for Nihilists\\n'Joker... Negative \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... Negative \n", "\n", " Approve Reject PoN \n", "228 NaN NaN P \n", "229 NaN NaN P \n", "230 NaN NaN P \n", "56 NaN NaN N \n", "55 NaN NaN N " ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# YUCK THIS IS SO AGGRIVATING!! This line below doens't work because it still uses indexes.\n", "# So the P and N didn't match up \n", "# sorted_turker['PoN'] = sorted_labels['PoN']\n", "sorted_turker['PoN'] = sorted_labels['PoN'].tolist()\n", "sorted_turker[sorted_turker.columns[-5:]][:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## PART 3: ANALYZE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, let's clean ALL the things" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "all_df = sorted_turker[['Input.text', 'WorkerId', 'Answer.sentiment.label', 'PoN']]" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textWorkerIdAnswer.sentiment.labelPoN
228#LetRottenTomatoesRotSquad\\nI am a simple guy...A681XM15AN28FPositiveP
229#LetRottenTomatoesRotSquad\\nI am a simple guy...A2XFO0X6RCS98MPositiveP
230#LetRottenTomatoesRotSquad\\nI am a simple guy...AURYD2FH3FUOQPositiveP
56A 'Triumph of the Will' for Nihilists\\n'Joker...A1T79J0XQXDDGCNegativeN
55A 'Triumph of the Will' for Nihilists\\n'Joker...A2XFO0X6RCS98MNegativeN
\n", "
" ], "text/plain": [ " Input.text WorkerId \\\n", "228 #LetRottenTomatoesRotSquad\\nI am a simple guy... A681XM15AN28F \n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy... A2XFO0X6RCS98M \n", "230 #LetRottenTomatoesRotSquad\\nI am a simple guy... AURYD2FH3FUOQ \n", "56 A 'Triumph of the Will' for Nihilists\\n'Joker... A1T79J0XQXDDGC \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... A2XFO0X6RCS98M \n", "\n", " Answer.sentiment.label PoN \n", "228 Positive P \n", "229 Positive P \n", "230 Positive P \n", "56 Negative N \n", "55 Negative N " ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_df[:5]" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "all_df_all = all_df.copy()\n", "all_df_all['APoN'] = all_df_all.apply(lambda x: x['Answer.sentiment.label'][0], axis=1)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textWorkerIdAnswer.sentiment.labelPoNAPoN
228#LetRottenTomatoesRotSquad\\nI am a simple guy...A681XM15AN28FPositivePP
229#LetRottenTomatoesRotSquad\\nI am a simple guy...A2XFO0X6RCS98MPositivePP
230#LetRottenTomatoesRotSquad\\nI am a simple guy...AURYD2FH3FUOQPositivePP
56A 'Triumph of the Will' for Nihilists\\n'Joker...A1T79J0XQXDDGCNegativeNN
55A 'Triumph of the Will' for Nihilists\\n'Joker...A2XFO0X6RCS98MNegativeNN
..................
265Venice 76 review\\nI have just watched the Joke...ARLGZWN6W91WDPositiveNP
266Venice 76 review\\nI have just watched the Joke...A38DC3BG1ZCVZ2PositiveNP
93lose of both time and money\\nThis was one of t...A2XFO0X6RCS98MNegativeNN
94lose of both time and money\\nThis was one of t...A3EZ0H07TSDAPWNegativeNN
95lose of both time and money\\nThis was one of t...ASB8T0H7L99RFNegativeNN
\n", "

294 rows × 5 columns

\n", "
" ], "text/plain": [ " Input.text WorkerId \\\n", "228 #LetRottenTomatoesRotSquad\\nI am a simple guy... A681XM15AN28F \n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy... A2XFO0X6RCS98M \n", "230 #LetRottenTomatoesRotSquad\\nI am a simple guy... AURYD2FH3FUOQ \n", "56 A 'Triumph of the Will' for Nihilists\\n'Joker... A1T79J0XQXDDGC \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... A2XFO0X6RCS98M \n", ".. ... ... \n", "265 Venice 76 review\\nI have just watched the Joke... ARLGZWN6W91WD \n", "266 Venice 76 review\\nI have just watched the Joke... A38DC3BG1ZCVZ2 \n", "93 lose of both time and money\\nThis was one of t... A2XFO0X6RCS98M \n", "94 lose of both time and money\\nThis was one of t... A3EZ0H07TSDAPW \n", "95 lose of both time and money\\nThis was one of t... ASB8T0H7L99RF \n", "\n", " Answer.sentiment.label PoN APoN \n", "228 Positive P P \n", "229 Positive P P \n", "230 Positive P P \n", "56 Negative N N \n", "55 Negative N N \n", ".. ... .. ... \n", "265 Positive N P \n", "266 Positive N P \n", "93 Negative N N \n", "94 Negative N N \n", "95 Negative N N \n", "\n", "[294 rows x 5 columns]" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_df_all" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "all_df_all['agree'] = all_df_all.apply(lambda x: x['PoN'] == x['APoN'], axis=1)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textWorkerIdAnswer.sentiment.labelPoNAPoNagree
38This is extremely bad...\\nThis whole film make...A3EZ0H07TSDAPWNegativeNNTrue
216Took my 65 year old mother to see it.\\nI saw t...A3EZ0H07TSDAPWPositiveNPFalse
217Took my 65 year old mother to see it.\\nI saw t...A2XFO0X6RCS98MPositiveNPFalse
218Took my 65 year old mother to see it.\\nI saw t...AKSJ3C5O3V9RBPositiveNPFalse
264Venice 76 review\\nI have just watched the Joke...A3EZ0H07TSDAPWPositiveNPFalse
265Venice 76 review\\nI have just watched the Joke...ARLGZWN6W91WDPositiveNPFalse
266Venice 76 review\\nI have just watched the Joke...A38DC3BG1ZCVZ2PositiveNPFalse
93lose of both time and money\\nThis was one of t...A2XFO0X6RCS98MNegativeNNTrue
94lose of both time and money\\nThis was one of t...A3EZ0H07TSDAPWNegativeNNTrue
95lose of both time and money\\nThis was one of t...ASB8T0H7L99RFNegativeNNTrue
\n", "
" ], "text/plain": [ " Input.text WorkerId \\\n", "38 This is extremely bad...\\nThis whole film make... A3EZ0H07TSDAPW \n", "216 Took my 65 year old mother to see it.\\nI saw t... A3EZ0H07TSDAPW \n", "217 Took my 65 year old mother to see it.\\nI saw t... A2XFO0X6RCS98M \n", "218 Took my 65 year old mother to see it.\\nI saw t... AKSJ3C5O3V9RB \n", "264 Venice 76 review\\nI have just watched the Joke... A3EZ0H07TSDAPW \n", "265 Venice 76 review\\nI have just watched the Joke... ARLGZWN6W91WD \n", "266 Venice 76 review\\nI have just watched the Joke... A38DC3BG1ZCVZ2 \n", "93 lose of both time and money\\nThis was one of t... A2XFO0X6RCS98M \n", "94 lose of both time and money\\nThis was one of t... A3EZ0H07TSDAPW \n", "95 lose of both time and money\\nThis was one of t... ASB8T0H7L99RF \n", "\n", " Answer.sentiment.label PoN APoN agree \n", "38 Negative N N True \n", "216 Positive N P False \n", "217 Positive N P False \n", "218 Positive N P False \n", "264 Positive N P False \n", "265 Positive N P False \n", "266 Positive N P False \n", "93 Negative N N True \n", "94 Negative N N True \n", "95 Negative N N True " ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_df_all[-10:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Lets see how many agree!" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textPoNagree
0#LetRottenTomatoesRotSquad\\nI am a simple guy...P1.000000
1A 'Triumph of the Will' for Nihilists\\n'Joker...N1.000000
2A Breath of Fresh Cinema\\nBursting with emoti...P1.000000
3A MASTERPIECE\\nJoaquin Phoenix's performance ...N0.333333
4A brilliant movie\\nThis movie is slow but nev...P1.000000
\n", "
" ], "text/plain": [ " Input.text PoN agree\n", "0 #LetRottenTomatoesRotSquad\\nI am a simple guy... P 1.000000\n", "1 A 'Triumph of the Will' for Nihilists\\n'Joker... N 1.000000\n", "2 A Breath of Fresh Cinema\\nBursting with emoti... P 1.000000\n", "3 A MASTERPIECE\\nJoaquin Phoenix's performance ... N 0.333333\n", "4 A brilliant movie\\nThis movie is slow but nev... P 1.000000" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "agree_df = pd.DataFrame(all_df_all.groupby(['Input.text','PoN'])['agree'].mean())\n", "agree_df = agree_df.reset_index()\n", "agree_df[:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "OK so this actually gave us something we want...\n", "BUT PLEASE TELL ME THE BETTER WAY!!" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textPoNagreeagree_factor
0#LetRottenTomatoesRotSquad\\nI am a simple guy...P1.000000agree
1A 'Triumph of the Will' for Nihilists\\n'Joker...N1.000000agree
2A Breath of Fresh Cinema\\nBursting with emoti...P1.000000agree
3A MASTERPIECE\\nJoaquin Phoenix's performance ...N0.333333disparity
4A brilliant movie\\nThis movie is slow but nev...P1.000000agree
...............
93The mirror of society\\nActing 10/10\\nActors 10...N0.000000agree_wrong
94This is extremely bad...\\nThis whole film make...N1.000000agree
95Took my 65 year old mother to see it.\\nI saw t...N0.000000agree_wrong
96Venice 76 review\\nI have just watched the Joke...N0.000000agree_wrong
97lose of both time and money\\nThis was one of t...N1.000000agree
\n", "

98 rows × 4 columns

\n", "
" ], "text/plain": [ " Input.text PoN agree \\\n", "0 #LetRottenTomatoesRotSquad\\nI am a simple guy... P 1.000000 \n", "1 A 'Triumph of the Will' for Nihilists\\n'Joker... N 1.000000 \n", "2 A Breath of Fresh Cinema\\nBursting with emoti... P 1.000000 \n", "3 A MASTERPIECE\\nJoaquin Phoenix's performance ... N 0.333333 \n", "4 A brilliant movie\\nThis movie is slow but nev... P 1.000000 \n", ".. ... .. ... \n", "93 The mirror of society\\nActing 10/10\\nActors 10... N 0.000000 \n", "94 This is extremely bad...\\nThis whole film make... N 1.000000 \n", "95 Took my 65 year old mother to see it.\\nI saw t... N 0.000000 \n", "96 Venice 76 review\\nI have just watched the Joke... N 0.000000 \n", "97 lose of both time and money\\nThis was one of t... N 1.000000 \n", "\n", " agree_factor \n", "0 agree \n", "1 agree \n", "2 agree \n", "3 disparity \n", "4 agree \n", ".. ... \n", "93 agree_wrong \n", "94 agree \n", "95 agree_wrong \n", "96 agree_wrong \n", "97 agree \n", "\n", "[98 rows x 4 columns]" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def return_agreement(num):\n", " if num == 0:\n", " return 'agree_wrong'\n", " if num == 1:\n", " return 'agree'\n", " if (num/1) !=0:\n", " return 'disparity'\n", "\n", "agree_df['agree_factor'] = agree_df.apply(lambda x: return_agreement(x['agree']), axis=1)\n", "agree_df" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'gdf_forplot' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# data=gdp_forplot);\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# plt.title('By Polarity')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mgdf_forplot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgdf_forplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'gdf_forplot' is not defined" ] } ], "source": [ "# sns.catplot(x=\"Answer.sentiment.label\", \n", "# y=\"WorkTimeInSeconds\", \n", "# kind=\"bar\", \n", "# order=['Negative', 'Neutral', 'Positive'], \n", "# data=gdp_forplot);\n", "# plt.title('By Polarity')\n", "gdf_forplot = gdf_forplot.reset_index()" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'gdf_forplot' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mgdf_forplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'agree'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'agree'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'gdf_forplot' is not defined" ] } ], "source": [ "gdf_forplot.groupby(['agree'])['agree'].count()" ] }, { "cell_type": "code", "execution_count": 382, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "agree PoN\n", "0.000000 N 14\n", " P 17\n", "0.333333 N 10\n", " P 7\n", "0.666667 N 9\n", " P 8\n", "1.000000 N 15\n", " P 18\n", "Name: agree, dtype: int64" ] }, "execution_count": 382, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf_forplot.groupby(['agree','PoN'])['agree'].count()" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agree_factorInput.textPoNagree
0agree333333
1agree_wrong313131
2disparity343434
\n", "
" ], "text/plain": [ " agree_factor Input.text PoN agree\n", "0 agree 33 33 33\n", "1 agree_wrong 31 31 31\n", "2 disparity 34 34 34" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = agree_df.groupby(['agree_factor']).count()\n", "df1.reset_index(inplace=True)\n", "df1" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'How many turkers agreed on sentiment?')" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.barplot(x=['Agreed', 'Disagreed'],\n", " y= [64,34],\n", " data = df1);\n", "plt.title('How many turkers agreed on sentiment?')" ] }, { "cell_type": "code", "execution_count": 129, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'How many turkers agreed on sentiment, but were wrong?')" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.barplot(x=\"agree_factor\", y=\"agree\", data=df1);\n", "plt.title('How many turkers agreed on sentiment, but were wrong?')" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "df2 = agree_df.groupby(['agree_factor', 'PoN']).count()\n", "df2.reset_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'What was the pos/neg split for the turkers?')" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.barplot(x=\"agree_factor\",\n", " y=\"agree\",\n", " hue=\"PoN\",\n", " data=df2);\n", "plt.title(\"What was the pos/neg split for the turkers?\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## What was the kappa score for the turkers?" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.33333333333333337" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Example code\n", "from sklearn.metrics import cohen_kappa_score\n", "y1 = [0,1,2,3,4,0,1,2,3,4,0,1,2,3,4]\n", "y2 = [0,1,2,2,4,1,2,3,0,0,0,2,2,4,4]\n", "cohen_kappa_score(y1,y2)" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITTypeIdTitleDescriptionKeywordsRewardCreationTimeMaxAssignmentsRequesterAnnotationAssignmentDurationInSecondsAutoApprovalDelayInSeconds...RejectionTimeRequesterFeedbackWorkTimeInSecondsLifetimeApprovalRateLast30DaysApprovalRateLast7DaysApprovalRateInput.textAnswer.sentiment.labelApproveReject
HITIdWorkerId
301KG0KX9CLONM8AF3FM1L6B224H2XA2739JVQYMPMOU1111111111...0011111100
A33ENZVC1XB4BA1111111111...0011111100
A3RA9555K7Z7GN1111111111...0011111100
30F94FBDNRK8G8Z1YQPMGXC372CBT3A1S2MN0E9BHPVA1111111111...0011111100
A1Y66T7FKJ8PJA1111111111...0011111100
.....................................................................
3ZVPAMTJWN3RRAUKANC5HT2IZU7RGNA33ENZVC1XB4BA1111111111...0011111100
A681XM15AN28F1111111111...0011111100
3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTKA1IQV3QUWRA8G11111111111...0011111100
A681XM15AN28F1111111111...0011111100
AE03LUY7RH4001111111111...0011111100
\n", "

369 rows × 29 columns

\n", "
" ], "text/plain": [ " HITTypeId Title Description \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 1 1 \n", " A33ENZVC1XB4BA 1 1 1 \n", " A3RA9555K7Z7GN 1 1 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 1 1 \n", " A1Y66T7FKJ8PJA 1 1 1 \n", "... ... ... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 1 1 \n", " A681XM15AN28F 1 1 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 1 1 \n", " A681XM15AN28F 1 1 1 \n", " AE03LUY7RH400 1 1 1 \n", "\n", " Keywords Reward CreationTime \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 1 1 \n", " A33ENZVC1XB4BA 1 1 1 \n", " A3RA9555K7Z7GN 1 1 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 1 1 \n", " A1Y66T7FKJ8PJA 1 1 1 \n", "... ... ... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 1 1 \n", " A681XM15AN28F 1 1 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 1 1 \n", " A681XM15AN28F 1 1 1 \n", " AE03LUY7RH400 1 1 1 \n", "\n", " MaxAssignments \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " RequesterAnnotation \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " AssignmentDurationInSeconds \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " AutoApprovalDelayInSeconds \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " ... RejectionTime \\\n", "HITId WorkerId ... \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU ... 0 \n", " A33ENZVC1XB4BA ... 0 \n", " A3RA9555K7Z7GN ... 0 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA ... 0 \n", " A1Y66T7FKJ8PJA ... 0 \n", "... ... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA ... 0 \n", " A681XM15AN28F ... 0 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 ... 0 \n", " A681XM15AN28F ... 0 \n", " AE03LUY7RH400 ... 0 \n", "\n", " RequesterFeedback \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 0 \n", " A33ENZVC1XB4BA 0 \n", " A3RA9555K7Z7GN 0 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 0 \n", " A1Y66T7FKJ8PJA 0 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 0 \n", " A681XM15AN28F 0 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 0 \n", " A681XM15AN28F 0 \n", " AE03LUY7RH400 0 \n", "\n", " WorkTimeInSeconds \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " LifetimeApprovalRate \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " Last30DaysApprovalRate \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " Last7DaysApprovalRate \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " Input.text \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " Answer.sentiment.label \\\n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 1 \n", " A33ENZVC1XB4BA 1 \n", " A3RA9555K7Z7GN 1 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 1 \n", " A1Y66T7FKJ8PJA 1 \n", "... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 1 \n", " A681XM15AN28F 1 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 1 \n", " A681XM15AN28F 1 \n", " AE03LUY7RH400 1 \n", "\n", " Approve Reject \n", "HITId WorkerId \n", "301KG0KX9CLONM8AF3FM1L6B224H2X A2739JVQYMPMOU 0 0 \n", " A33ENZVC1XB4BA 0 0 \n", " A3RA9555K7Z7GN 0 0 \n", "30F94FBDNRK8G8Z1YQPMGXC372CBT3 A1S2MN0E9BHPVA 0 0 \n", " A1Y66T7FKJ8PJA 0 0 \n", "... ... ... \n", "3ZVPAMTJWN3RRAUKANC5HT2IZU7RGN A33ENZVC1XB4BA 0 0 \n", " A681XM15AN28F 0 0 \n", "3ZZAYRN1I6RSZ2OA2VU8MHC2E1VOTK A1IQV3QUWRA8G1 0 0 \n", " A681XM15AN28F 0 0 \n", " AE03LUY7RH400 0 0 \n", "\n", "[369 rows x 29 columns]" ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pos.groupby(['HITId', 'WorkerId']).count()" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "Oh boy! This will be super fun. First, I'm going to brainstorm \"out loud\" how I'm going to do this when AMT doesn't " ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [], "source": [ "pnn = pd.DataFrame()\n", "# pnn['Neutral'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Neutral').sum())\n", "# pnn['Positive'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Positive').sum())\n", "# pnn['Negative'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: (x=='Negative').sum())\n", "# pnn['Total'] = pos.groupby('WorkerId')['Answer.sentiment.label'].apply(lambda x: x.count())\n", "# pnn[:5]" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input.textWorkerIdAnswer.sentiment.labelPoN
228#LetRottenTomatoesRotSquad\\nI am a simple guy...A681XM15AN28FPositiveP
229#LetRottenTomatoesRotSquad\\nI am a simple guy...A2XFO0X6RCS98MPositiveP
230#LetRottenTomatoesRotSquad\\nI am a simple guy...AURYD2FH3FUOQPositiveP
56A 'Triumph of the Will' for Nihilists\\n'Joker...A1T79J0XQXDDGCNegativeN
55A 'Triumph of the Will' for Nihilists\\n'Joker...A2XFO0X6RCS98MNegativeN
...............
265Venice 76 review\\nI have just watched the Joke...ARLGZWN6W91WDPositiveN
266Venice 76 review\\nI have just watched the Joke...A38DC3BG1ZCVZ2PositiveN
93lose of both time and money\\nThis was one of t...A2XFO0X6RCS98MNegativeN
94lose of both time and money\\nThis was one of t...A3EZ0H07TSDAPWNegativeN
95lose of both time and money\\nThis was one of t...ASB8T0H7L99RFNegativeN
\n", "

294 rows × 4 columns

\n", "
" ], "text/plain": [ " Input.text WorkerId \\\n", "228 #LetRottenTomatoesRotSquad\\nI am a simple guy... A681XM15AN28F \n", "229 #LetRottenTomatoesRotSquad\\nI am a simple guy... A2XFO0X6RCS98M \n", "230 #LetRottenTomatoesRotSquad\\nI am a simple guy... AURYD2FH3FUOQ \n", "56 A 'Triumph of the Will' for Nihilists\\n'Joker... A1T79J0XQXDDGC \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... A2XFO0X6RCS98M \n", ".. ... ... \n", "265 Venice 76 review\\nI have just watched the Joke... ARLGZWN6W91WD \n", "266 Venice 76 review\\nI have just watched the Joke... A38DC3BG1ZCVZ2 \n", "93 lose of both time and money\\nThis was one of t... A2XFO0X6RCS98M \n", "94 lose of both time and money\\nThis was one of t... A3EZ0H07TSDAPW \n", "95 lose of both time and money\\nThis was one of t... ASB8T0H7L99RF \n", "\n", " Answer.sentiment.label PoN \n", "228 Positive P \n", "229 Positive P \n", "230 Positive P \n", "56 Negative N \n", "55 Negative N \n", ".. ... .. \n", "265 Positive N \n", "266 Positive N \n", "93 Negative N \n", "94 Negative N \n", "95 Negative N \n", "\n", "[294 rows x 4 columns]" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NeutralPositiveNegativeTotalAvg_WorkTimeInSecondsAvg_WorkTimeInMinMin_WorkTimeInMinMax_WorkTimeInMin
WorkerId
A681XM15AN28F132074013.5750000.2262500.1000000.833333
A1Y66T7FKJ8PJA523735695.85714311.5976190.21666722.000000
A33ENZVC1XB4BA034034366.6470596.1107840.6166679.916667
A1S2MN0E9BHPVA221427173.4444442.8907410.4000004.983333
A37L5E8MHHQGZM613322346.2727275.7712122.1500008.283333
AE03LUY7RH400410721102.2380951.7039680.1000003.433333
A2G44A4ZPWRPXU412218221.2777783.6879630.3833337.383333
A1YK1IKACUJMV4015015593.6000009.8933331.71666711.000000
A3AW887GI0NLKF310215269.4000004.4900001.6166677.216667
A3HAEQW13YPT6A014014442.9285717.3821430.86666711.100000
\n", "
" ], "text/plain": [ " Neutral Positive Negative Total Avg_WorkTimeInSeconds \\\n", "WorkerId \n", "A681XM15AN28F 13 20 7 40 13.575000 \n", "A1Y66T7FKJ8PJA 5 23 7 35 695.857143 \n", "A33ENZVC1XB4BA 0 34 0 34 366.647059 \n", "A1S2MN0E9BHPVA 2 21 4 27 173.444444 \n", "A37L5E8MHHQGZM 6 13 3 22 346.272727 \n", "AE03LUY7RH400 4 10 7 21 102.238095 \n", "A2G44A4ZPWRPXU 4 12 2 18 221.277778 \n", "A1YK1IKACUJMV4 0 15 0 15 593.600000 \n", "A3AW887GI0NLKF 3 10 2 15 269.400000 \n", "A3HAEQW13YPT6A 0 14 0 14 442.928571 \n", "\n", " Avg_WorkTimeInMin Min_WorkTimeInMin Max_WorkTimeInMin \n", "WorkerId \n", "A681XM15AN28F 0.226250 0.100000 0.833333 \n", "A1Y66T7FKJ8PJA 11.597619 0.216667 22.000000 \n", "A33ENZVC1XB4BA 6.110784 0.616667 9.916667 \n", "A1S2MN0E9BHPVA 2.890741 0.400000 4.983333 \n", "A37L5E8MHHQGZM 5.771212 2.150000 8.283333 \n", "AE03LUY7RH400 1.703968 0.100000 3.433333 \n", "A2G44A4ZPWRPXU 3.687963 0.383333 7.383333 \n", "A1YK1IKACUJMV4 9.893333 1.716667 11.000000 \n", "A3AW887GI0NLKF 4.490000 1.616667 7.216667 \n", "A3HAEQW13YPT6A 7.382143 0.866667 11.100000 " ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top[:10]" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [ "newdf = pd.DataFrame(turker.groupby(['HITId', 'WorkerId']))" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0(302OLP89DZ7MBHSY6QU0WCST11GACJ, A1T79J0XQXDDGC)HITId ...
1(302OLP89DZ7MBHSY6QU0WCST11GACJ, A2XFO0X6RCS98M)HITId ...
2(302OLP89DZ7MBHSY6QU0WCST11GACJ, A681XM15AN28F)HITId ...
3(3087LXLJ6MGXDGEQ5QN8FC1JPSW0FT, A1L8RL58MYU4NC)HITId ...
4(3087LXLJ6MGXDGEQ5QN8FC1JPSW0FT, A1T79J0XQXDDGC)HITId ...
.........
289(3ZLW647WALV9TE1B0IQKXR51J0B327, A38DC3BG1ZCVZ2)HITId ...
290(3ZLW647WALV9TE1B0IQKXR51J0B327, ARLGZWN6W91WD)HITId ...
291(3ZRKL6Z1E833SPUXPCCA737ELZESG6, A1L8RL58MYU4NC)HITId ...
292(3ZRKL6Z1E833SPUXPCCA737ELZESG6, A38DC3BG1ZCVZ2)HITId ...
293(3ZRKL6Z1E833SPUXPCCA737ELZESG6, A681XM15AN28F)HITId ...
\n", "

294 rows × 2 columns

\n", "
" ], "text/plain": [ " 0 \\\n", "0 (302OLP89DZ7MBHSY6QU0WCST11GACJ, A1T79J0XQXDDGC) \n", "1 (302OLP89DZ7MBHSY6QU0WCST11GACJ, A2XFO0X6RCS98M) \n", "2 (302OLP89DZ7MBHSY6QU0WCST11GACJ, A681XM15AN28F) \n", "3 (3087LXLJ6MGXDGEQ5QN8FC1JPSW0FT, A1L8RL58MYU4NC) \n", "4 (3087LXLJ6MGXDGEQ5QN8FC1JPSW0FT, A1T79J0XQXDDGC) \n", ".. ... \n", "289 (3ZLW647WALV9TE1B0IQKXR51J0B327, A38DC3BG1ZCVZ2) \n", "290 (3ZLW647WALV9TE1B0IQKXR51J0B327, ARLGZWN6W91WD) \n", "291 (3ZRKL6Z1E833SPUXPCCA737ELZESG6, A1L8RL58MYU4NC) \n", "292 (3ZRKL6Z1E833SPUXPCCA737ELZESG6, A38DC3BG1ZCVZ2) \n", "293 (3ZRKL6Z1E833SPUXPCCA737ELZESG6, A681XM15AN28F) \n", "\n", " 1 \n", "0 HITId ... \n", "1 HITId ... \n", "2 HITId ... \n", "3 HITId ... \n", "4 HITId ... \n", ".. ... \n", "289 HITId ... \n", "290 HITId ... \n", "291 HITId ... \n", "292 HITId ... \n", "293 HITId ... \n", "\n", "[294 rows x 2 columns]" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newdf" ] }, { "cell_type": "code", "execution_count": 147, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['HITId', 'HITTypeId', 'Title', 'Description', 'Keywords', 'Reward',\n", " 'CreationTime', 'MaxAssignments', 'RequesterAnnotation',\n", " 'AssignmentDurationInSeconds', 'AutoApprovalDelayInSeconds',\n", " 'Expiration', 'NumberOfSimilarHITs', 'LifetimeInSeconds',\n", " 'AssignmentId', 'WorkerId', 'AssignmentStatus', 'AcceptTime',\n", " 'SubmitTime', 'AutoApprovalTime', 'ApprovalTime', 'RejectionTime',\n", " 'RequesterFeedback', 'WorkTimeInSeconds', 'LifetimeApprovalRate',\n", " 'Last30DaysApprovalRate', 'Last7DaysApprovalRate', 'Input.text',\n", " 'Answer.sentiment.label', 'Approve', 'Reject'],\n", " dtype='object')" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker.columns" ] }, { "cell_type": "code", "execution_count": 148, "metadata": {}, "outputs": [], "source": [ "turker_clean = turker[['HITId', 'WorkerId', 'Answer.sentiment.label', 'Input.text']]" ] }, { "cell_type": "code", "execution_count": 149, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdWorkerIdAnswer.sentiment.labelInput.text
0338GLSUI43BXEPY2ES6SPI72KKESF7AH5A86OLRZWCSNegativeEveryone praised an overrated movie.\\nOverrat...
1338GLSUI43BXEPY2ES6SPI72KKESF7A2HGRSPR50ENHLNegativeEveryone praised an overrated movie.\\nOverrat...
2338GLSUI43BXEPY2ES6SPI72KKESF7AKSJ3C5O3V9RBNegativeEveryone praised an overrated movie.\\nOverrat...
337MQ8Z1JQEWA9HYZP3JANL1ES162YCARLGZWN6W91WDNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...
437MQ8Z1JQEWA9HYZP3JANL1ES162YCAKSJ3C5O3V9RBNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...
...............
2893PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA3EZ0H07TSDAPWNegativeOscar for Phoenix\\nI will stop watching movie...
2903PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA38DC3BG1ZCVZ2PositiveOscar for Phoenix\\nI will stop watching movie...
2913FO95NVK5C0UHF3B5N6M67LLN8PSR2A194R45ACMQEORPositiveJoker > Endgame\\nNeed I say more? Everything ...
2923FO95NVK5C0UHF3B5N6M67LLN8PSR2A1L8RL58MYU4NCPositiveJoker > Endgame\\nNeed I say more? Everything ...
2933FO95NVK5C0UHF3B5N6M67LLN8PSR2A1T79J0XQXDDGCPositiveJoker > Endgame\\nNeed I say more? Everything ...
\n", "

294 rows × 4 columns

\n", "
" ], "text/plain": [ " HITId WorkerId Answer.sentiment.label \\\n", "0 338GLSUI43BXEPY2ES6SPI72KKESF7 AH5A86OLRZWCS Negative \n", "1 338GLSUI43BXEPY2ES6SPI72KKESF7 A2HGRSPR50ENHL Negative \n", "2 338GLSUI43BXEPY2ES6SPI72KKESF7 AKSJ3C5O3V9RB Negative \n", "3 37MQ8Z1JQEWA9HYZP3JANL1ES162YC ARLGZWN6W91WD Negative \n", "4 37MQ8Z1JQEWA9HYZP3JANL1ES162YC AKSJ3C5O3V9RB Negative \n", ".. ... ... ... \n", "289 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A3EZ0H07TSDAPW Negative \n", "290 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A38DC3BG1ZCVZ2 Positive \n", "291 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A194R45ACMQEOR Positive \n", "292 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1L8RL58MYU4NC Positive \n", "293 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1T79J0XQXDDGC Positive \n", "\n", " Input.text \n", "0 Everyone praised an overrated movie.\\nOverrat... \n", "1 Everyone praised an overrated movie.\\nOverrat... \n", "2 Everyone praised an overrated movie.\\nOverrat... \n", "3 What idiotic FIlm\\nI can say that Phoenix is ... \n", "4 What idiotic FIlm\\nI can say that Phoenix is ... \n", ".. ... \n", "289 Oscar for Phoenix\\nI will stop watching movie... \n", "290 Oscar for Phoenix\\nI will stop watching movie... \n", "291 Joker > Endgame\\nNeed I say more? Everything ... \n", "292 Joker > Endgame\\nNeed I say more? Everything ... \n", "293 Joker > Endgame\\nNeed I say more? Everything ... \n", "\n", "[294 rows x 4 columns]" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# turker_clean.groupby" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ARLGZWN6W91WD 46\n", "A681XM15AN28F 37\n", "A1T79J0XQXDDGC 34\n", "A2XFO0X6RCS98M 33\n", "A3EZ0H07TSDAPW 33\n", "A1L8RL58MYU4NC 28\n", "A38DC3BG1ZCVZ2 22\n", "AKSJ3C5O3V9RB 21\n", "ASB8T0H7L99RF 10\n", "AE03LUY7RH400 6\n", "A37JENVKZQ56U6 5\n", "A194R45ACMQEOR 5\n", "AH5A86OLRZWCS 4\n", "A2HG1N3BVQO6I 4\n", "AURYD2FH3FUOQ 2\n", "AMC42JMQA8A5U 2\n", "ATHS9GUME1XCA 1\n", "A2HGRSPR50ENHL 1\n", "Name: WorkerId, dtype: int64" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker_clean.WorkerId.value_counts()" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [], "source": [ "turker1 = turker_clean[turker_clean['WorkerId'] == 'ARLGZWN6W91WD']\n", "turker2 = turker_clean[turker_clean['WorkerId'] == 'A681XM15AN28F']\n", "turker3 = turker_clean[turker_clean['WorkerId'] == 'A1T79J0XQXDDGC']\n", "turker4 = turker_clean[turker_clean['WorkerId'] == 'A2XFO0X6RCS98M']\n", "turker5 = turker_clean[turker_clean['WorkerId'] == 'A3EZ0H07TSDAPW']" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [], "source": [ "turker1.reset_index(drop=True, inplace=True)\n", "turker2.reset_index(drop=True, inplace=True)\n", "turker3.reset_index(drop=True, inplace=True)\n", "turker4.reset_index(drop=True, inplace=True)\n", "turker5.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [], "source": [ "merged_df = pd.concat([turker1, turker2, turker3, turker4, turker5], axis=0, sort=False)" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [], "source": [ "merged_df.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 196, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdWorkerIdAnswer.sentiment.labelInput.text
793AQN9REUTFGXCRWFMS3RJ4SIPSUYDGA681XM15AN28FPositive#LetRottenTomatoesRotSquad\\nI am a simple guy...
1423AQN9REUTFGXCRWFMS3RJ4SIPSUYDGA2XFO0X6RCS98MPositive#LetRottenTomatoesRotSquad\\nI am a simple guy...
1223IVKZBIBJ09HSLP89IUSS3JF0ZRSH5A2XFO0X6RCS98MNegativeA 'Triumph of the Will' for Nihilists\\n'Joker...
553IVKZBIBJ09HSLP89IUSS3JF0ZRSH5A681XM15AN28FNeutralA 'Triumph of the Will' for Nihilists\\n'Joker...
873IVKZBIBJ09HSLP89IUSS3JF0ZRSH5A1T79J0XQXDDGCNegativeA 'Triumph of the Will' for Nihilists\\n'Joker...
...............
1753J9UN9O9J3SDII0MOGETUATBIZD0JWA3EZ0H07TSDAPWPositiveTook my 65 year old mother to see it.\\nI saw t...
4331ODACBENUFU5EOBS8HM1HBGRMNSQ1ARLGZWN6W91WDPositiveVenice 76 review\\nI have just watched the Joke...
18031ODACBENUFU5EOBS8HM1HBGRMNSQ1A3EZ0H07TSDAPWPositiveVenice 76 review\\nI have just watched the Joke...
1623M93N4X8HKNDJRKYXIXD4GZUDRVSJAA3EZ0H07TSDAPWNegativelose of both time and money\\nThis was one of t...
1273M93N4X8HKNDJRKYXIXD4GZUDRVSJAA2XFO0X6RCS98MNegativelose of both time and money\\nThis was one of t...
\n", "

183 rows × 4 columns

\n", "
" ], "text/plain": [ " HITId WorkerId Answer.sentiment.label \\\n", "79 3AQN9REUTFGXCRWFMS3RJ4SIPSUYDG A681XM15AN28F Positive \n", "142 3AQN9REUTFGXCRWFMS3RJ4SIPSUYDG A2XFO0X6RCS98M Positive \n", "122 3IVKZBIBJ09HSLP89IUSS3JF0ZRSH5 A2XFO0X6RCS98M Negative \n", "55 3IVKZBIBJ09HSLP89IUSS3JF0ZRSH5 A681XM15AN28F Neutral \n", "87 3IVKZBIBJ09HSLP89IUSS3JF0ZRSH5 A1T79J0XQXDDGC Negative \n", ".. ... ... ... \n", "175 3J9UN9O9J3SDII0MOGETUATBIZD0JW A3EZ0H07TSDAPW Positive \n", "43 31ODACBENUFU5EOBS8HM1HBGRMNSQ1 ARLGZWN6W91WD Positive \n", "180 31ODACBENUFU5EOBS8HM1HBGRMNSQ1 A3EZ0H07TSDAPW Positive \n", "162 3M93N4X8HKNDJRKYXIXD4GZUDRVSJA A3EZ0H07TSDAPW Negative \n", "127 3M93N4X8HKNDJRKYXIXD4GZUDRVSJA A2XFO0X6RCS98M Negative \n", "\n", " Input.text \n", "79 #LetRottenTomatoesRotSquad\\nI am a simple guy... \n", "142 #LetRottenTomatoesRotSquad\\nI am a simple guy... \n", "122 A 'Triumph of the Will' for Nihilists\\n'Joker... \n", "55 A 'Triumph of the Will' for Nihilists\\n'Joker... \n", "87 A 'Triumph of the Will' for Nihilists\\n'Joker... \n", ".. ... \n", "175 Took my 65 year old mother to see it.\\nI saw t... \n", "43 Venice 76 review\\nI have just watched the Joke... \n", "180 Venice 76 review\\nI have just watched the Joke... \n", "162 lose of both time and money\\nThis was one of t... \n", "127 lose of both time and money\\nThis was one of t... \n", "\n", "[183 rows x 4 columns]" ] }, "execution_count": 196, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_df.sort_values(by='Input.text')" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [], "source": [ "merged_df2 = pd.concat([turker1, turker2], axis=0, sort=False)" ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdWorkerIdAnswer.sentiment.labelInput.text
333AQN9REUTFGXCRWFMS3RJ4SIPSUYDGA681XM15AN28FPositive#LetRottenTomatoesRotSquad\\nI am a simple guy...
93IVKZBIBJ09HSLP89IUSS3JF0ZRSH5A681XM15AN28FNeutralA 'Triumph of the Will' for Nihilists\\n'Joker...
3639O0SQZVJN78YHJJHK8BBGPP0UD7RVARLGZWN6W91WDPositiveA Breath of Fresh Cinema\\nBursting with emoti...
30334ZEL5JX6FRK2BVDVPICCGGCL5SOTA681XM15AN28FPositiveA brilliant movie\\nThis movie is slow but nev...
313DWGDA5POF4MG2LY1OWCB3NFIEPV1EARLGZWN6W91WDPositiveA clean masterpiece!\\nWhat I loved the most a...
...............
73D17ECOUOEV24TJFHEQ6S8VWRUX31QARLGZWN6W91WDNegativeOverhyped and not everyone joker performance i...
63G3AJKPCXLSKCVDMTH2YG0YCCF1Y43A681XM15AN28FNeutralRidiculous well acted Trash\\nSaw the movie Jok...
173JAOYN9IHL2YEWXU4I4PG1ATPEB33IA681XM15AN28FNeutralThe king has no clothes\\nRead the reviews- the...
383J5XXLQDHMBIQ5ZDOSAVZW2CGY3V36ARLGZWN6W91WDPositiveThe mirror of society\\nActing 10/10\\nActors 10...
4331ODACBENUFU5EOBS8HM1HBGRMNSQ1ARLGZWN6W91WDPositiveVenice 76 review\\nI have just watched the Joke...
\n", "

83 rows × 4 columns

\n", "
" ], "text/plain": [ " HITId WorkerId Answer.sentiment.label \\\n", "33 3AQN9REUTFGXCRWFMS3RJ4SIPSUYDG A681XM15AN28F Positive \n", "9 3IVKZBIBJ09HSLP89IUSS3JF0ZRSH5 A681XM15AN28F Neutral \n", "36 39O0SQZVJN78YHJJHK8BBGPP0UD7RV ARLGZWN6W91WD Positive \n", "30 334ZEL5JX6FRK2BVDVPICCGGCL5SOT A681XM15AN28F Positive \n", "31 3DWGDA5POF4MG2LY1OWCB3NFIEPV1E ARLGZWN6W91WD Positive \n", ".. ... ... ... \n", "7 3D17ECOUOEV24TJFHEQ6S8VWRUX31Q ARLGZWN6W91WD Negative \n", "6 3G3AJKPCXLSKCVDMTH2YG0YCCF1Y43 A681XM15AN28F Neutral \n", "17 3JAOYN9IHL2YEWXU4I4PG1ATPEB33I A681XM15AN28F Neutral \n", "38 3J5XXLQDHMBIQ5ZDOSAVZW2CGY3V36 ARLGZWN6W91WD Positive \n", "43 31ODACBENUFU5EOBS8HM1HBGRMNSQ1 ARLGZWN6W91WD Positive \n", "\n", " Input.text \n", "33 #LetRottenTomatoesRotSquad\\nI am a simple guy... \n", "9 A 'Triumph of the Will' for Nihilists\\n'Joker... \n", "36 A Breath of Fresh Cinema\\nBursting with emoti... \n", "30 A brilliant movie\\nThis movie is slow but nev... \n", "31 A clean masterpiece!\\nWhat I loved the most a... \n", ".. ... \n", "7 Overhyped and not everyone joker performance i... \n", "6 Ridiculous well acted Trash\\nSaw the movie Jok... \n", "17 The king has no clothes\\nRead the reviews- the... \n", "38 The mirror of society\\nActing 10/10\\nActors 10... \n", "43 Venice 76 review\\nI have just watched the Joke... \n", "\n", "[83 rows x 4 columns]" ] }, "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_df2.sort_values(by='Input.text')" ] }, { "cell_type": "code", "execution_count": 191, "metadata": {}, "outputs": [], "source": [ "# merged_df2['Input.text'].value_counts()\n", "# df = pd.DataFrame(merged_df2.groupby('HITId'))\n", "# df.set_index([turker1, turker2]).unstack(level=0)" ] }, { "cell_type": "code", "execution_count": 203, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HITIdWorkerIdAnswer.sentiment.label
037MQ8Z1JQEWA9HYZP3JANL1ES162YCARLGZWN6W91WDNegative
13I7SHAD35MWH116RCCCUPHVFU7E7M7ARLGZWN6W91WDNegative
23XUSYT70IT10FW0UEKSIRCYYDFG0DIARLGZWN6W91WDNegative
33SD15I2WD2UXBFKCNK2NN4MDZ5D63RARLGZWN6W91WDNegative
43P7QK0GJ3TLAE784LPLT1SAGYVA2Z3ARLGZWN6W91WDNegative
............
17839KV3A5D187KZWJWW98G1QULMWW7SJA3EZ0H07TSDAPWNeutral
17935F6NGNVM8JLEWWBL9D6BVQ7OFA7T8A3EZ0H07TSDAPWPositive
18031ODACBENUFU5EOBS8HM1HBGRMNSQ1A3EZ0H07TSDAPWPositive
1813PN6H8C9R4QWG9YC6MPBGIABM1SDAMA3EZ0H07TSDAPWNeutral
1823PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA3EZ0H07TSDAPWNegative
\n", "

183 rows × 3 columns

\n", "
" ], "text/plain": [ " HITId WorkerId Answer.sentiment.label\n", "0 37MQ8Z1JQEWA9HYZP3JANL1ES162YC ARLGZWN6W91WD Negative\n", "1 3I7SHAD35MWH116RCCCUPHVFU7E7M7 ARLGZWN6W91WD Negative\n", "2 3XUSYT70IT10FW0UEKSIRCYYDFG0DI ARLGZWN6W91WD Negative\n", "3 3SD15I2WD2UXBFKCNK2NN4MDZ5D63R ARLGZWN6W91WD Negative\n", "4 3P7QK0GJ3TLAE784LPLT1SAGYVA2Z3 ARLGZWN6W91WD Negative\n", ".. ... ... ...\n", "178 39KV3A5D187KZWJWW98G1QULMWW7SJ A3EZ0H07TSDAPW Neutral\n", "179 35F6NGNVM8JLEWWBL9D6BVQ7OFA7T8 A3EZ0H07TSDAPW Positive\n", "180 31ODACBENUFU5EOBS8HM1HBGRMNSQ1 A3EZ0H07TSDAPW Positive\n", "181 3PN6H8C9R4QWG9YC6MPBGIABM1SDAM A3EZ0H07TSDAPW Neutral\n", "182 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A3EZ0H07TSDAPW Negative\n", "\n", "[183 rows x 3 columns]" ] }, "execution_count": 203, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# grouped = turker_clean.groupby(['HITId','WorkerId'])\n", "# grouped.set_index(['HITId', 'WorkerId']).mean().unstack(level=0)\n", "df = merged_df.drop('Input.text', axis=1)\n", "df" ] }, { "cell_type": "code", "execution_count": 213, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " REVIEW1 REVIEW2 REVIEW3 REVIEW4 REVIEW5 REVIEW6 \\\n", "Turker \n", "A1T79J0XQXDDGC Positive Negative Positive Positive Negative Negative \n", "A2XFO0X6RCS98M Negative Negative Negative Negative Positive Negative \n", "A3EZ0H07TSDAPW Positive Neutral Positive Negative Negative Positive \n", "A681XM15AN28F Negative Positive Positive Positive Positive Negative \n", "ARLGZWN6W91WD Negative Negative Negative Negative Negative Negative \n", "\n", " REVIEW7 REVIEW8 REVIEW9 REVIEW10 ... REVIEW38 \\\n", "Turker ... \n", "A1T79J0XQXDDGC Negative Positive Negative Negative ... NaN \n", "A2XFO0X6RCS98M Negative Negative Negative Negative ... NaN \n", "A3EZ0H07TSDAPW Negative Positive Positive Negative ... NaN \n", "A681XM15AN28F Neutral Neutral Neutral Neutral ... NaN \n", "ARLGZWN6W91WD Negative Negative Negative Negative ... Positive \n", "\n", " REVIEW39 REVIEW40 REVIEW41 REVIEW42 REVIEW43 REVIEW44 \\\n", "Turker \n", "A1T79J0XQXDDGC NaN NaN NaN NaN NaN NaN \n", "A2XFO0X6RCS98M NaN NaN NaN NaN NaN NaN \n", "A3EZ0H07TSDAPW NaN NaN NaN NaN NaN NaN \n", "A681XM15AN28F NaN NaN NaN NaN NaN NaN \n", "ARLGZWN6W91WD Positive Positive Positive Negative Positive Positive \n", "\n", " REVIEW45 REVIEW46 \\\n", "Turker \n", "A1T79J0XQXDDGC NaN NaN \n", "A2XFO0X6RCS98M NaN NaN \n", "A3EZ0H07TSDAPW NaN NaN \n", "A681XM15AN28F NaN NaN \n", "ARLGZWN6W91WD Positive Positive \n", "\n", " SENTIMENT \n", "Turker \n", "A1T79J0XQXDDGC 302OLP89DZ7MBHSY6QU0WCST11GACJ32LAQ1JNT9PNC787... \n", "A2XFO0X6RCS98M 3I7SHAD35MWH116RCCCUPHVFU7E7M73XUSYT70IT10FW0U... \n", "A3EZ0H07TSDAPW 38O9DZ0A62N8QXOTJKOI4UHLTRD62G3I7SHAD35MWH116R... \n", "A681XM15AN28F 3SD15I2WD2UXBFKCNK2NN4MDZ5D63R302OLP89DZ7MBHSY... \n", "ARLGZWN6W91WD 37MQ8Z1JQEWA9HYZP3JANL1ES162YC3I7SHAD35MWH116R... \n", "\n", "[5 rows x 47 columns]\n" ] } ], "source": [ "df = pd.DataFrame({'Turker': merged_df['WorkerId'].tolist(),\n", " 'REVIEW': merged_df['Answer.sentiment.label'].tolist(),\n", " 'SENTIMENT': merged_df['HITId'].tolist() })\n", "\n", "grouped = df.groupby('Turker')\n", "values = grouped['SENTIMENT'].agg('sum')\n", "id_df = grouped['REVIEW'].apply(lambda x: pd.Series(x.values)).unstack()\n", "id_df = id_df.rename(columns={i: 'REVIEW{}'.format(i + 1) for i in range(id_df.shape[1])})\n", "result = pd.concat([id_df, values], axis=1)\n", "result_df = pd.DataFrame(result)\n", "print(result_df)" ] }, { "cell_type": "code", "execution_count": 216, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Turker A1T79J0XQXDDGC \\\n", "SENTIMENT1 Positive \n", "SENTIMENT2 Negative \n", "SENTIMENT3 Positive \n", "SENTIMENT4 Positive \n", "SENTIMENT5 Negative \n", "SENTIMENT6 Negative \n", "SENTIMENT7 Negative \n", "SENTIMENT8 Positive \n", "SENTIMENT9 Negative \n", "SENTIMENT10 Negative \n", "SENTIMENT11 Negative \n", "SENTIMENT12 Negative \n", "SENTIMENT13 Negative \n", "SENTIMENT14 Negative \n", "SENTIMENT15 Positive \n", "SENTIMENT16 Positive \n", "SENTIMENT17 Positive \n", "SENTIMENT18 Positive \n", "SENTIMENT19 Positive \n", "SENTIMENT20 Positive \n", "SENTIMENT21 Positive \n", "SENTIMENT22 Positive \n", "SENTIMENT23 Positive \n", "SENTIMENT24 Positive \n", "SENTIMENT25 Positive \n", "SENTIMENT26 Positive \n", "SENTIMENT27 Positive \n", "SENTIMENT28 Positive \n", "SENTIMENT29 Positive \n", "SENTIMENT30 Negative \n", "SENTIMENT31 Positive \n", "SENTIMENT32 Positive \n", "SENTIMENT33 Positive \n", "SENTIMENT34 Positive \n", "SENTIMENT35 NaN \n", "SENTIMENT36 NaN \n", "SENTIMENT37 NaN \n", "SENTIMENT38 NaN \n", "SENTIMENT39 NaN \n", "SENTIMENT40 NaN \n", "SENTIMENT41 NaN \n", "SENTIMENT42 NaN \n", "SENTIMENT43 NaN \n", "SENTIMENT44 NaN \n", "SENTIMENT45 NaN \n", "SENTIMENT46 NaN \n", "REVIEW 302OLP89DZ7MBHSY6QU0WCST11GACJ32LAQ1JNT9PNC787... \n", "\n", "Turker A2XFO0X6RCS98M \\\n", "SENTIMENT1 Negative \n", "SENTIMENT2 Negative \n", "SENTIMENT3 Negative \n", "SENTIMENT4 Negative \n", "SENTIMENT5 Positive \n", "SENTIMENT6 Negative \n", "SENTIMENT7 Negative \n", "SENTIMENT8 Negative \n", "SENTIMENT9 Negative \n", "SENTIMENT10 Negative \n", "SENTIMENT11 Negative \n", "SENTIMENT12 Negative \n", "SENTIMENT13 Negative \n", "SENTIMENT14 Negative \n", "SENTIMENT15 Positive \n", "SENTIMENT16 Negative \n", "SENTIMENT17 Negative \n", "SENTIMENT18 Positive \n", "SENTIMENT19 Positive \n", "SENTIMENT20 Positive \n", "SENTIMENT21 Positive \n", "SENTIMENT22 Positive \n", "SENTIMENT23 Positive \n", "SENTIMENT24 Positive \n", "SENTIMENT25 Positive \n", "SENTIMENT26 Positive \n", "SENTIMENT27 Positive \n", "SENTIMENT28 Positive \n", "SENTIMENT29 Positive \n", "SENTIMENT30 Positive \n", "SENTIMENT31 Positive \n", "SENTIMENT32 Positive \n", "SENTIMENT33 Positive \n", "SENTIMENT34 NaN \n", "SENTIMENT35 NaN \n", "SENTIMENT36 NaN \n", "SENTIMENT37 NaN \n", "SENTIMENT38 NaN \n", "SENTIMENT39 NaN \n", "SENTIMENT40 NaN \n", "SENTIMENT41 NaN \n", "SENTIMENT42 NaN \n", "SENTIMENT43 NaN \n", "SENTIMENT44 NaN \n", "SENTIMENT45 NaN \n", "SENTIMENT46 NaN \n", "REVIEW 3I7SHAD35MWH116RCCCUPHVFU7E7M73XUSYT70IT10FW0U... \n", "\n", "Turker A3EZ0H07TSDAPW \\\n", "SENTIMENT1 Positive \n", "SENTIMENT2 Neutral \n", "SENTIMENT3 Positive \n", "SENTIMENT4 Negative \n", "SENTIMENT5 Negative \n", "SENTIMENT6 Positive \n", "SENTIMENT7 Negative \n", "SENTIMENT8 Positive \n", "SENTIMENT9 Positive \n", "SENTIMENT10 Negative \n", "SENTIMENT11 Neutral \n", "SENTIMENT12 Negative \n", "SENTIMENT13 Negative \n", "SENTIMENT14 Neutral \n", "SENTIMENT15 Neutral \n", "SENTIMENT16 Positive \n", "SENTIMENT17 Negative \n", "SENTIMENT18 Negative \n", "SENTIMENT19 Neutral \n", "SENTIMENT20 Neutral \n", "SENTIMENT21 Neutral \n", "SENTIMENT22 Positive \n", "SENTIMENT23 Positive \n", "SENTIMENT24 Neutral \n", "SENTIMENT25 Positive \n", "SENTIMENT26 Positive \n", "SENTIMENT27 Positive \n", "SENTIMENT28 Positive \n", "SENTIMENT29 Neutral \n", "SENTIMENT30 Positive \n", "SENTIMENT31 Positive \n", "SENTIMENT32 Neutral \n", "SENTIMENT33 Negative \n", "SENTIMENT34 NaN \n", "SENTIMENT35 NaN \n", "SENTIMENT36 NaN \n", "SENTIMENT37 NaN \n", "SENTIMENT38 NaN \n", "SENTIMENT39 NaN \n", "SENTIMENT40 NaN \n", "SENTIMENT41 NaN \n", "SENTIMENT42 NaN \n", "SENTIMENT43 NaN \n", "SENTIMENT44 NaN \n", "SENTIMENT45 NaN \n", "SENTIMENT46 NaN \n", "REVIEW 38O9DZ0A62N8QXOTJKOI4UHLTRD62G3I7SHAD35MWH116R... \n", "\n", "Turker A681XM15AN28F \\\n", "SENTIMENT1 Negative \n", "SENTIMENT2 Positive \n", "SENTIMENT3 Positive \n", "SENTIMENT4 Positive \n", "SENTIMENT5 Positive \n", "SENTIMENT6 Negative \n", "SENTIMENT7 Neutral \n", "SENTIMENT8 Neutral \n", "SENTIMENT9 Neutral \n", "SENTIMENT10 Neutral \n", "SENTIMENT11 Positive \n", "SENTIMENT12 Positive \n", "SENTIMENT13 Negative \n", "SENTIMENT14 Positive \n", "SENTIMENT15 Neutral \n", "SENTIMENT16 Neutral \n", "SENTIMENT17 Neutral \n", "SENTIMENT18 Neutral \n", "SENTIMENT19 Positive \n", "SENTIMENT20 Negative \n", "SENTIMENT21 Neutral \n", "SENTIMENT22 Positive \n", "SENTIMENT23 Neutral \n", "SENTIMENT24 Neutral \n", "SENTIMENT25 Negative \n", "SENTIMENT26 Neutral \n", "SENTIMENT27 Negative \n", "SENTIMENT28 Positive \n", "SENTIMENT29 Negative \n", "SENTIMENT30 Neutral \n", "SENTIMENT31 Positive \n", "SENTIMENT32 Negative \n", "SENTIMENT33 Positive \n", "SENTIMENT34 Positive \n", "SENTIMENT35 Negative \n", "SENTIMENT36 Neutral \n", "SENTIMENT37 Positive \n", "SENTIMENT38 NaN \n", "SENTIMENT39 NaN \n", "SENTIMENT40 NaN \n", "SENTIMENT41 NaN \n", "SENTIMENT42 NaN \n", "SENTIMENT43 NaN \n", "SENTIMENT44 NaN \n", "SENTIMENT45 NaN \n", "SENTIMENT46 NaN \n", "REVIEW 3SD15I2WD2UXBFKCNK2NN4MDZ5D63R302OLP89DZ7MBHSY... \n", "\n", "Turker ARLGZWN6W91WD \n", "SENTIMENT1 Negative \n", "SENTIMENT2 Negative \n", "SENTIMENT3 Negative \n", "SENTIMENT4 Negative \n", "SENTIMENT5 Negative \n", "SENTIMENT6 Negative \n", "SENTIMENT7 Negative \n", "SENTIMENT8 Negative \n", "SENTIMENT9 Negative \n", "SENTIMENT10 Negative \n", "SENTIMENT11 Negative \n", "SENTIMENT12 Negative \n", "SENTIMENT13 Negative \n", "SENTIMENT14 Negative \n", "SENTIMENT15 Negative \n", "SENTIMENT16 Negative \n", "SENTIMENT17 Negative \n", "SENTIMENT18 Negative \n", "SENTIMENT19 Neutral \n", "SENTIMENT20 Negative \n", "SENTIMENT21 Negative \n", "SENTIMENT22 Negative \n", "SENTIMENT23 Positive \n", "SENTIMENT24 Positive \n", "SENTIMENT25 Positive \n", "SENTIMENT26 Positive \n", "SENTIMENT27 Positive \n", "SENTIMENT28 Positive \n", "SENTIMENT29 Positive \n", "SENTIMENT30 Positive \n", "SENTIMENT31 Positive \n", "SENTIMENT32 Positive \n", "SENTIMENT33 Positive \n", "SENTIMENT34 Positive \n", "SENTIMENT35 Positive \n", "SENTIMENT36 Positive \n", "SENTIMENT37 Positive \n", "SENTIMENT38 Positive \n", "SENTIMENT39 Positive \n", "SENTIMENT40 Positive \n", "SENTIMENT41 Positive \n", "SENTIMENT42 Negative \n", "SENTIMENT43 Positive \n", "SENTIMENT44 Positive \n", "SENTIMENT45 Positive \n", "SENTIMENT46 Positive \n", "REVIEW 37MQ8Z1JQEWA9HYZP3JANL1ES162YC3I7SHAD35MWH116R... \n" ] } ], "source": [ "df = pd.DataFrame({'Turker': merged_df['WorkerId'].tolist(),\n", " 'SENTIMENT': merged_df['Answer.sentiment.label'].tolist(),\n", " 'REVIEW': merged_df['HITId'].tolist() })\n", "\n", "grouped = df.groupby('Turker')\n", "values = grouped['REVIEW'].agg('sum')\n", "id_df = grouped['SENTIMENT'].apply(lambda x: pd.Series(x.values)).unstack()\n", "id_df = id_df.rename(columns={i: 'SENTIMENT{}'.format(i + 1) for i in range(id_df.shape[1])})\n", "result = pd.concat([id_df, values], axis=1)\n", "result_df = pd.DataFrame(result)\n", "print(result_df.T)" ] }, { "cell_type": "code", "execution_count": 225, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "47" ] }, "execution_count": 225, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t1 = result_df.T['A3EZ0H07TSDAPW'].tolist()\n", "len(t1)" ] }, { "cell_type": "code", "execution_count": 245, "metadata": {}, "outputs": [], "source": [ "t2 = result_df.T['A2XFO0X6RCS98M'].tolist()\n", "len(t2)\n", "t3 = result_df.T['A681XM15AN28F'].tolist()\n", "len(t3)\n", "t4 = result_df.T['ARLGZWN6W91WD'].tolist()" ] }, { "cell_type": "code", "execution_count": 246, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Positive',\n", " 'Neutral',\n", " 'Positive',\n", " 'Negative',\n", " 'Negative',\n", " 'Positive',\n", " 'Negative',\n", " 'Positive',\n", " 'Positive',\n", " 'Negative',\n", " 'Neutral',\n", " 'Negative',\n", " 'Negative',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Positive',\n", " 'Negative',\n", " 'Negative',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Positive',\n", " 'Positive',\n", " 'Neutral',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Neutral',\n", " 'Positive',\n", " 'Positive',\n", " 'Neutral',\n", " 'Negative',\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan]" ] }, "execution_count": 246, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t1[:-1]" ] }, { "cell_type": "code", "execution_count": 247, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Positive',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Negative',\n", " 'Positive',\n", " 'Negative',\n", " 'Negative',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan]" ] }, "execution_count": 247, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t2[:-1]" ] }, { "cell_type": "code", "execution_count": 248, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Negative',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Positive',\n", " 'Negative',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Positive',\n", " 'Positive',\n", " 'Negative',\n", " 'Positive',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Positive',\n", " 'Negative',\n", " 'Neutral',\n", " 'Positive',\n", " 'Neutral',\n", " 'Neutral',\n", " 'Negative',\n", " 'Neutral',\n", " 'Negative',\n", " 'Positive',\n", " 'Negative',\n", " 'Neutral',\n", " 'Positive',\n", " 'Negative',\n", " 'Positive',\n", " 'Positive',\n", " 'Negative',\n", " 'Neutral',\n", " 'Positive',\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n", " nan,\n}, "execution_count": 248, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t3" ] }, { "cell_type": "code", "execution_count": 251, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.43974358974358974" ] }, "execution_count": 251, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import cohen_kappa_score\n", "y1 = t1[:-1]\n", "y2 = t2[:-1]\n", "cohen_kappa_score(y1,y2)" ] }, { "cell_type": "code", "execution_count": 252, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-0.07585335018963324" ] }, "execution_count": 252, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import cohen_kappa_score\n", "y3 = t3[:-1]\n", "y4 = t4[:-1]\n", "cohen_kappa_score(y3,y4)" ] }, { "cell_type": "code", "execution_count": 272, "metadata": {}, "outputs": [], "source": [ "# turker_clean\n", "turker_clean_test = turker_clean.copy()\n", "turker_clean_test.reset_index(inplace=True)\n", "\n", "id_dict = {}\n", "id_num = 1\n", "def return_new_id(old_id,):\n", " if old_id in id_dict.keys():\n", " return id_dict[old_id]\n", " else:\n", " id_num = id_num + 1\n", " id_dict.update({ old_id: id_num })\n", " return num\n", "\n", "# turker_clean_test['ReviewID'] = turker_clean_test.apply(lambda x: return_new_id(x['HITId']), axis=1)\n", "# turker_clean_test\n", "turker_clean_test\n", "\n", "# import Counter \n", "# Counter(K)\n", "\n", "new_ids = pd.factorize(turker_clean_test['HITId'].tolist())\n", "new_ids[0]\n", "turker_clean_test['ReviewID'] = new_ids[0]" ] }, { "cell_type": "code", "execution_count": 273, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexHITIdWorkerIdAnswer.sentiment.labelInput.textReviewID
00338GLSUI43BXEPY2ES6SPI72KKESF7AH5A86OLRZWCSNegativeEveryone praised an overrated movie.\\nOverrat...0
11338GLSUI43BXEPY2ES6SPI72KKESF7A2HGRSPR50ENHLNegativeEveryone praised an overrated movie.\\nOverrat...0
22338GLSUI43BXEPY2ES6SPI72KKESF7AKSJ3C5O3V9RBNegativeEveryone praised an overrated movie.\\nOverrat...0
3337MQ8Z1JQEWA9HYZP3JANL1ES162YCARLGZWN6W91WDNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1
4437MQ8Z1JQEWA9HYZP3JANL1ES162YCAKSJ3C5O3V9RBNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1
.....................
2892893PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA3EZ0H07TSDAPWNegativeOscar for Phoenix\\nI will stop watching movie...96
2902903PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA38DC3BG1ZCVZ2PositiveOscar for Phoenix\\nI will stop watching movie...96
2912913FO95NVK5C0UHF3B5N6M67LLN8PSR2A194R45ACMQEORPositiveJoker > Endgame\\nNeed I say more? Everything ...97
2922923FO95NVK5C0UHF3B5N6M67LLN8PSR2A1L8RL58MYU4NCPositiveJoker > Endgame\\nNeed I say more? Everything ...97
2932933FO95NVK5C0UHF3B5N6M67LLN8PSR2A1T79J0XQXDDGCPositiveJoker > Endgame\\nNeed I say more? Everything ...97
\n", "

294 rows × 6 columns

\n", "
" ], "text/plain": [ " index HITId WorkerId \\\n", "0 0 338GLSUI43BXEPY2ES6SPI72KKESF7 AH5A86OLRZWCS \n", "1 1 338GLSUI43BXEPY2ES6SPI72KKESF7 A2HGRSPR50ENHL \n", "2 2 338GLSUI43BXEPY2ES6SPI72KKESF7 AKSJ3C5O3V9RB \n", "3 3 37MQ8Z1JQEWA9HYZP3JANL1ES162YC ARLGZWN6W91WD \n", "4 4 37MQ8Z1JQEWA9HYZP3JANL1ES162YC AKSJ3C5O3V9RB \n", ".. ... ... ... \n", "289 289 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A3EZ0H07TSDAPW \n", "290 290 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A38DC3BG1ZCVZ2 \n", "291 291 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A194R45ACMQEOR \n", "292 292 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1L8RL58MYU4NC \n", "293 293 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1T79J0XQXDDGC \n", "\n", " Answer.sentiment.label Input.text \\\n", "0 Negative Everyone praised an overrated movie.\\nOverrat... \n", "1 Negative Everyone praised an overrated movie.\\nOverrat... \n", "2 Negative Everyone praised an overrated movie.\\nOverrat... \n", "3 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", "4 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", ".. ... ... \n", "289 Negative Oscar for Phoenix\\nI will stop watching movie... \n", "290 Positive Oscar for Phoenix\\nI will stop watching movie... \n", "291 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "292 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "293 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "\n", " ReviewID \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 1 \n", "4 1 \n", ".. ... \n", "289 96 \n", "290 96 \n", "291 97 \n", "292 97 \n", "293 97 \n", "\n", "[294 rows x 6 columns]" ] }, "execution_count": 273, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker_clean_test" ] }, { "cell_type": "code", "execution_count": 274, "metadata": {}, "outputs": [], "source": [ "new_turker_ids = pd.factorize(turker_clean_test['WorkerId'].tolist())" ] }, { "cell_type": "code", "execution_count": 276, "metadata": {}, "outputs": [], "source": [ "t_ids = ['T_' + str(id) for id in new_turker_ids[0]]" ] }, { "cell_type": "code", "execution_count": 277, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['T_0',\n", " 'T_1',\n", " 'T_2',\n", " 'T_3',\n", " 'T_2',\n", " 'T_4',\n", " 'T_5',\n", " 'T_6',\n", " 'T_7',\n", " 'T_8',\n", " 'T_5',\n", " 'T_3',\n", " 'T_5',\n", " 'T_3',\n", " 'T_8',\n", " 'T_5',\n", " 'T_3',\n", " 'T_9',\n", " 'T_10',\n", " 'T_9',\n", " 'T_8',\n", " 'T_3',\n", " 'T_9',\n", " 'T_4',\n", " 'T_7',\n", " 'T_10',\n", " 'T_2',\n", " 'T_9',\n", " 'T_7',\n", " 'T_4',\n", " 'T_9',\n", " 'T_3',\n", " 'T_2',\n", " 'T_11',\n", " 'T_0',\n", " 'T_9',\n", " 'T_4',\n", " 'T_8',\n", " 'T_5',\n", " 'T_5',\n", " 'T_9',\n", " 'T_12',\n", " 'T_5',\n", " 'T_3',\n", " 'T_10',\n", " 'T_8',\n", " 'T_10',\n", " 'T_13',\n", " 'T_9',\n", " 'T_3',\n", " 'T_2',\n", " 'T_3',\n", " 'T_9',\n", " 'T_4',\n", " 'T_9',\n", " 'T_8',\n", " 'T_10',\n", " 'T_3',\n", " 'T_13',\n", " 'T_4',\n", " 'T_10',\n", " 'T_5',\n", " 'T_8',\n", " 'T_2',\n", " 'T_9',\n", " 'T_10',\n", " 'T_3',\n", " 'T_5',\n", " 'T_10',\n", " 'T_3',\n", " 'T_9',\n", " 'T_10',\n", " 'T_5',\n", " 'T_8',\n", " 'T_7',\n", " 'T_9',\n", " 'T_12',\n", " 'T_8',\n", " 'T_10',\n", " 'T_5',\n", " 'T_3',\n", " 'T_4',\n", " 'T_8',\n", " 'T_2',\n", " 'T_3',\n", " 'T_14',\n", " 'T_5',\n", " 'T_15',\n", " 'T_12',\n", " 'T_4',\n", " 'T_4',\n", " 'T_3',\n", " 'T_9',\n", " 'T_8',\n", " 'T_5',\n", " 'T_6',\n", " 'T_2',\n", " 'T_8',\n", " 'T_9',\n", " 'T_10',\n", " 'T_4',\n", " 'T_9',\n", " 'T_8',\n", " 'T_3',\n", " 'T_7',\n", " 'T_10',\n", " 'T_3',\n", " 'T_9',\n", " 'T_2',\n", " 'T_5',\n", " 'T_3',\n", " 'T_8',\n", " 'T_9',\n", " 'T_2',\n", " 'T_9',\n", " 'T_8',\n", " 'T_3',\n", " 'T_7',\n", " 'T_5',\n", " 'T_16',\n", " 'T_4',\n", " 'T_3',\n", " 'T_8',\n", " 'T_4',\n", " 'T_10',\n", " 'T_9',\n", " 'T_0',\n", " 'T_4',\n", " 'T_10',\n", " 'T_5',\n", " 'T_7',\n", " 'T_8',\n", " 'T_9',\n", " 'T_3',\n", " 'T_13',\n", " 'T_5',\n", " 'T_4',\n", " 'T_2',\n", " 'T_5',\n", " 'T_3',\n", " 'T_0',\n", " 'T_4',\n", " 'T_11',\n", " 'T_2',\n", " 'T_4',\n", " 'T_3',\n", " 'T_5',\n", " 'T_9',\n", " 'T_3',\n", " 'T_5',\n", " 'T_4',\n", " 'T_2',\n", " 'T_6',\n", " 'T_7',\n", " 'T_10',\n", " 'T_6',\n", " 'T_10',\n", " 'T_9',\n", " 'T_5',\n", " 'T_5',\n", " 'T_9',\n", " 'T_10',\n", " 'T_5',\n", " 'T_3',\n", " 'T_9',\n", " 'T_3',\n", " 'T_4',\n", " 'T_11',\n", " 'T_17',\n", " 'T_5',\n", " 'T_7',\n", " 'T_9',\n", " 'T_3',\n", " 'T_12',\n", " 'T_5',\n", " 'T_16',\n", " 'T_3',\n", " 'T_8',\n", " 'T_10',\n", " 'T_12',\n", " 'T_3',\n", " 'T_10',\n", " 'T_7',\n", " 'T_8',\n", " 'T_3',\n", " 'T_4',\n", " 'T_10',\n", " 'T_6',\n", " 'T_8',\n", " 'T_7',\n", " 'T_3',\n", " 'T_6',\n", " 'T_3',\n", " 'T_7',\n", " 'T_2',\n", " 'T_3',\n", " 'T_4',\n", " 'T_9',\n", " 'T_2',\n", " 'T_9',\n", " 'T_8',\n", " 'T_9',\n", " 'T_10',\n", " 'T_8',\n", " 'T_9',\n", " 'T_8',\n", " 'T_10',\n", " 'T_10',\n", " 'T_4',\n", " 'T_9',\n", " 'T_9',\n", " 'T_3',\n", " 'T_16',\n", " 'T_3',\n", " 'T_12',\n", " 'T_9',\n", " 'T_5',\n", " 'T_8',\n", " 'T_2',\n", " 'T_3',\n", " 'T_8',\n", " 'T_4',\n", " 'T_6',\n", " 'T_3',\n", " 'T_10',\n", " 'T_2',\n", " 'T_3',\n", " 'T_5',\n", " 'T_9',\n", " 'T_8',\n", " 'T_14',\n", " 'T_10',\n", " 'T_3',\n", " 'T_4',\n", " 'T_8',\n", " 'T_4',\n", " 'T_5',\n", " 'T_3',\n", " 'T_8',\n", " 'T_10',\n", " 'T_8',\n", " 'T_10',\n", " 'T_13',\n", " 'T_8',\n", " 'T_10',\n", " 'T_9',\n", " 'T_3',\n", " 'T_7',\n", " 'T_10',\n", " 'T_7',\n", " 'T_9',\n", " 'T_2',\n", " 'T_6',\n", " 'T_9',\n", " 'T_7',\n", " 'T_5',\n", " 'T_3',\n", " 'T_2',\n", " 'T_2',\n", " 'T_3',\n", " 'T_7',\n", " 'T_5',\n", " 'T_4',\n", " 'T_10',\n", " 'T_5',\n", " 'T_3',\n", " 'T_7',\n", " 'T_6',\n", " 'T_8',\n", " 'T_7',\n", " 'T_10',\n", " 'T_3',\n", " 'T_4',\n", " 'T_6',\n", " 'T_10',\n", " 'T_7',\n", " 'T_7',\n", " 'T_15',\n", " 'T_8',\n", " 'T_2',\n", " 'T_8',\n", " 'T_4',\n", " 'T_10',\n", " 'T_5',\n", " 'T_16',\n", " 'T_3',\n", " 'T_11',\n", " 'T_7',\n", " 'T_11',\n", " 'T_5',\n", " 'T_7',\n", " 'T_13',\n", " 'T_4',\n", " 'T_10']" ] }, "execution_count": 277, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t_ids" ] }, { "cell_type": "code", "execution_count": 278, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexHITIdWorkerIdAnswer.sentiment.labelInput.textReviewIDT_ID
00338GLSUI43BXEPY2ES6SPI72KKESF7AH5A86OLRZWCSNegativeEveryone praised an overrated movie.\\nOverrat...0T_0
11338GLSUI43BXEPY2ES6SPI72KKESF7A2HGRSPR50ENHLNegativeEveryone praised an overrated movie.\\nOverrat...0T_1
22338GLSUI43BXEPY2ES6SPI72KKESF7AKSJ3C5O3V9RBNegativeEveryone praised an overrated movie.\\nOverrat...0T_2
3337MQ8Z1JQEWA9HYZP3JANL1ES162YCARLGZWN6W91WDNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1T_3
4437MQ8Z1JQEWA9HYZP3JANL1ES162YCAKSJ3C5O3V9RBNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1T_2
........................
2892893PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA3EZ0H07TSDAPWNegativeOscar for Phoenix\\nI will stop watching movie...96T_5
2902903PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA38DC3BG1ZCVZ2PositiveOscar for Phoenix\\nI will stop watching movie...96T_7
2912913FO95NVK5C0UHF3B5N6M67LLN8PSR2A194R45ACMQEORPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_13
2922923FO95NVK5C0UHF3B5N6M67LLN8PSR2A1L8RL58MYU4NCPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_4
2932933FO95NVK5C0UHF3B5N6M67LLN8PSR2A1T79J0XQXDDGCPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_10
\n", "

294 rows × 7 columns

\n", "
" ], "text/plain": [ " index HITId WorkerId \\\n", "0 0 338GLSUI43BXEPY2ES6SPI72KKESF7 AH5A86OLRZWCS \n", "1 1 338GLSUI43BXEPY2ES6SPI72KKESF7 A2HGRSPR50ENHL \n", "2 2 338GLSUI43BXEPY2ES6SPI72KKESF7 AKSJ3C5O3V9RB \n", "3 3 37MQ8Z1JQEWA9HYZP3JANL1ES162YC ARLGZWN6W91WD \n", "4 4 37MQ8Z1JQEWA9HYZP3JANL1ES162YC AKSJ3C5O3V9RB \n", ".. ... ... ... \n", "289 289 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A3EZ0H07TSDAPW \n", "290 290 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A38DC3BG1ZCVZ2 \n", "291 291 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A194R45ACMQEOR \n", "292 292 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1L8RL58MYU4NC \n", "293 293 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1T79J0XQXDDGC \n", "\n", " Answer.sentiment.label Input.text \\\n", "0 Negative Everyone praised an overrated movie.\\nOverrat... \n", "1 Negative Everyone praised an overrated movie.\\nOverrat... \n", "2 Negative Everyone praised an overrated movie.\\nOverrat... \n", "3 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", "4 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", ".. ... ... \n", "289 Negative Oscar for Phoenix\\nI will stop watching movie... \n", "290 Positive Oscar for Phoenix\\nI will stop watching movie... \n", "291 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "292 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "293 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "\n", " ReviewID T_ID \n", "0 0 T_0 \n", "1 0 T_1 \n", "2 0 T_2 \n", "3 1 T_3 \n", "4 1 T_2 \n", ".. ... ... \n", "289 96 T_5 \n", "290 96 T_7 \n", "291 97 T_13 \n", "292 97 T_4 \n", "293 97 T_10 \n", "\n", "[294 rows x 7 columns]" ] }, "execution_count": 278, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker_clean_test['T_ID'] = t_ids\n", "turker_clean_test" ] }, { "cell_type": "code", "execution_count": 281, "metadata": {}, "outputs": [], "source": [ "turker_clean_test['sentiment'] = turker_clean_test.apply(lambda x: x['Answer.sentiment.label'][0], axis=1)" ] }, { "cell_type": "code", "execution_count": 282, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexHITIdWorkerIdAnswer.sentiment.labelInput.textReviewIDT_IDsentiment
00338GLSUI43BXEPY2ES6SPI72KKESF7AH5A86OLRZWCSNegativeEveryone praised an overrated movie.\\nOverrat...0T_0N
11338GLSUI43BXEPY2ES6SPI72KKESF7A2HGRSPR50ENHLNegativeEveryone praised an overrated movie.\\nOverrat...0T_1N
22338GLSUI43BXEPY2ES6SPI72KKESF7AKSJ3C5O3V9RBNegativeEveryone praised an overrated movie.\\nOverrat...0T_2N
3337MQ8Z1JQEWA9HYZP3JANL1ES162YCARLGZWN6W91WDNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1T_3N
4437MQ8Z1JQEWA9HYZP3JANL1ES162YCAKSJ3C5O3V9RBNegativeWhat idiotic FIlm\\nI can say that Phoenix is ...1T_2N
...........................
2892893PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA3EZ0H07TSDAPWNegativeOscar for Phoenix\\nI will stop watching movie...96T_5N
2902903PUV2Q8SV441ZJ34C0P7BTUH4JDDBHA38DC3BG1ZCVZ2PositiveOscar for Phoenix\\nI will stop watching movie...96T_7P
2912913FO95NVK5C0UHF3B5N6M67LLN8PSR2A194R45ACMQEORPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_13P
2922923FO95NVK5C0UHF3B5N6M67LLN8PSR2A1L8RL58MYU4NCPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_4P
2932933FO95NVK5C0UHF3B5N6M67LLN8PSR2A1T79J0XQXDDGCPositiveJoker > Endgame\\nNeed I say more? Everything ...97T_10P
\n", "

294 rows × 8 columns

\n", "
" ], "text/plain": [ " index HITId WorkerId \\\n", "0 0 338GLSUI43BXEPY2ES6SPI72KKESF7 AH5A86OLRZWCS \n", "1 1 338GLSUI43BXEPY2ES6SPI72KKESF7 A2HGRSPR50ENHL \n", "2 2 338GLSUI43BXEPY2ES6SPI72KKESF7 AKSJ3C5O3V9RB \n", "3 3 37MQ8Z1JQEWA9HYZP3JANL1ES162YC ARLGZWN6W91WD \n", "4 4 37MQ8Z1JQEWA9HYZP3JANL1ES162YC AKSJ3C5O3V9RB \n", ".. ... ... ... \n", "289 289 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A3EZ0H07TSDAPW \n", "290 290 3PUV2Q8SV441ZJ34C0P7BTUH4JDDBH A38DC3BG1ZCVZ2 \n", "291 291 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A194R45ACMQEOR \n", "292 292 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1L8RL58MYU4NC \n", "293 293 3FO95NVK5C0UHF3B5N6M67LLN8PSR2 A1T79J0XQXDDGC \n", "\n", " Answer.sentiment.label Input.text \\\n", "0 Negative Everyone praised an overrated movie.\\nOverrat... \n", "1 Negative Everyone praised an overrated movie.\\nOverrat... \n", "2 Negative Everyone praised an overrated movie.\\nOverrat... \n", "3 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", "4 Negative What idiotic FIlm\\nI can say that Phoenix is ... \n", ".. ... ... \n", "289 Negative Oscar for Phoenix\\nI will stop watching movie... \n", "290 Positive Oscar for Phoenix\\nI will stop watching movie... \n", "291 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "292 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "293 Positive Joker > Endgame\\nNeed I say more? Everything ... \n", "\n", " ReviewID T_ID sentiment \n", "0 0 T_0 N \n", "1 0 T_1 N \n", "2 0 T_2 N \n", "3 1 T_3 N \n", "4 1 T_2 N \n", ".. ... ... ... \n", "289 96 T_5 N \n", "290 96 T_7 P \n", "291 97 T_13 P \n", "292 97 T_4 P \n", "293 97 T_10 P \n", "\n", "[294 rows x 8 columns]" ] }, "execution_count": 282, "metadata": {}, "output_type": "execute_result" } ], "source": [ "turker_clean_test" ] }, { "cell_type": "code", "execution_count": 283, "metadata": {}, "outputs": [], "source": [ "even_cleaner_df = turker_clean_test[['ReviewID', 'T_ID', 'sentiment']]" ] }, { "cell_type": "code", "execution_count": 300, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 301, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
000 N\n", "1 N\n", "2 N\n", "Name: sentiment, dtype: o...
113 N\n", "4 N\n", "5 N\n", "Name: sentiment, dtype: o...
226 P\n", "7 N\n", "8 N\n", "Name: sentiment, dtype: o...
339 N\n", "10 N\n", "11 N\n", "Name: sentiment, dtype...
4412 P\n", "13 N\n", "14 N\n", "Name: sentiment, dtype...
.........
9393279 P\n", "280 P\n", "281 P\n", "Name: sentiment, dt...
9494282 P\n", "283 N\n", "284 P\n", "Name: sentiment, dt...
9595285 P\n", "286 P\n", "287 P\n", "Name: sentiment, dt...
9696288 N\n", "289 N\n", "290 P\n", "Name: sentiment, dt...
9797291 P\n", "292 P\n", "293 P\n", "Name: sentiment, dt...
\n", "

98 rows × 2 columns

\n", "
" ], "text/plain": [ " 0 1\n", "0 0 0 N\n", "1 N\n", "2 N\n", "Name: sentiment, dtype: o...\n", "1 1 3 N\n", "4 N\n", "5 N\n", "Name: sentiment, dtype: o...\n", "2 2 6 P\n", "7 N\n", "8 N\n", "Name: sentiment, dtype: o...\n", "3 3 9 N\n", "10 N\n", "11 N\n", "Name: sentiment, dtype...\n", "4 4 12 P\n", "13 N\n", "14 N\n", "Name: sentiment, dtype...\n", ".. .. ...\n", "93 93 279 P\n", "280 P\n", "281 P\n", "Name: sentiment, dt...\n", "94 94 282 P\n", "283 N\n", "284 P\n", "Name: sentiment, dt...\n", "95 95 285 P\n", "286 P\n", "287 P\n", "Name: sentiment, dt...\n", "96 96 288 N\n", "289 N\n", "290 P\n", "Name: sentiment, dt...\n", "97 97 291 P\n", "292 P\n", "293 P\n", "Name: sentiment, dt...\n", "\n", "[98 rows x 2 columns]" ] }, "execution_count": 301, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 305, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Turker T_0 T_1 T_10 T_11 T_12 T_13 T_14 T_15 T_16 T_17 T_2 T_3 T_4 \\\n", "REVIEW1 N N P N N N N N N P N N N \n", "REVIEW2 N NaN N N N N P N P NaN N N N \n", "REVIEW3 N NaN P P N N NaN NaN P NaN N N N \n", "REVIEW4 N NaN P P N P NaN NaN P NaN N N N \n", "REVIEW5 NaN NaN N N P P NaN NaN NaN NaN N N N \n", "REVIEW6 NaN NaN N NaN P NaN NaN NaN NaN NaN N N N \n", "REVIEW7 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW8 NaN NaN P NaN NaN NaN NaN NaN NaN NaN N N P \n", "REVIEW9 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW10 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW11 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW12 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW13 NaN NaN N NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW14 NaN NaN N NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW15 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW16 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW17 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW18 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW19 NaN NaN P NaN NaN NaN NaN NaN NaN NaN N N P \n", "REVIEW20 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW21 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW22 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN N P \n", "REVIEW23 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW24 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW25 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW26 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW27 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW28 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW29 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW30 NaN NaN N NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW31 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW32 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW33 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW34 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW36 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW37 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW38 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW39 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW40 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW41 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW42 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN N NaN \n", "REVIEW43 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW44 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW45 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW46 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW 99 0 1797 304 254 255 104 121 261 56 954 2177 1342 \n", "\n", "Turker T_5 T_6 T_7 T_8 T_9 \n", "REVIEW1 P N N N N \n", "REVIEW2 N N N N P \n", "REVIEW3 P P N N P \n", "REVIEW4 N P N N P \n", "REVIEW5 N P N P P \n", "REVIEW6 P P N N N \n", "REVIEW7 N P N N N \n", "REVIEW8 P P P N N \n", "REVIEW9 P P P N N \n", "REVIEW10 N P P N N \n", "REVIEW11 N NaN P N P \n", "REVIEW12 N NaN P N P \n", "REVIEW13 N NaN P N N \n", "REVIEW14 N NaN P N P \n", "REVIEW15 N NaN P P N \n", "REVIEW16 P NaN P N N \n", "REVIEW17 N NaN P N N \n", "REVIEW18 N NaN P P N \n", "REVIEW19 N NaN P P P \n", "REVIEW20 N NaN P P N \n", "REVIEW21 N NaN P P N \n", "REVIEW22 P NaN P P P \n", "REVIEW23 P NaN NaN P N \n", "REVIEW24 N NaN NaN P N \n", "REVIEW25 P NaN NaN P N \n", "REVIEW26 P NaN NaN P N \n", "REVIEW27 P NaN NaN P N \n", "REVIEW28 P NaN NaN P P \n", "REVIEW29 N NaN NaN P N \n", "REVIEW30 P NaN NaN P N \n", "REVIEW31 P NaN NaN P P \n", "REVIEW32 N NaN NaN P N \n", "REVIEW33 N NaN NaN P P \n", "REVIEW34 NaN NaN NaN NaN P \n", "REVIEW35 NaN NaN NaN NaN N \n", "REVIEW36 NaN NaN NaN NaN N \n", "REVIEW37 NaN NaN NaN NaN P \n", "REVIEW38 NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN \n", "REVIEW 1458 597 1339 1605 1536 \n" ] } ], "source": [ "df = pd.DataFrame({'Turker': even_cleaner_df['T_ID'].tolist(),\n", " 'SENTIMENT': even_cleaner_df['sentiment'].tolist(),\n", " 'REVIEW': even_cleaner_df['ReviewID'].tolist() })\n", "\n", "grouped = df.groupby('Turker')\n", "values = grouped['REVIEW'].agg('sum')\n", "id_df = grouped['SENTIMENT'].apply(lambda x: pd.Series(x.values)).unstack()\n", "id_df = id_df.rename(columns={i: 'REVIEW{}'.format(i + 1) for i in range(id_df.shape[1])})\n", "result = pd.concat([id_df, values], axis=1)\n", "result_df = pd.DataFrame(result)\n", "print(result_df.T)" ] }, { "cell_type": "code", "execution_count": 306, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(result_df.T)" ] }, { "cell_type": "code", "execution_count": 310, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TurkerT_0T_1T_10T_11T_12T_13T_14T_15T_16T_17T_2T_3T_4T_5T_6T_7T_8T_9
REVIEW1NNPNNNNNNPNNNPNNNN
REVIEW2NNaNNNNNPNPNaNNNNNNNNP
REVIEW3NNaNPPNNNaNNaNPNaNNNNPPNNP
REVIEW4NNaNPPNPNaNNaNPNaNNNNNPNNP
REVIEW5NaNNaNNNPPNaNNaNNaNNaNNNNNPNPP
REVIEW6NaNNaNNNaNPNaNNaNNaNNaNNaNNNNPPNNN
REVIEW7NaNNaNNNaNNaNNaNNaNNaNNaNNaNNNNNPNNN
REVIEW8NaNNaNPNaNNaNNaNNaNNaNNaNNaNNNPPPPNN
REVIEW9NaNNaNNNaNNaNNaNNaNNaNNaNNaNNNNPPPNN
REVIEW10NaNNaNNNaNNaNNaNNaNNaNNaNNaNNNNNPPNN
REVIEW11NaNNaNNNaNNaNNaNNaNNaNNaNNaNNNNNNaNPNP
REVIEW12NaNNaNNNaNNaNNaNNaNNaNNaNNaNNNNNNaNPNP
REVIEW13NaNNaNNNaNNaNNaNNaNNaNNaNNaNPNNNNaNPNN
REVIEW14NaNNaNNNaNNaNNaNNaNNaNNaNNaNPNNNNaNPNP
REVIEW15NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNNNNaNPPN
REVIEW16NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNPPNaNPNN
REVIEW17NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNPNNaNPNN
REVIEW18NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNPNNaNPPN
REVIEW19NaNNaNPNaNNaNNaNNaNNaNNaNNaNNNPNNaNPPP
REVIEW20NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNPNNaNPPN
REVIEW21NaNNaNPNaNNaNNaNNaNNaNNaNNaNPNPNNaNPPN
REVIEW22NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNNPPNaNPPP
REVIEW23NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPPNaNNaNPN
REVIEW24NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPNNaNNaNPN
REVIEW25NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPPNaNNaNPN
REVIEW26NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPPNaNNaNPN
REVIEW27NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPPNaNNaNPN
REVIEW28NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPPPNaNNaNPP
REVIEW29NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPNaNNNaNNaNPN
REVIEW30NaNNaNNNaNNaNNaNNaNNaNNaNNaNNaNPNaNPNaNNaNPN
REVIEW31NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPNaNPNaNNaNPP
REVIEW32NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPNaNNNaNNaNPN
REVIEW33NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPNaNNNaNNaNPP
REVIEW34NaNNaNPNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNP
REVIEW35NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNN
REVIEW36NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNN
REVIEW37NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNP
REVIEW38NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW39NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW40NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW41NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW42NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNNaNNaNNaNNaNNaNNaN
REVIEW43NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW44NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW45NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW46NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNPNaNNaNNaNNaNNaNNaN
REVIEW990179730425425510412126156954217713421458597133916051536
\n", "
" ], "text/plain": [ "Turker T_0 T_1 T_10 T_11 T_12 T_13 T_14 T_15 T_16 T_17 T_2 T_3 T_4 \\\n", "REVIEW1 N N P N N N N N N P N N N \n", "REVIEW2 N NaN N N N N P N P NaN N N N \n", "REVIEW3 N NaN P P N N NaN NaN P NaN N N N \n", "REVIEW4 N NaN P P N P NaN NaN P NaN N N N \n", "REVIEW5 NaN NaN N N P P NaN NaN NaN NaN N N N \n", "REVIEW6 NaN NaN N NaN P NaN NaN NaN NaN NaN N N N \n", "REVIEW7 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW8 NaN NaN P NaN NaN NaN NaN NaN NaN NaN N N P \n", "REVIEW9 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW10 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW11 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW12 NaN NaN N NaN NaN NaN NaN NaN NaN NaN N N N \n", "REVIEW13 NaN NaN N NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW14 NaN NaN N NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW15 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N N \n", "REVIEW16 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW17 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW18 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW19 NaN NaN P NaN NaN NaN NaN NaN NaN NaN N N P \n", "REVIEW20 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW21 NaN NaN P NaN NaN NaN NaN NaN NaN NaN P N P \n", "REVIEW22 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN N P \n", "REVIEW23 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW24 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW25 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW26 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW27 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW28 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P P \n", "REVIEW29 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW30 NaN NaN N NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW31 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW32 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW33 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW34 NaN NaN P NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW36 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW37 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW38 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW39 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW40 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW41 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW42 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN N NaN \n", "REVIEW43 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW44 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW45 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW46 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN P NaN \n", "REVIEW 99 0 1797 304 254 255 104 121 261 56 954 2177 1342 \n", "\n", "Turker T_5 T_6 T_7 T_8 T_9 \n", "REVIEW1 P N N N N \n", "REVIEW2 N N N N P \n", "REVIEW3 P P N N P \n", "REVIEW4 N P N N P \n", "REVIEW5 N P N P P \n", "REVIEW6 P P N N N \n", "REVIEW7 N P N N N \n", "REVIEW8 P P P N N \n", "REVIEW9 P P P N N \n", "REVIEW10 N P P N N \n", "REVIEW11 N NaN P N P \n", "REVIEW12 N NaN P N P \n", "REVIEW13 N NaN P N N \n", "REVIEW14 N NaN P N P \n", "REVIEW15 N NaN P P N \n", "REVIEW16 P NaN P N N \n", "REVIEW17 N NaN P N N \n", "REVIEW18 N NaN P P N \n", "REVIEW19 N NaN P P P \n", "REVIEW20 N NaN P P N \n", "REVIEW21 N NaN P P N \n", "REVIEW22 P NaN P P P \n", "REVIEW23 P NaN NaN P N \n", "REVIEW24 N NaN NaN P N \n", "REVIEW25 P NaN NaN P N \n", "REVIEW26 P NaN NaN P N \n", "REVIEW27 P NaN NaN P N \n", "REVIEW28 P NaN NaN P P \n", "REVIEW29 N NaN NaN P N \n", "REVIEW30 P NaN NaN P N \n", "REVIEW31 P NaN NaN P P \n", "REVIEW32 N NaN NaN P N \n", "REVIEW33 N NaN NaN P P \n", "REVIEW34 NaN NaN NaN NaN P \n", "REVIEW35 NaN NaN NaN NaN N \n", "REVIEW36 NaN NaN NaN NaN N \n", "REVIEW37 NaN NaN NaN NaN P \n", "REVIEW38 NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN \n", "REVIEW 1458 597 1339 1605 1536 " ] }, "execution_count": 310, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## That is obviously wrong because only THREE people commented on Review1" ] }, { "cell_type": "code", "execution_count": 311, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Turker T_0 T_1 T_10 T_11 T_12 T_13 \\\n", "REVIEW1 0 0 6 11 13 15 \n", "REVIEW2 11 NaN 8 47 25 19 \n", "REVIEW3 42 NaN 14 55 29 44 \n", "REVIEW4 46 NaN 15 95 57 80 \n", "REVIEW5 NaN NaN 18 96 59 97 \n", "REVIEW6 NaN NaN 20 NaN 71 NaN \n", "REVIEW7 NaN NaN 21 NaN NaN NaN \n", "REVIEW8 NaN NaN 22 NaN NaN NaN \n", "REVIEW9 NaN NaN 23 NaN NaN NaN \n", "REVIEW10 NaN NaN 26 NaN NaN NaN \n", "REVIEW11 NaN NaN 33 NaN NaN NaN \n", "REVIEW12 NaN NaN 35 NaN NaN NaN \n", "REVIEW13 NaN NaN 41 NaN NaN NaN \n", "REVIEW14 NaN NaN 42 NaN NaN NaN \n", "REVIEW15 NaN NaN 51 NaN NaN NaN \n", "REVIEW16 NaN NaN 52 NaN NaN NaN \n", "REVIEW17 NaN NaN 53 NaN NaN NaN \n", "REVIEW18 NaN NaN 59 NaN NaN NaN \n", "REVIEW19 NaN NaN 60 NaN NaN NaN \n", "REVIEW20 NaN NaN 62 NaN NaN NaN \n", "REVIEW21 NaN NaN 67 NaN NaN NaN \n", "REVIEW22 NaN NaN 68 NaN NaN NaN \n", "REVIEW23 NaN NaN 69 NaN NaN NaN \n", "REVIEW24 NaN NaN 74 NaN NaN NaN \n", "REVIEW25 NaN NaN 77 NaN NaN NaN \n", "REVIEW26 NaN NaN 79 NaN NaN NaN \n", "REVIEW27 NaN NaN 80 NaN NaN NaN \n", "REVIEW28 NaN NaN 81 NaN NaN NaN \n", "REVIEW29 NaN NaN 82 NaN NaN NaN \n", "REVIEW30 NaN NaN 87 NaN NaN NaN \n", "REVIEW31 NaN NaN 90 NaN NaN NaN \n", "REVIEW32 NaN NaN 91 NaN NaN NaN \n", "REVIEW33 NaN NaN 94 NaN NaN NaN \n", "REVIEW34 NaN NaN 97 NaN NaN NaN \n", "REVIEW35 NaN NaN NaN NaN NaN NaN \n", "REVIEW36 NaN NaN NaN NaN NaN NaN \n", "REVIEW37 NaN NaN NaN NaN NaN NaN \n", "REVIEW38 NaN NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN NaN \n", "REVIEW NNNN N PNPPNNNPNNNNNNPPPPPPPPPPPPPPPNPPPP NNPPN NNNNPP NNNPP \n", "\n", "Turker T_14 T_15 T_16 T_17 T_2 \\\n", "REVIEW1 28 29 39 56 0 \n", "REVIEW2 76 92 58 NaN 1 \n", "REVIEW3 NaN NaN 70 NaN 8 \n", "REVIEW4 NaN NaN 94 NaN 10 \n", "REVIEW5 NaN NaN NaN NaN 16 \n", "REVIEW6 NaN NaN NaN NaN 21 \n", "REVIEW7 NaN NaN NaN NaN 27 \n", "REVIEW8 NaN NaN NaN NaN 32 \n", "REVIEW9 NaN NaN NaN NaN 36 \n", "REVIEW10 NaN NaN NaN NaN 37 \n", "REVIEW11 NaN NaN NaN NaN 45 \n", "REVIEW12 NaN NaN NaN NaN 47 \n", "REVIEW13 NaN NaN NaN NaN 50 \n", "REVIEW14 NaN NaN NaN NaN 64 \n", "REVIEW15 NaN NaN NaN NaN 66 \n", "REVIEW16 NaN NaN NaN NaN 72 \n", "REVIEW17 NaN NaN NaN NaN 75 \n", "REVIEW18 NaN NaN NaN NaN 83 \n", "REVIEW19 NaN NaN NaN NaN 85 \n", "REVIEW20 NaN NaN NaN NaN 86 \n", "REVIEW21 NaN NaN NaN NaN 93 \n", "REVIEW22 NaN NaN NaN NaN NaN \n", "REVIEW23 NaN NaN NaN NaN NaN \n", "REVIEW24 NaN NaN NaN NaN NaN \n", "REVIEW25 NaN NaN NaN NaN NaN \n", "REVIEW26 NaN NaN NaN NaN NaN \n", "REVIEW27 NaN NaN NaN NaN NaN \n", "REVIEW28 NaN NaN NaN NaN NaN \n", "REVIEW29 NaN NaN NaN NaN NaN \n", "REVIEW30 NaN NaN NaN NaN NaN \n", "REVIEW31 NaN NaN NaN NaN NaN \n", "REVIEW32 NaN NaN NaN NaN NaN \n", "REVIEW33 NaN NaN NaN NaN NaN \n", "REVIEW34 NaN NaN NaN NaN NaN \n", "REVIEW35 NaN NaN NaN NaN NaN \n", "REVIEW36 NaN NaN NaN NaN NaN \n", "REVIEW37 NaN NaN NaN NaN NaN \n", "REVIEW38 NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN \n", "REVIEW NP NN NPPP P NNNNNNNNNNNNPPPPPPNPP \n", "\n", "Turker T_3 \\\n", "REVIEW1 1 \n", "REVIEW2 3 \n", "REVIEW3 4 \n", "REVIEW4 5 \n", "REVIEW5 7 \n", "REVIEW6 10 \n", "REVIEW7 14 \n", "REVIEW8 16 \n", "REVIEW9 17 \n", "REVIEW10 19 \n", "REVIEW11 22 \n", "REVIEW12 23 \n", "REVIEW13 26 \n", "REVIEW14 28 \n", "REVIEW15 30 \n", "REVIEW16 34 \n", "REVIEW17 35 \n", "REVIEW18 36 \n", "REVIEW19 38 \n", "REVIEW20 40 \n", "REVIEW21 44 \n", "REVIEW22 46 \n", "REVIEW23 48 \n", "REVIEW24 49 \n", "REVIEW25 54 \n", "REVIEW26 55 \n", "REVIEW27 57 \n", "REVIEW28 58 \n", "REVIEW29 60 \n", "REVIEW30 61 \n", "REVIEW31 63 \n", "REVIEW32 64 \n", "REVIEW33 65 \n", "REVIEW34 70 \n", "REVIEW35 71 \n", "REVIEW36 73 \n", "REVIEW37 74 \n", "REVIEW38 75 \n", "REVIEW39 77 \n", "REVIEW40 79 \n", "REVIEW41 82 \n", "REVIEW42 85 \n", "REVIEW43 86 \n", "REVIEW44 88 \n", "REVIEW45 90 \n", "REVIEW46 95 \n", "REVIEW NNNNNNNNNNNNNNNNNNNNNNPPPPPPPPPPPPPPPPPPPNPPPP \n", "\n", "Turker T_4 T_5 \\\n", "REVIEW1 1 2 \n", "REVIEW2 7 3 \n", "REVIEW3 9 4 \n", "REVIEW4 12 5 \n", "REVIEW5 17 12 \n", "REVIEW6 19 13 \n", "REVIEW7 27 14 \n", "REVIEW8 29 20 \n", "REVIEW9 30 22 \n", "REVIEW10 33 24 \n", "REVIEW11 40 26 \n", "REVIEW12 41 28 \n", "REVIEW13 42 31 \n", "REVIEW14 45 36 \n", "REVIEW15 47 39 \n", "REVIEW16 48 43 \n", "REVIEW17 50 45 \n", "REVIEW18 55 46 \n", "REVIEW19 61 48 \n", "REVIEW20 65 49 \n", "REVIEW21 69 52 \n", "REVIEW22 73 53 \n", "REVIEW23 77 54 \n", "REVIEW24 78 56 \n", "REVIEW25 87 58 \n", "REVIEW26 90 72 \n", "REVIEW27 93 75 \n", "REVIEW28 97 78 \n", "REVIEW29 NaN 85 \n", "REVIEW30 NaN 87 \n", "REVIEW31 NaN 88 \n", "REVIEW32 NaN 94 \n", "REVIEW33 NaN 96 \n", "REVIEW34 NaN NaN \n", "REVIEW35 NaN NaN \n", "REVIEW36 NaN NaN \n", "REVIEW37 NaN NaN \n", "REVIEW38 NaN NaN \n", "REVIEW39 NaN NaN \n", "REVIEW40 NaN NaN \n", "REVIEW41 NaN NaN \n", "REVIEW42 NaN NaN \n", "REVIEW43 NaN NaN \n", "REVIEW44 NaN NaN \n", "REVIEW45 NaN NaN \n", "REVIEW46 NaN NaN \n", "REVIEW NNNNNNNPNNNNNNNPPPPPPPPPPPPP PNPNNPNPPNNNNNNPNNNNNPPNPPPPNPPNN \n", "\n", "Turker T_6 T_7 \\\n", "REVIEW1 2 2 \n", "REVIEW2 31 8 \n", "REVIEW3 50 9 \n", "REVIEW4 51 24 \n", "REVIEW5 62 34 \n", "REVIEW6 63 39 \n", "REVIEW7 74 43 \n", "REVIEW8 84 51 \n", "REVIEW9 89 56 \n", "REVIEW10 91 60 \n", "REVIEW11 NaN 63 \n", "REVIEW12 NaN 64 \n", "REVIEW13 NaN 82 \n", "REVIEW14 NaN 83 \n", "REVIEW15 NaN 84 \n", "REVIEW16 NaN 86 \n", "REVIEW17 NaN 88 \n", "REVIEW18 NaN 89 \n", "REVIEW19 NaN 91 \n", "REVIEW20 NaN 92 \n", "REVIEW21 NaN 95 \n", "REVIEW22 NaN 96 \n", "REVIEW23 NaN NaN \n", "REVIEW24 NaN NaN \n", "REVIEW25 NaN NaN \n", "REVIEW26 NaN NaN \n", "REVIEW27 NaN NaN \n", "REVIEW28 NaN NaN \n", "REVIEW29 NaN NaN \n", "REVIEW30 NaN NaN \n", "REVIEW31 NaN NaN \n", "REVIEW32 NaN NaN \n", "REVIEW33 NaN NaN \n", "REVIEW34 NaN NaN \n", "REVIEW35 NaN NaN \n", "REVIEW36 NaN NaN \n", "REVIEW37 NaN NaN \n", "REVIEW38 NaN NaN \n", "REVIEW39 NaN NaN \n", "REVIEW40 NaN NaN \n", "REVIEW41 NaN NaN \n", "REVIEW42 NaN NaN \n", "REVIEW43 NaN NaN \n", "REVIEW44 NaN NaN \n", "REVIEW45 NaN NaN \n", "REVIEW46 NaN NaN \n", "REVIEW NNPPPPPPPP NNNNNNNPPPPPPPPPPPPPPP \n", "\n", "Turker T_8 \\\n", "REVIEW1 3 \n", "REVIEW2 4 \n", "REVIEW3 6 \n", "REVIEW4 12 \n", "REVIEW5 15 \n", "REVIEW6 18 \n", "REVIEW7 20 \n", "REVIEW8 24 \n", "REVIEW9 25 \n", "REVIEW10 27 \n", "REVIEW11 31 \n", "REVIEW12 32 \n", "REVIEW13 34 \n", "REVIEW14 37 \n", "REVIEW15 38 \n", "REVIEW16 40 \n", "REVIEW17 43 \n", "REVIEW18 59 \n", "REVIEW19 61 \n", "REVIEW20 62 \n", "REVIEW21 66 \n", "REVIEW22 67 \n", "REVIEW23 68 \n", "REVIEW24 72 \n", "REVIEW25 73 \n", "REVIEW26 76 \n", "REVIEW27 78 \n", "REVIEW28 79 \n", "REVIEW29 80 \n", "REVIEW30 81 \n", "REVIEW31 89 \n", "REVIEW32 92 \n", "REVIEW33 93 \n", "REVIEW34 NaN \n", "REVIEW35 NaN \n", "REVIEW36 NaN \n", "REVIEW37 NaN \n", "REVIEW38 NaN \n", "REVIEW39 NaN \n", "REVIEW40 NaN \n", "REVIEW41 NaN \n", "REVIEW42 NaN \n", "REVIEW43 NaN \n", "REVIEW44 NaN \n", "REVIEW45 NaN \n", "REVIEW46 NaN \n", "REVIEW NNNNPNNNNNNNNNPNNPPPPPPPPPPPPPPPP \n", "\n", "Turker T_9 \n", "REVIEW1 5 \n", "REVIEW2 6 \n", "REVIEW3 7 \n", "REVIEW4 9 \n", "REVIEW5 10 \n", "REVIEW6 11 \n", "REVIEW7 13 \n", "REVIEW8 16 \n", "REVIEW9 17 \n", "REVIEW10 18 \n", "REVIEW11 21 \n", "REVIEW12 23 \n", "REVIEW13 25 \n", "REVIEW14 30 \n", "REVIEW15 32 \n", "REVIEW16 33 \n", "REVIEW17 35 \n", "REVIEW18 37 \n", "REVIEW19 38 \n", "REVIEW20 41 \n", "REVIEW21 44 \n", "REVIEW22 49 \n", "REVIEW23 52 \n", "REVIEW24 53 \n", "REVIEW25 54 \n", "REVIEW26 57 \n", "REVIEW27 65 \n", "REVIEW28 66 \n", "REVIEW29 67 \n", "REVIEW30 68 \n", "REVIEW31 69 \n", "REVIEW32 70 \n", "REVIEW33 71 \n", "REVIEW34 76 \n", "REVIEW35 81 \n", "REVIEW36 83 \n", "REVIEW37 84 \n", "REVIEW38 NaN \n", "REVIEW39 NaN \n", "REVIEW40 NaN \n", "REVIEW41 NaN \n", "REVIEW42 NaN \n", "REVIEW43 NaN \n", "REVIEW44 NaN \n", "REVIEW45 NaN \n", "REVIEW46 NaN \n", "REVIEW NPPPPNNNNNPPNPNNNNPNNPNNNNNPNNPNPPNNP \n" ] } ], "source": [ "df = pd.DataFrame({'Turker': even_cleaner_df['T_ID'].tolist(),\n", " 'SENTIMENT': even_cleaner_df['ReviewID'].tolist(),\n", " 'REVIEW': even_cleaner_df['sentiment'].tolist() })\n", "\n", "grouped = df.groupby('Turker')\n", "values = grouped['REVIEW'].agg('sum')\n", "id_df = grouped['SENTIMENT'].apply(lambda x: pd.Series(x.values)).unstack()\n", "id_df = id_df.rename(columns={i: 'REVIEW{}'.format(i + 1) for i in range(id_df.shape[1])})\n", "result = pd.concat([id_df, values], axis=1)\n", "result_df = pd.DataFrame(result)\n", "print(result_df.T)" ] }, { "cell_type": "code", "execution_count": 312, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(result_df.T)" ] }, { "cell_type": "code", "execution_count": 313, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TurkerT_0T_1T_10T_11T_12T_13T_14T_15T_16T_17T_2T_3T_4T_5T_6T_7T_8T_9
REVIEW10061113152829395601122235
REVIEW211NaN8472519769258NaN137331846
REVIEW342NaN14552944NaNNaN70NaN849450967
REVIEW446NaN15955780NaNNaN94NaN1051255124129
REVIEW5NaNNaN18965997NaNNaNNaNNaN167171262341510
REVIEW6NaNNaN20NaN71NaNNaNNaNNaNNaN2110191363391811
REVIEW7NaNNaN21NaNNaNNaNNaNNaNNaNNaN2714271474432013
REVIEW8NaNNaN22NaNNaNNaNNaNNaNNaNNaN3216292084512416
REVIEW9NaNNaN23NaNNaNNaNNaNNaNNaNNaN3617302289562517
REVIEW10NaNNaN26NaNNaNNaNNaNNaNNaNNaN3719332491602718
REVIEW11NaNNaN33NaNNaNNaNNaNNaNNaNNaN45224026NaN633121
REVIEW12NaNNaN35NaNNaNNaNNaNNaNNaNNaN47234128NaN643223
REVIEW13NaNNaN41NaNNaNNaNNaNNaNNaNNaN50264231NaN823425
REVIEW14NaNNaN42NaNNaNNaNNaNNaNNaNNaN64284536NaN833730
REVIEW15NaNNaN51NaNNaNNaNNaNNaNNaNNaN66304739NaN843832
REVIEW16NaNNaN52NaNNaNNaNNaNNaNNaNNaN72344843NaN864033
REVIEW17NaNNaN53NaNNaNNaNNaNNaNNaNNaN75355045NaN884335
REVIEW18NaNNaN59NaNNaNNaNNaNNaNNaNNaN83365546NaN895937
REVIEW19NaNNaN60NaNNaNNaNNaNNaNNaNNaN85386148NaN916138
REVIEW20NaNNaN62NaNNaNNaNNaNNaNNaNNaN86406549NaN926241
REVIEW21NaNNaN67NaNNaNNaNNaNNaNNaNNaN93446952NaN956644
REVIEW22NaNNaN68NaNNaNNaNNaNNaNNaNNaNNaN467353NaN966749
REVIEW23NaNNaN69NaNNaNNaNNaNNaNNaNNaNNaN487754NaNNaN6852
REVIEW24NaNNaN74NaNNaNNaNNaNNaNNaNNaNNaN497856NaNNaN7253
REVIEW25NaNNaN77NaNNaNNaNNaNNaNNaNNaNNaN548758NaNNaN7354
REVIEW26NaNNaN79NaNNaNNaNNaNNaNNaNNaNNaN559072NaNNaN7657
REVIEW27NaNNaN80NaNNaNNaNNaNNaNNaNNaNNaN579375NaNNaN7865
REVIEW28NaNNaN81NaNNaNNaNNaNNaNNaNNaNNaN589778NaNNaN7966
REVIEW29NaNNaN82NaNNaNNaNNaNNaNNaNNaNNaN60NaN85NaNNaN8067
REVIEW30NaNNaN87NaNNaNNaNNaNNaNNaNNaNNaN61NaN87NaNNaN8168
REVIEW31NaNNaN90NaNNaNNaNNaNNaNNaNNaNNaN63NaN88NaNNaN8969
REVIEW32NaNNaN91NaNNaNNaNNaNNaNNaNNaNNaN64NaN94NaNNaN9270
REVIEW33NaNNaN94NaNNaNNaNNaNNaNNaNNaNNaN65NaN96NaNNaN9371
REVIEW34NaNNaN97NaNNaNNaNNaNNaNNaNNaNNaN70NaNNaNNaNNaNNaN76
REVIEW35NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN71NaNNaNNaNNaNNaN81
REVIEW36NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN73NaNNaNNaNNaNNaN83
REVIEW37NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN74NaNNaNNaNNaNNaN84
REVIEW38NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN75NaNNaNNaNNaNNaNNaN
REVIEW39NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN77NaNNaNNaNNaNNaNNaN
REVIEW40NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN79NaNNaNNaNNaNNaNNaN
REVIEW41NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN82NaNNaNNaNNaNNaNNaN
REVIEW42NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN85NaNNaNNaNNaNNaNNaN
REVIEW43NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN86NaNNaNNaNNaNNaNNaN
REVIEW44NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN88NaNNaNNaNNaNNaNNaN
REVIEW45NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN90NaNNaNNaNNaNNaNNaN
REVIEW46NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN95NaNNaNNaNNaNNaNNaN
REVIEWNNNNNPNPPNNNPNNNNNNPPPPPPPPPPPPPPPNPPPPNNPPNNNNNPPNNNPPNPNNNPPPPNNNNNNNNNNNNPPPPPPNPPNNNNNNNNNNNNNNNNNNNNNNPPPPPPPPPPPPPPPPPPPNPPPPNNNNNNNPNNNNNNNPPPPPPPPPPPPPPNPNNPNPPNNNNNNPNNNNNPPNPPPPNPPNNNNPPPPPPPPNNNNNNNPPPPPPPPPPPPPPPNNNNPNNNNNNNNNPNNPPPPPPPPPPPPPPPPNPPPPNNNNNPPNPNNNNPNNPNNNNNPNNPNPPNNP
\n", "
" ], "text/plain": [ "Turker T_0 T_1 T_10 T_11 T_12 T_13 \\\n", "REVIEW1 0 0 6 11 13 15 \n", "REVIEW2 11 NaN 8 47 25 19 \n", "REVIEW3 42 NaN 14 55 29 44 \n", "REVIEW4 46 NaN 15 95 57 80 \n", "REVIEW5 NaN NaN 18 96 59 97 \n", "REVIEW6 NaN NaN 20 NaN 71 NaN \n", "REVIEW7 NaN NaN 21 NaN NaN NaN \n", "REVIEW8 NaN NaN 22 NaN NaN NaN \n", "REVIEW9 NaN NaN 23 NaN NaN NaN \n", "REVIEW10 NaN NaN 26 NaN NaN NaN \n", "REVIEW11 NaN NaN 33 NaN NaN NaN \n", "REVIEW12 NaN NaN 35 NaN NaN NaN \n", "REVIEW13 NaN NaN 41 NaN NaN NaN \n", "REVIEW14 NaN NaN 42 NaN NaN NaN \n", "REVIEW15 NaN NaN 51 NaN NaN NaN \n", "REVIEW16 NaN NaN 52 NaN NaN NaN \n", "REVIEW17 NaN NaN 53 NaN NaN NaN \n", "REVIEW18 NaN NaN 59 NaN NaN NaN \n", "REVIEW19 NaN NaN 60 NaN NaN NaN \n", "REVIEW20 NaN NaN 62 NaN NaN NaN \n", "REVIEW21 NaN NaN 67 NaN NaN NaN \n", "REVIEW22 NaN NaN 68 NaN NaN NaN \n", "REVIEW23 NaN NaN 69 NaN NaN NaN \n", "REVIEW24 NaN NaN 74 NaN NaN NaN \n", "REVIEW25 NaN NaN 77 NaN NaN NaN \n", "REVIEW26 NaN NaN 79 NaN NaN NaN \n", "REVIEW27 NaN NaN 80 NaN NaN NaN \n", "REVIEW28 NaN NaN 81 NaN NaN NaN \n", "REVIEW29 NaN NaN 82 NaN NaN NaN \n", "REVIEW30 NaN NaN 87 NaN NaN NaN \n", "REVIEW31 NaN NaN 90 NaN NaN NaN \n", "REVIEW32 NaN NaN 91 NaN NaN NaN \n", "REVIEW33 NaN NaN 94 NaN NaN NaN \n", "REVIEW34 NaN NaN 97 NaN NaN NaN \n", "REVIEW35 NaN NaN NaN NaN NaN NaN \n", "REVIEW36 NaN NaN NaN NaN NaN NaN \n", "REVIEW37 NaN NaN NaN NaN NaN NaN \n", "REVIEW38 NaN NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN NaN \n", "REVIEW NNNN N PNPPNNNPNNNNNNPPPPPPPPPPPPPPPNPPPP NNPPN NNNNPP NNNPP \n", "\n", "Turker T_14 T_15 T_16 T_17 T_2 \\\n", "REVIEW1 28 29 39 56 0 \n", "REVIEW2 76 92 58 NaN 1 \n", "REVIEW3 NaN NaN 70 NaN 8 \n", "REVIEW4 NaN NaN 94 NaN 10 \n", "REVIEW5 NaN NaN NaN NaN 16 \n", "REVIEW6 NaN NaN NaN NaN 21 \n", "REVIEW7 NaN NaN NaN NaN 27 \n", "REVIEW8 NaN NaN NaN NaN 32 \n", "REVIEW9 NaN NaN NaN NaN 36 \n", "REVIEW10 NaN NaN NaN NaN 37 \n", "REVIEW11 NaN NaN NaN NaN 45 \n", "REVIEW12 NaN NaN NaN NaN 47 \n", "REVIEW13 NaN NaN NaN NaN 50 \n", "REVIEW14 NaN NaN NaN NaN 64 \n", "REVIEW15 NaN NaN NaN NaN 66 \n", "REVIEW16 NaN NaN NaN NaN 72 \n", "REVIEW17 NaN NaN NaN NaN 75 \n", "REVIEW18 NaN NaN NaN NaN 83 \n", "REVIEW19 NaN NaN NaN NaN 85 \n", "REVIEW20 NaN NaN NaN NaN 86 \n", "REVIEW21 NaN NaN NaN NaN 93 \n", "REVIEW22 NaN NaN NaN NaN NaN \n", "REVIEW23 NaN NaN NaN NaN NaN \n", "REVIEW24 NaN NaN NaN NaN NaN \n", "REVIEW25 NaN NaN NaN NaN NaN \n", "REVIEW26 NaN NaN NaN NaN NaN \n", "REVIEW27 NaN NaN NaN NaN NaN \n", "REVIEW28 NaN NaN NaN NaN NaN \n", "REVIEW29 NaN NaN NaN NaN NaN \n", "REVIEW30 NaN NaN NaN NaN NaN \n", "REVIEW31 NaN NaN NaN NaN NaN \n", "REVIEW32 NaN NaN NaN NaN NaN \n", "REVIEW33 NaN NaN NaN NaN NaN \n", "REVIEW34 NaN NaN NaN NaN NaN \n", "REVIEW35 NaN NaN NaN NaN NaN \n", "REVIEW36 NaN NaN NaN NaN NaN \n", "REVIEW37 NaN NaN NaN NaN NaN \n", "REVIEW38 NaN NaN NaN NaN NaN \n", "REVIEW39 NaN NaN NaN NaN NaN \n", "REVIEW40 NaN NaN NaN NaN NaN \n", "REVIEW41 NaN NaN NaN NaN NaN \n", "REVIEW42 NaN NaN NaN NaN NaN \n", "REVIEW43 NaN NaN NaN NaN NaN \n", "REVIEW44 NaN NaN NaN NaN NaN \n", "REVIEW45 NaN NaN NaN NaN NaN \n", "REVIEW46 NaN NaN NaN NaN NaN \n", "REVIEW NP NN NPPP P NNNNNNNNNNNNPPPPPPNPP \n", "\n", "Turker T_3 \\\n", "REVIEW1 1 \n", "REVIEW2 3 \n", "REVIEW3 4 \n", "REVIEW4 5 \n", "REVIEW5 7 \n", "REVIEW6 10 \n", "REVIEW7 14 \n", "REVIEW8 16 \n", "REVIEW9 17 \n", "REVIEW10 19 \n", "REVIEW11 22 \n", "REVIEW12 23 \n", "REVIEW13 26 \n", "REVIEW14 28 \n", "REVIEW15 30 \n", "REVIEW16 34 \n", "REVIEW17 35 \n", "REVIEW18 36 \n", "REVIEW19 38 \n", "REVIEW20 40 \n", "REVIEW21 44 \n", "REVIEW22 46 \n", "REVIEW23 48 \n", "REVIEW24 49 \n", "REVIEW25 54 \n", "REVIEW26 55 \n", "REVIEW27 57 \n", "REVIEW28 58 \n", "REVIEW29 60 \n", "REVIEW30 61 \n", "REVIEW31 63 \n", "REVIEW32 64 \n", "REVIEW33 65 \n", "REVIEW34 70 \n", "REVIEW35 71 \n", "REVIEW36 73 \n", "REVIEW37 74 \n", "REVIEW38 75 \n", "REVIEW39 77 \n", "REVIEW40 79 \n", "REVIEW41 82 \n", "REVIEW42 85 \n", "REVIEW43 86 \n", "REVIEW44 88 \n", "REVIEW45 90 \n", "REVIEW46 95 \n", "REVIEW NNNNNNNNNNNNNNNNNNNNNNPPPPPPPPPPPPPPPPPPPNPPPP \n", "\n", "Turker T_4 T_5 \\\n", "REVIEW1 1 2 \n", "REVIEW2 7 3 \n", "REVIEW3 9 4 \n", "REVIEW4 12 5 \n", "REVIEW5 17 12 \n", "REVIEW6 19 13 \n", "REVIEW7 27 14 \n", "REVIEW8 29 20 \n", "REVIEW9 30 22 \n", "REVIEW10 33 24 \n", "REVIEW11 40 26 \n", "REVIEW12 41 28 \n", "REVIEW13 42 31 \n", "REVIEW14 45 36 \n", "REVIEW15 47 39 \n", "REVIEW16 48 43 \n", "REVIEW17 50 45 \n", "REVIEW18 55 46 \n", "REVIEW19 61 48 \n", "REVIEW20 65 49 \n", "REVIEW21 69 52 \n", "REVIEW22 73 53 \n", "REVIEW23 77 54 \n", "REVIEW24 78 56 \n", "REVIEW25 87 58 \n", "REVIEW26 90 72 \n", "REVIEW27 93 75 \n", "REVIEW28 97 78 \n", "REVIEW29 NaN 85 \n", "REVIEW30 NaN 87 \n", "REVIEW31 NaN 88 \n", "REVIEW32 NaN 94 \n", "REVIEW33 NaN 96 \n", "REVIEW34 NaN NaN \n", "REVIEW35 NaN NaN \n", "REVIEW36 NaN NaN \n", "REVIEW37 NaN NaN \n", "REVIEW38 NaN NaN \n", "REVIEW39 NaN NaN \n", "REVIEW40 NaN NaN \n", "REVIEW41 NaN NaN \n", "REVIEW42 NaN NaN \n", "REVIEW43 NaN NaN \n", "REVIEW44 NaN NaN \n", "REVIEW45 NaN NaN \n", "REVIEW46 NaN NaN \n", "REVIEW NNNNNNNPNNNNNNNPPPPPPPPPPPPP PNPNNPNPPNNNNNNPNNNNNPPNPPPPNPPNN \n", "\n", "Turker T_6 T_7 \\\n", "REVIEW1 2 2 \n", "REVIEW2 31 8 \n", "REVIEW3 50 9 \n", "REVIEW4 51 24 \n", "REVIEW5 62 34 \n", "REVIEW6 63 39 \n", "REVIEW7 74 43 \n", "REVIEW8 84 51 \n", "REVIEW9 89 56 \n", "REVIEW10 91 60 \n", "REVIEW11 NaN 63 \n", "REVIEW12 NaN 64 \n", "REVIEW13 NaN 82 \n", "REVIEW14 NaN 83 \n", "REVIEW15 NaN 84 \n", "REVIEW16 NaN 86 \n", "REVIEW17 NaN 88 \n", "REVIEW18 NaN 89 \n", "REVIEW19 NaN 91 \n", "REVIEW20 NaN 92 \n", "REVIEW21 NaN 95 \n", "REVIEW22 NaN 96 \n", "REVIEW23 NaN NaN \n", "REVIEW24 NaN NaN \n", "REVIEW25 NaN NaN \n", "REVIEW26 NaN NaN \n", "REVIEW27 NaN NaN \n", "REVIEW28 NaN NaN \n", "REVIEW29 NaN NaN \n", "REVIEW30 NaN NaN \n", "REVIEW31 NaN NaN \n", "REVIEW32 NaN NaN \n", "REVIEW33 NaN NaN \n", "REVIEW34 NaN NaN \n", "REVIEW35 NaN NaN \n", "REVIEW36 NaN NaN \n", "REVIEW37 NaN NaN \n", "REVIEW38 NaN NaN \n", "REVIEW39 NaN NaN \n", "REVIEW40 NaN NaN \n", "REVIEW41 NaN NaN \n", "REVIEW42 NaN NaN \n", "REVIEW43 NaN NaN \n", "REVIEW44 NaN NaN \n", "REVIEW45 NaN NaN \n", "REVIEW46 NaN NaN \n", "REVIEW NNPPPPPPPP NNNNNNNPPPPPPPPPPPPPPP \n", "\n", "Turker T_8 \\\n", "REVIEW1 3 \n", "REVIEW2 4 \n", "REVIEW3 6 \n", "REVIEW4 12 \n", "REVIEW5 15 \n", "REVIEW6 18 \n", "REVIEW7 20 \n", "REVIEW8 24 \n", "REVIEW9 25 \n", "REVIEW10 27 \n", "REVIEW11 31 \n", "REVIEW12 32 \n", "REVIEW13 34 \n", "REVIEW14 37 \n", "REVIEW15 38 \n", "REVIEW16 40 \n", "REVIEW17 43 \n", "REVIEW18 59 \n", "REVIEW19 61 \n", "REVIEW20 62 \n", "REVIEW21 66 \n", "REVIEW22 67 \n", "REVIEW23 68 \n", "REVIEW24 72 \n", "REVIEW25 73 \n", "REVIEW26 76 \n", "REVIEW27 78 \n", "REVIEW28 79 \n", "REVIEW29 80 \n", "REVIEW30 81 \n", "REVIEW31 89 \n", "REVIEW32 92 \n", "REVIEW33 93 \n", "REVIEW34 NaN \n", "REVIEW35 NaN \n", "REVIEW36 NaN \n", "REVIEW37 NaN \n", "REVIEW38 NaN \n", "REVIEW39 NaN \n", "REVIEW40 NaN \n", "REVIEW41 NaN \n", "REVIEW42 NaN \n", "REVIEW43 NaN \n", "REVIEW44 NaN \n", "REVIEW45 NaN \n", "REVIEW46 NaN \n", "REVIEW NNNNPNNNNNNNNNPNNPPPPPPPPPPPPPPPP \n", "\n", "Turker T_9 \n", "REVIEW1 5 \n", "REVIEW2 6 \n", "REVIEW3 7 \n", "REVIEW4 9 \n", "REVIEW5 10 \n", "REVIEW6 11 \n", "REVIEW7 13 \n", "REVIEW8 16 \n", "REVIEW9 17 \n", "REVIEW10 18 \n", "REVIEW11 21 \n", "REVIEW12 23 \n", "REVIEW13 25 \n", "REVIEW14 30 \n", "REVIEW15 32 \n", "REVIEW16 33 \n", "REVIEW17 35 \n", "REVIEW18 37 \n", "REVIEW19 38 \n", "REVIEW20 41 \n", "REVIEW21 44 \n", "REVIEW22 49 \n", "REVIEW23 52 \n", "REVIEW24 53 \n", "REVIEW25 54 \n", "REVIEW26 57 \n", "REVIEW27 65 \n", "REVIEW28 66 \n", "REVIEW29 67 \n", "REVIEW30 68 \n", "REVIEW31 69 \n", "REVIEW32 70 \n", "REVIEW33 71 \n", "REVIEW34 76 \n", "REVIEW35 81 \n", "REVIEW36 83 \n", "REVIEW37 84 \n", "REVIEW38 NaN \n", "REVIEW39 NaN \n", "REVIEW40 NaN \n", "REVIEW41 NaN \n", "REVIEW42 NaN \n", "REVIEW43 NaN \n", "REVIEW44 NaN \n", "REVIEW45 NaN \n", "REVIEW46 NaN \n", "REVIEW NPPPPNNNNNPPNPNNNNPNNPNNNNNPNNPNPPNNP " ] }, "execution_count": 313, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 317, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'DataFrameGroupBy' object has no attribute 'tolist'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mgrouped\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Turker'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrouped\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;31m# values = grouped['REVIEW'].agg('sum')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m# id_df = grouped['SENTIMENT'].apply(lambda x: pd.Series(x.values)).unstack()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 564\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 565\u001b[0m raise AttributeError(\n\u001b[0;32m--> 566\u001b[0;31m \u001b[0;34m\"%r object has no attribute %r\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 567\u001b[0m )\n\u001b[1;32m 568\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'DataFrameGroupBy' object has no attribute 'tolist'" ] } ], "source": [ "df = pd.DataFrame({'Turker': even_cleaner_df['T_ID'].tolist(),\n", " 'SENTIMENT': even_cleaner_df['ReviewID'].tolist(),\n", " 'REVIEW': even_cleaner_df['sentiment'].tolist() })\n", "\n", "grouped = df.groupby('Turker')\n", "print(grouped.tolist())\n", "# values = grouped['REVIEW'].agg('sum')\n", "# id_df = grouped['SENTIMENT'].apply(lambda x: pd.Series(x.values)).unstack()\n", "# id_df = id_df.rename(columns={i: 'REVIEW{}'.format(i + 1) for i in range(id_df.shape[1])})\n", "# result = pd.concat([id_df, values], axis=1)\n", "# result_df = pd.DataFrame(result)\n", "# print(result_df.T)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I want every review on the left side and I want all 46 turkers on the top" ] }, { "cell_type": "code", "execution_count": 319, "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame({ 'review': even_cleaner_df['ReviewID']})" ] }, { "cell_type": "code", "execution_count": 359, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0, 11, 42, 46]\n", "[0]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[11, 47, 55, 95, 96]\n", "[0, 11, 42, 46]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[13, 25, 29, 57, 59, 71]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[15, 19, 44, 80, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[15, 19, 44, 80, 97]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[13, 25, 29, 57, 59, 71]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[28, 76]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[29, 92]\n", "[13, 25, 29, 57, 59, 71]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[39, 58, 70, 94]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[0, 11, 42, 46]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[15, 19, 44, 80, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[0, 11, 42, 46]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[11, 47, 55, 95, 96]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[11, 47, 55, 95, 96]\n", "[56]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[13, 25, 29, 57, 59, 71]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[39, 58, 70, 94]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[13, 25, 29, 57, 59, 71]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[39, 58, 70, 94]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[13, 25, 29, 57, 59, 71]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[28, 76]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[15, 19, 44, 80, 97]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[5, 6, 7, 9, 10, 11, 13, 16, 17, 18, 21, 23, 25, 30, 32, 33, 35, 37, 38, 41, 44, 49, 52, 53, 54, 57, 65, 66, 67, 68, 69, 70, 71, 76, 81, 83, 84]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[2, 31, 50, 51, 62, 63, 74, 84, 89, 91]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[29, 92]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[0, 1, 8, 10, 16, 21, 27, 32, 36, 37, 45, 47, 50, 64, 66, 72, 75, 83, 85, 86, 93]\n", "[3, 4, 6, 12, 15, 18, 20, 24, 25, 27, 31, 32, 34, 37, 38, 40, 43, 59, 61, 62, 66, 67, 68, 72, 73, 76, 78, 79, 80, 81, 89, 92, 93]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[39, 58, 70, 94]\n", "[1, 3, 4, 5, 7, 10, 14, 16, 17, 19, 22, 23, 26, 28, 30, 34, 35, 36, 38, 40, 44, 46, 48, 49, 54, 55, 57, 58, 60, 61, 63, 64, 65, 70, 71, 73, 74, 75, 77, 79, 82, 85, 86, 88, 90, 95]\n", "[11, 47, 55, 95, 96]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[11, 47, 55, 95, 96]\n", "[2, 3, 4, 5, 12, 13, 14, 20, 22, 24, 26, 28, 31, 36, 39, 43, 45, 46, 48, 49, 52, 53, 54, 56, 58, 72, 75, 78, 85, 87, 88, 94, 96]\n", "[2, 8, 9, 24, 34, 39, 43, 51, 56, 60, 63, 64, 82, 83, 84, 86, 88, 89, 91, 92, 95, 96]\n", "[15, 19, 44, 80, 97]\n", "[1, 7, 9, 12, 17, 19, 27, 29, 30, 33, 40, 41, 42, 45, 47, 48, 50, 55, 61, 65, 69, 73, 77, 78, 87, 90, 93, 97]\n", "[6, 8, 14, 15, 18, 20, 21, 22, 23, 26, 33, 35, 41, 42, 51, 52, 53, 59, 60, 62, 67, 68, 69, 74, 77, 79, 80, 81, 82, 87, 90, 91, 94, 97]\n" ] } ], "source": [ "def get_array_of_reviews(turker, df):\n", " a = ['nan']*98\n", " df = even_cleaner_df[even_cleaner_df['T_ID'] == turker] \n", " t_reviews = df['ReviewID'].tolist()\n", " t_sentiment = df['sentiment'].tolist()\n", " for index,review in enumerate(t_reviews):\n", " a[review] = t_sentiment[index]\n", " print(t_reviews)\n", "\n", " return a\n", "\n", "sparse_df = even_cleaner_df.copy()\n", "sparse_df['big_array'] = sparse_df.apply(lambda x: get_array_of_reviews(x['T_ID'], even_cleaner_df), axis=1)\n", "# t0 = even_cleaner_df[even_cleaner_df['T_ID'] == 'T_0']" ] }, { "cell_type": "code", "execution_count": 360, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ReviewIDT_IDsentimentbig_array
00T_0N[N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ...
3411T_0N[N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ...
12642T_0N[N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ...
14046T_0N[N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ...
\n", "
" ], "text/plain": [ " ReviewID T_ID sentiment \\\n", "0 0 T_0 N \n", "34 11 T_0 N \n", "126 42 T_0 N \n", "140 46 T_0 N \n", "\n", " big_array \n", "0 [N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ... \n", "34 [N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ... \n", "126 [N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ... \n", "140 [N, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, N, 0, 0, 0, ... " ] }, "execution_count": 360, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t0" ] }, { "cell_type": "code", "execution_count": 361, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ReviewIDT_IDsentimentbig_array
00T_0N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
10T_1N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
20T_2N[N, N, nan, nan, nan, nan, nan, nan, N, nan, N...
31T_3N[nan, N, nan, N, N, N, nan, N, nan, nan, N, na...
41T_2N[N, N, nan, nan, nan, nan, nan, nan, N, nan, N...
...............
28996T_5N[nan, nan, P, N, P, N, nan, nan, nan, nan, nan...
29096T_7P[nan, nan, N, nan, nan, nan, nan, nan, N, N, n...
29197T_13P[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
29297T_4P[nan, N, nan, nan, nan, nan, nan, N, nan, N, n...
29397T_10P[nan, nan, nan, nan, nan, nan, P, nan, N, nan,...
\n", "

294 rows × 4 columns

\n", "
" ], "text/plain": [ " ReviewID T_ID sentiment \\\n", "0 0 T_0 N \n", "1 0 T_1 N \n", "2 0 T_2 N \n", "3 1 T_3 N \n", "4 1 T_2 N \n", ".. ... ... ... \n", "289 96 T_5 N \n", "290 96 T_7 P \n", "291 97 T_13 P \n", "292 97 T_4 P \n", "293 97 T_10 P \n", "\n", " big_array \n", "0 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... \n", "1 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... \n", "2 [N, N, nan, nan, nan, nan, nan, nan, N, nan, N... \n", "3 [nan, N, nan, N, N, N, nan, N, nan, nan, N, na... \n", "4 [N, N, nan, nan, nan, nan, nan, nan, N, nan, N... \n", ".. ... \n", "289 [nan, nan, P, N, P, N, nan, nan, nan, nan, nan... \n", "290 [nan, nan, N, nan, nan, nan, nan, nan, N, N, n... \n", "291 [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... \n", "292 [nan, N, nan, nan, nan, nan, nan, N, nan, N, n... \n", "293 [nan, nan, nan, nan, nan, nan, P, nan, N, nan,... \n", "\n", "[294 rows x 4 columns]" ] }, "execution_count": 361, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sparse_df" ] }, { "cell_type": "code", "execution_count": 362, "metadata": {}, "outputs": [], "source": [ "t0 = sparse_df[sparse_df['T_ID'] == 'T_0']" ] }, { "cell_type": "code", "execution_count": 363, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ReviewIDT_IDsentimentbig_array
00T_0N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
3411T_0N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
12642T_0N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
14046T_0N[N, nan, nan, nan, nan, nan, nan, nan, nan, na...
\n", "
" ], "text/plain": [ " ReviewID T_ID sentiment \\\n", "0 0 T_0 N \n", "34 11 T_0 N \n", "126 42 T_0 N \n", "140 46 T_0 N \n", "\n", " big_array \n", "0 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... \n", "34 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... \n", "126 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... \n", "140 [N, nan, nan, nan, nan, nan, nan, nan, nan, na... " ] }, "execution_count": 363, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t0" ] }, { "cell_type": "code", "execution_count": 364, "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "unhashable type: 'list'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msparse_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'big_array'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36munique\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1986\u001b[0m \u001b[0mCategories\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1987\u001b[0m \"\"\"\n\u001b[0;32m-> 1988\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1989\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1990\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36munique\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1403\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malgorithms\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0munique1d\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1405\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munique1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1406\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1407\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/algorithms.py\u001b[0m in \u001b[0;36munique\u001b[0;34m(values)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhtable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 405\u001b[0;31m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 406\u001b[0m \u001b[0muniques\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_reconstruct_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moriginal\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0muniques\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.unique\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable._unique\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" ] } ], "source": [ "sparse_df['big_array'].unique()" ] }, { "cell_type": "code", "execution_count": 365, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['T_0', 'T_1', 'T_2', 'T_3', 'T_4', 'T_5', 'T_6', 'T_7', 'T_8',\n", " 'T_9', 'T_10', 'T_11', 'T_12', 'T_13', 'T_14', 'T_15', 'T_16',\n", " 'T_17'], dtype=object)" ] }, "execution_count": 365, "metadata": {}, "output_type": "execute_result" } ], "source": [ "even_cleaner_df['T_ID'].unique()" ] }, { "cell_type": "code", "execution_count": 377, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['N',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'N',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'N',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'P',\n", " 'nan',\n", " 'nan',\n", " 'nan',\n", " 'nan']" ] }, "execution_count": 377, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sparse_df['big_array'][sparse_df['T_ID'] == 'T_2'].tolist()[0]" ] }, { "cell_type": "code", "execution_count": 381, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.39004149377593356" ] }, "execution_count": 381, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import cohen_kappa_score\n", "y1 = sparse_df['big_array'][sparse_df['T_ID'] == 'T_0'].tolist()[0]\n", "y2 = sparse_df['big_array'][sparse_df['T_ID'] == 'T_1'].tolist()[0]\n", "cohen_kappa_score(y1,y2)" ] }, { "cell_type": "code", "execution_count": 388, "metadata": {}, "outputs": [], "source": [ "def calculate_kappa(num):\n", " y1 = sparse_df['big_array'][sparse_df['T_ID'] == 'T_'+str(num)].tolist()[0]\n", " y2 = sparse_df['big_array'][sparse_df['T_ID'] == 'T_'+str(num + 1)].tolist()[0]\n", " return cohen_kappa_score(y1,y2)\n", "\n", "kappas = [calculate_kappa(num) for num in range(16)]" ] }, { "cell_type": "code", "execution_count": 389, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.39004149377593356,\n", " 0.07634307257304429,\n", " 0.023255813953488413,\n", " 0.11578947368421055,\n", " -0.10975609756097549,\n", " -0.04981253347616499,\n", " 0.29547088425593093,\n", " -0.02821170435999054,\n", " -0.01071003570011908,\n", " 0.005658536585365748,\n", " -0.06968933669185562,\n", " -0.04457364341085279,\n", " -0.04457364341085279,\n", " -0.02235469448584193,\n", " -0.015544041450777257,\n", " -0.01730103806228378]" ] }, "execution_count": 389, "metadata": {}, "output_type": "execute_result" } ], "source": [ "kappas" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }