import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's plain white theme for every figure drawn by this script.
sns.set_style('white')

# Column labels for the tab-separated ratings file.
columns = ['userid', 'item_id', 'rating', 'timestamp']

# Pass the labels through `names=` so pandas does not treat the first line of
# the file as a header. Reading first and renaming afterwards silently
# discarded that first row.
# NOTE(review): assumes 'u.data' has no header line (as in the MovieLens 100K
# distribution) -- confirm against the actual data file.
df = pd.read_csv('u.data', sep="\t", names=columns)

# Attach the movie titles via the shared 'item_id' key. pd.merge defaults to
# an inner join, so ratings without a matching title row are dropped.
movietitles = pd.read_csv('Movie_Id_Titles')
df = pd.merge(df, movietitles, on='item_id')
# Per-title rating statistics: the mean score and how many ratings support it.
# The grouped view is built once and reused for both aggregates; a stand-alone
# groupby mean whose result was never used has been removed.
title_ratings = df.groupby('title')['rating']
ratings = title_ratings.mean().to_frame()  # single column named 'rating'
ratings['number_of_ratings'] = title_ratings.count()

# Joint plot of mean rating against number of ratings; alpha makes dense
# regions of overlapping points visible.
sns.jointplot(x="rating", y="number_of_ratings", data=ratings, alpha=0.5)
# User-by-title matrix of ratings: one row per 'userid', one column per
# 'title'. Cells are NaN where a user has no rating for that title.
movie_matrix = df.pivot_table(index="userid", columns="title", values="rating")

# Ten titles with the most ratings. The result is bound to a name so the sort
# is not computed and immediately thrown away, as a bare expression statement
# would be when this file runs as a script.
most_rated = ratings.sort_values('number_of_ratings', ascending=False).head(10)
# Per-user rating columns for the two reference titles. A KeyError here means
# the title is missing from the pivoted matrix.
starwars_user_ratings = movie_matrix['Star Wars (1977)']
liarliar_user_ratings = movie_matrix['Liar Liar (1997)']

# Correlate every title's rating column with the Star Wars column. This is
# computed exactly once: corrwith scans the whole matrix, so a second call
# whose result is discarded only wastes time.
similar_to_starwars = movie_matrix.corrwith(starwars_user_ratings)

# One-column frame of correlations, dropping titles whose correlation is NaN.
# The non-mutating dropna() keeps the pipeline free of in-place edits.
corr_starwars = similar_to_starwars.to_frame('Correlation').dropna()

# Add the rating count per title (joined on the title index) so each
# correlation can be read alongside the number of ratings behind it.
corr_starwars = corr_starwars.join(ratings['number_of_ratings'])