surprise knns.KNNWithMeans

2022. 3. 26. 11:21 · AI/Big data

    목차
반응형

scikit-surprise 설치

!pip install scikit-surprise

 

imports

import numpy as np
import pandas as pd

from surprise import BaselineOnly 
from surprise import KNNWithMeans
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

 

data load

# Load the 'ml-100k' dataset through surprise's built-in dataset loader.
data = Dataset.load_builtin('ml-100k')

 

# Peek at the first 10 raw rating records; each is a 4-tuple, as the output
# below shows (presumably user id, item id, rating, timestamp -- confirm).
data.raw_ratings[:10]

 

    [('196', '242', 3.0, '881250949'),
     ('186', '302', 3.0, '891717742'),
     ('22', '377', 1.0, '878887116'),
     ('244', '51', 2.0, '880606923'),
     ('166', '346', 1.0, '886397596'),
     ('298', '474', 4.0, '884182806'),
     ('115', '265', 2.0, '881171488'),
     ('253', '465', 5.0, '891628467'),
     ('305', '451', 3.0, '886324817'),
     ('6', '86', 3.0, '883603013')]

 

data split

# Hold out 25% of the ratings as the test set; the rest is the training set.
# NOTE(review): no random_state is passed, so the split (and the RMSE
# computed from it) presumably varies between runs -- confirm.
trainset, testset = train_test_split(data, test_size=0.25)

 

model train

# Create a KNNWithMeans model with its default settings and fit it on the
# training set.
algo = KNNWithMeans()
algo.fit(trainset)

 

predict

# Run the fitted model over the held-out test set and collect its predictions.
predictions = algo.test(testset)

 

get RMSE

# Compute the root-mean-square error (RMSE) of the test-set predictions.
accuracy.rmse(predictions)

 

load data from file

load

from surprise import Reader
import pandas as pd

# Read the ratings file into a DataFrame and preview its first rows.
# NOTE(review): assumes 'ml-100k.zip' contains a single CSV with a header row
# providing the 'user_id', 'item_id' and 'rating' columns selected further
# down -- confirm against the actual archive.
df = pd.read_csv('ml-100k.zip')
df.head()

cleansing

# Declare that ratings lie on a 1-to-5 scale.
reader = Reader(rating_scale=(1, 5))

 

# Keep only the user, item and rating columns, in that order.
col = ['user_id', 'item_id', 'rating']
df = df[col]

 

data conversion

# Convert the DataFrame into a surprise Dataset, using the reader defined for
# the rating scale.
data = Dataset.load_from_df(df, reader)
# Peek at the first 5 raw rating records to check the conversion.
data.raw_ratings[:5]

split data

from surprise.model_selection import train_test_split

# Hold out 25% of the ratings as the test set; the rest is the training set.
# NOTE(review): no random_state is passed, so the split presumably varies
# between runs -- confirm.
trainset, testset = train_test_split(data, test_size=0.25)

 

train

# Create a KNNWithMeans model with its default settings and fit it on the
# training set built from the file-based data.
algo = KNNWithMeans()
algo.fit(trainset)

 

test

# Run the fitted model over the held-out test set and collect its predictions.
predictions = algo.test(testset)

 

# Compute the RMSE of the predictions; the two lines that follow are the
# printed summary and the returned value from one run.
accuracy.rmse(predictions)
RMSE: 0.9543
0.9543248895229686
반응형

'AI > Big data' 카테고리의 다른 글

isort  (0) 2022.04.22
python-poetry  (0) 2022.04.22
Anaconda 사용법  (0) 2022.03.26
Matrix Factorization impl 2  (0) 2022.03.25
Matrix Factorization impl.  (0) 2022.03.25