import numpy as np
import pandas as pd
# data processing
from sklearn.cross_validation import ShuffleSplit
from sklearn.cross_validation import train_test_split
# scoring
from sklearn.metrics import r2_score
# visualization helpers from the local visuals.py module
import visuals as vs
# visual display for Jupyter notebooks
%matplotlib inline
# Load dataset: read the CSV, then separate the target column from the
# remaining feature columns.
data = pd.read_csv('xyz.csv')
target = data['col_xyz']
features = data.drop('col_xyz', axis=1)

# Data processing: train on 80% of the rows; the remainder becomes the test
# set. random_state is fixed so the shuffle is reproducible between runs.
# The split is fed the `features` / `target` objects defined above.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, train_size=0.8, random_state=1)

# Success: report (rows, columns) of the full frame, target column included.
# The parenthesised single-argument form prints the same on Python 2 and 3.
print("XYZ dataset has {} data points with {} variables each.".format(*data.shape))

# Exploring dataset. In a notebook, only the last bare expression of a cell
# is rendered, so run these one per cell (or wrap them in print()).
data.head()      # first rows, default count
data.head(5)     # first five rows, count given explicitly
data.describe()  # summary statistics per column
# learning curve
from sklearn.model_selection import learning_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import recall_score as recall
from sklearn.metrics import precision_score as precision
# model selection
from sklearn.grid_search import GridSearchCV #legacy
from sklearn.model_selection import GridSearchCV #new release
# Useful sklearn modules
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import roc_curve, auc
from nltk.stem.porter import PorterStemmer
No comments:
Post a Comment