数据集划分
- 训练数据:用于训练,构建模型
- 测试数据:在模型检验时使用,用于评估模型是否有效
数据集划分API:sklearn.model_selection.train_test_split
数据集API介绍 sklearn.datasets
获取数据集返回的类型:load_* 和 fetch_* 返回 Bunch 对象(类似字典),常用属性包括 data(特征值)、target(目标值)、DESCR(数据集描述)
sklearn分类数据集
sklearn.datasets.load_iris()
sklearn.datasets.load_digits()
# Example: the iris dataset
from sklearn.datasets import load_iris

li = load_iris()

# Print the feature values, the target values and the dataset description,
# one after another.
print(
    li.data,    # feature values
    li.target,  # target values
    li.DESCR,   # description text
    sep="\n",
)
对数据集进行分割
sklearn.model_selection.train_test_split()
# Example: split the iris dataset into a training part and a test part.
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

li = load_iris()

# test_size=0.25 asks for a quarter of the samples to go to the test split.
# The result unpacks as: train features, test features, train targets,
# test targets.
x_train, x_test, y_train, y_test = train_test_split(
    li.data,
    li.target,
    test_size=0.25,
)

print('训练集特征值和目标值:', x_train, y_train)
print('测试集特征值和目标值:', x_test, y_test)
用于分类的大数据集
sklearn.datasets.fetch_20newsgroups()、sklearn.datasets.clear_data_home()
# Example: the 20 newsgroups text classification dataset.
# NOTE: fetch_20newsgroups downloads the data on first use and caches it
# locally, so the first call needs network access.
from sklearn.datasets import fetch_20newsgroups

# subset='all' loads the training and the test portions together.
news = fetch_20newsgroups(subset='all')

# Raw documents
print(news.data)
# Target values for the documents
print(news.target)
sklearn回归数据集
sklearn.datasets.load_boston()(注意:该函数在 scikit-learn 1.0 中被弃用,并已在 1.2 中移除)、sklearn.datasets.load_diabetes()
# Example: a built-in regression dataset.
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. Fall back to load_diabetes (another
# built-in regression dataset) so the example still runs on current versions.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.datasets import load_diabetes
    lb = load_diabetes()
else:
    lb = load_boston()

print('获取特征值')
print(lb.data)
print('目标值')
print(lb.target)
# Dataset description text
print(lb.DESCR)