Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
- Se importan librerías
- Se seleccionan los datos
- Se convierten datos CSV
- Se categorizan los dos dataframe
- Se unifican los dos dataframe
- Verificar los datos
- Descripción total del dataframe
- Gráficas
- Se identifica correlación
- Asimetría
- KNeighborsClassifier-accuracy_score.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import skew
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split,cross_validate
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
# Remote locations of the two semicolon-delimited wine-quality CSV files.
url_wine_red='https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/winequality-red.csv'
url_wine_white='https://raw.githubusercontent.com/terranigmark/curso-analisis-exploratorio-datos-platzi/main/winequality-white.csv'

# Load both files. The white dataset must be read explicitly: it is tagged and
# merged below, and leaving it undefined raises NameError.
red = pd.read_csv(url_wine_red, delimiter=";")
white = pd.read_csv(url_wine_white, delimiter=";")

# Tag every row with the file it came from so the origin survives the merge.
red['category'] = 'red'
white['category'] = 'white'

# Stack the two frames into one with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
total_wine = pd.concat([red, white], ignore_index=True)
# Quick structural overview: column dtypes and summary statistics.
# (Bare expressions: they render in a notebook cell and are no-ops in a script.)
total_wine.dtypes
total_wine.describe()

# Line plots of the whole frame and of the density column alone.
total_wine.plot()
total_wine['density'].plot()

# Use a larger default figure size for the seaborn charts that follow.
sns.set(rc={'figure.figsize': (14, 8)})

# Count of rows per quality score. The column is passed by keyword because
# newer seaborn releases no longer treat the first positional argument as `x`.
sns.countplot(x='quality', data=total_wine)

# Correlation heatmap. Only numeric columns are used: the string `category`
# column cannot be correlated and makes DataFrame.corr raise on pandas >= 2.0.
numeric_wine = total_wine.select_dtypes(include='number')
sns.heatmap(numeric_wine.corr(), annot=True, fmt='.2f', linewidths=2)

# Skewness of the alcohol column.
skew(total_wine['alcohol'])
# Names of the evaluated models and their accuracies, kept as parallel lists.
model_names = ['KNearestNeighbors']
acc = []

# Build the train/test split that the model below needs.
# NOTE(review): the original script used x_train/x_test/y_train/y_test without
# ever defining them. `quality` is assumed to be the prediction target and the
# string `category` column is dropped because KNN needs numeric features;
# confirm both choices against the intended experiment.
features = total_wine.drop(columns=['quality', 'category'])
target = total_wine['quality']
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.25, random_state=42
)

# Fit a default k-nearest-neighbours classifier and predict the held-out rows.
classification_model = KNeighborsClassifier()
classification_model.fit(x_train, y_train)
pred = classification_model.predict(x_test)

# accuracy_score is documented as (y_true, y_pred); the value is the same in
# either order, but the documented order is used for clarity.
acc.append(accuracy_score(y_test, pred))

# Summary mapping of model name to accuracy (displayed when run in a notebook).
eval_acc = {'Modelling Algorithm': model_names, 'Accuracy': acc}
eval_acc
⭐️ fradurgo19