# Pandas walkthrough: loading, inspecting, selecting, sorting, modifying,
# saving and filtering a DataFrame.
# Source video: https://youtu.be/vmEHCJofslg
import pandas as pd

# Load the data set; the commented-out lines are the Excel and
# tab-separated alternatives.
df = pd.read_csv('data/pokemon_data.csv')
# df = pd.read_excel('data/pokemon_data.xlsx')
# df = pd.read_csv('data/pokemon_data.txt', delimiter='\t')

# head and tail
print(df.tail(5))
print(df.head(5))
df.head(5)
df.tail(5)

# read headers
df.columns

# a specific column (attribute access, label access, one-column DataFrame)
df.Name
df['Name']
df[['Name']]

# specific columns and specific rows
df['Name'][0:5]
df[['Name', 'Type 1', 'HP']][0:5]

# specific rows
df.iloc[0:3]

# read from a specific location:
# iloc selects rows and columns by integer position
df.iloc[2, 1]
df.iloc[0:6, 1:5]

# loc makes a pandas.DataFrame behave like an R data.frame:
# it selects rows and columns by label (or by a boolean mask)
df.loc[df.HP > 50, ['Name', 'Type 1', 'HP', 'Speed']]
df.loc[0:5, ['Name', 'Type 1', 'HP', 'Speed']]
df.loc[df['Type 1'] == 'Fire'][0:5]

# basic statistics
df.describe()

# sorting
df.sort_values('Name', ascending=False)
df.sort_values(['Type 1', 'HP'], ascending=True)
# one direction per sort key: 'Type 1' ascending, 'HP' descending
df.sort_values(['Type 1', 'HP'], ascending=[True, False])

# making changes to the data
df['Total'] = df['HP'] + df['Attack'] + df['Defense']
# attribute assignment only updates a column that already exists;
# it cannot be used to create a new one
df.Total = df.HP + df.Attack + df.Defense

# delete a column
df = df.drop(columns=['Total'])

# sum columns 4 through 9 for every row; iloc slices exclude the end
# position, so the slice has to stop at 10
# NOTE(review): assumes positions 4-9 are the numeric stat columns -- confirm
# against the CSV header
df['Total'] = df.iloc[:, 4:10].sum(axis=1)

# change the order of the columns: move the newly appended last column
# ('Total') so that it follows the first four columns.
# df[...] needs a single flat list of labels, so the slices are concatenated.
cols = list(df.columns.values)
df = df[cols[0:4] + [cols[-1]] + cols[4:-1]]

# save a data frame (each to_csv call below overwrites the same file)
df.to_csv('modified.csv')
df.to_csv('modified.csv', index=False)
df.to_csv('modified.csv', index=False, sep='\t')
df.to_excel('modified.xlsx', index=False)

# filtering data: combine boolean masks with & (and) / | (or);
# each comparison must be wrapped in parentheses
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')]
df.loc[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Poison')]
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]
10.2 numpy
code/py-numpy.py
# NumPy walkthrough: array basics, indexing, initialisation, arithmetic,
# linear algebra, statistics, reshaping/stacking and loading from a file.
import os

import numpy as np

a = np.array([1, 2, 3], dtype='int32')
print(a)
b = np.array([[9.0, 8.0, 7.0], [6.0, 5.0, 4.0]])
print(b)

# Get Dimension
a.ndim
# Get Shape
b.shape
# Get Type
a.dtype
# Get Size (bytes used by one element)
a.itemsize
# Get total size (bytes used by all elements)
a.nbytes
# Get number of elements
a.size

# --------------------
# Accessing/Changing specific elements, rows, columns, etc
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)
# Get a specific element [r, c]
a[1, 5]
# Get a specific row
a[0, :]
a[0, ]
# Get a specific column
a[:, 2]
# Getting a little more fancy [startindex:endindex:stepsize]
a[0, 1:-1:2]
a[1, 5] = 20
a[:, 2] = [1, 2]
print(a)

# 3-d example
b = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(b)
# Get specific element (work outside in)
b[0, 1, 1]
# replace: b[:, 1, :] has shape (2, 2), so the replacement must be 2x2 too
b[:, 1, :] = [[9, 9], [8, 8]]
b

# Initializing Different Types of Arrays
# All 0s matrix
np.zeros((2, 3))
# All 1s matrix
np.ones((4, 2, 2), dtype='int32')
# Any other number
np.full((2, 2), 99)
# Any other number (full_like reuses the shape of an existing array)
np.full_like(a, 4)
# Random decimal numbers
np.random.rand(4, 2)
# Random Integer values
np.random.randint(-4, 8, size=(3, 3))
# The identity matrix
np.identity(5)
# Repeat an array
arr = np.array([[1, 2, 3]])
r1 = np.repeat(arr, 3, axis=0)
print(r1)

# Build a 5x5 block of ones whose 3x3 interior is zeros with a 9 in the centre
output = np.ones((5, 5))
print(output)
z = np.zeros((3, 3))
z[1, 1] = 9
print(z)
output[1:-1, 1:-1] = z
print(output)

# Be careful when copying arrays!!!
# .copy() gives b its own data, so changing b leaves a untouched
a = np.array([1, 2, 3])
b = a.copy()
b[0] = 100
print(a)

# --------------------
# Mathematics
a = np.array([1, 2, 3, 4])
print(a)
a + 2
a - 2
a * 2
a / 2
b = np.array([1, 0, 1, 0])
a + b
a ** 2
# Take the cosine (np.sin is used the same way)
np.cos(a)

# Linear Algebra
a = np.ones((2, 3))
print(a)
b = np.full((3, 2), 2)
print(b)
np.matmul(a, b)
# Find the determinant
c = np.identity(3)
np.linalg.det(c)

# --------------------
# Statistics
stats = np.array([[1, 2, 3], [4, 5, 6]])
stats
np.min(stats)
np.max(stats, axis=1)
np.sum(stats, axis=0)

# Reorganizing Arrays
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)
# the new shape must hold the same number of elements (8) as the old one
after = before.reshape((4, 2))
print(after)

# Vertically stacking vectors
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])
np.vstack([v1, v2, v1, v2])

# Horizontal stack
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))
np.hstack((h1, h2))

# --------------------
# Miscellaneous
# Load Data from File (skipped with a message when the file is not present,
# so the rest of the walkthrough still runs)
if os.path.exists('data.txt'):
    filedata = np.genfromtxt('data.txt', delimiter=',')
    filedata = filedata.astype('int32')
    print(filedata)
else:
    print('data.txt not found; skipping the file-loading example')
10.3 dfply
code/py-numpy.py
# NumPy walkthrough: array basics, indexing, initialisation, arithmetic,
# linear algebra, statistics, reshaping/stacking and loading from a file.
import os

import numpy as np

a = np.array([1, 2, 3], dtype='int32')
print(a)
b = np.array([[9.0, 8.0, 7.0], [6.0, 5.0, 4.0]])
print(b)

# Get Dimension
a.ndim
# Get Shape
b.shape
# Get Type
a.dtype
# Get Size (bytes used by one element)
a.itemsize
# Get total size (bytes used by all elements)
a.nbytes
# Get number of elements
a.size

# --------------------
# Accessing/Changing specific elements, rows, columns, etc
a = np.array([[1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14]])
print(a)
# Get a specific element [r, c]
a[1, 5]
# Get a specific row
a[0, :]
a[0, ]
# Get a specific column
a[:, 2]
# Getting a little more fancy [startindex:endindex:stepsize]
a[0, 1:-1:2]
a[1, 5] = 20
a[:, 2] = [1, 2]
print(a)

# 3-d example
b = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(b)
# Get specific element (work outside in)
b[0, 1, 1]
# replace: b[:, 1, :] has shape (2, 2), so the replacement must be 2x2 too
b[:, 1, :] = [[9, 9], [8, 8]]
b

# Initializing Different Types of Arrays
# All 0s matrix
np.zeros((2, 3))
# All 1s matrix
np.ones((4, 2, 2), dtype='int32')
# Any other number
np.full((2, 2), 99)
# Any other number (full_like reuses the shape of an existing array)
np.full_like(a, 4)
# Random decimal numbers
np.random.rand(4, 2)
# Random Integer values
np.random.randint(-4, 8, size=(3, 3))
# The identity matrix
np.identity(5)
# Repeat an array
arr = np.array([[1, 2, 3]])
r1 = np.repeat(arr, 3, axis=0)
print(r1)

# Build a 5x5 block of ones whose 3x3 interior is zeros with a 9 in the centre
output = np.ones((5, 5))
print(output)
z = np.zeros((3, 3))
z[1, 1] = 9
print(z)
output[1:-1, 1:-1] = z
print(output)

# Be careful when copying arrays!!!
# .copy() gives b its own data, so changing b leaves a untouched
a = np.array([1, 2, 3])
b = a.copy()
b[0] = 100
print(a)

# --------------------
# Mathematics
a = np.array([1, 2, 3, 4])
print(a)
a + 2
a - 2
a * 2
a / 2
b = np.array([1, 0, 1, 0])
a + b
a ** 2
# Take the cosine (np.sin is used the same way)
np.cos(a)

# Linear Algebra
a = np.ones((2, 3))
print(a)
b = np.full((3, 2), 2)
print(b)
np.matmul(a, b)
# Find the determinant
c = np.identity(3)
np.linalg.det(c)

# --------------------
# Statistics
stats = np.array([[1, 2, 3], [4, 5, 6]])
stats
np.min(stats)
np.max(stats, axis=1)
np.sum(stats, axis=0)

# Reorganizing Arrays
before = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(before)
# the new shape must hold the same number of elements (8) as the old one
after = before.reshape((4, 2))
print(after)

# Vertically stacking vectors
v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])
np.vstack([v1, v2, v1, v2])

# Horizontal stack
h1 = np.ones((2, 4))
h2 = np.zeros((2, 2))
np.hstack((h1, h2))

# --------------------
# Miscellaneous
# Load Data from File (skipped with a message when the file is not present,
# so the rest of the walkthrough still runs)
if os.path.exists('data.txt'):
    filedata = np.genfromtxt('data.txt', delimiter=',')
    filedata = filedata.astype('int32')
    print(filedata)
else:
    print('data.txt not found; skipping the file-loading example')