# ------------------------------------------------------------
#
# File Name: cidados_aula02.R
#
# Purpose: Ciência de Dados para Economistas - Faculdade de Economia UFMT
#
# inspired and adapted from
# 'Introduction to Social Network Analysis with R'
# by Michal Bojanowski
# 'R para cientistas sociais'
# by Jakson Alves de Aquino
# 'Programming in R'
# by Thomas Girke
#
# Creation Date: 2016-04-30
# Last Modified: 2022-09-01_12:25
# Created By: Roney Fraga Souza
# E-mail: [email protected]
# roneyfraga.com
#
# Licence:
#
# Creative Commons Attribution-NonCommercial-ShareAlike
# CC BY-NC-SA
# http://creativecommons.org/licenses/by-nc-sa/3.0/
#
# ------------------------------------------------------------
# vetor (revisão)
# matriz
# data frame
# lista
# table, aggregate, reshape, merge
# ------------------------------
# vetores
vector(mode = "character", length = 5)
vector(mode = "numeric", length = 7)
vector(mode = "logical", length = 4)
# função básica para criar vetores com valores pré - determinados é c(), abreviatura de concatenate
c("Marx", "Weber", "Durkheim")
c(5, 3, 11, 6, 1, 4)
c(TRUE, FALSE, TRUE, TRUE, FALSE)
a <- c(5, 3, 11, 6, 1, 4)
a
a = c(5, 3, 11, 6, 1, 4)
a
b <- 1:8
b
# criar vetor como uma sequência
# seq() from to by
seq(10, 100, 5)
seq(from = 10, to = 100, by = 5)
seq(from = 1, to = 2, by = 0.02)
# criar vetor via repetição
# rep() elemento repetição
rep('roney', 30)
rep(2, 4)
# levels
codigo <- c(23, 22, 23, 23, 22, 22, 23, 22)
uf <- factor(codigo, levels = c(22, 23), labels = c("Piaui", "Ceara"))
uf
# nomeando itens
idh05 <- c(0.677, 0.742, 0.723, 0.683, 0.718, 0.718, 0.703, 0.738, 0.742)
names(idh05)
names(idh05) <- c("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE")
idh05
names(idh05)
# obtendo informações de um vetor
length(idh05)
str(idh05)
# estatísticas descritivas
summary(idh05)
mean(idh05)
median(idh05)
min(idh05)
max(idh05)
quantile(idh05)
class(idh05)
is.numeric(idh05)
# convertendo vetores
x = c(1, 0, 1, 0)
x
as.logical(x)
as.numeric(as.logical(c(1, 0, 1, 0)))
c(1, 0, 1, 0) |>
as.logical() |>
as.numeric() ->
x2
x2 <- c(1, 0, 1, 0) |>
as.logical() |>
as.numeric()
a <- c(TRUE, FALSE, TRUE, FALSE)
a <- as.character(c(TRUE, FALSE, TRUE, FALSE))
as.character(c(1, 3.4, - 5.6, 9))
as.numeric(c(TRUE, FALSE, TRUE, FALSE))
as.numeric(c("1.3", "1.4", "1.7"))
is.character(a)
is.logical(a)
# Índices: obtendo e modificando valores
x <- c(4, 8, 2, 6, 7, 1, 8, 1, 2)
y <- c(6, 7, 3, 4, 1, 5, 2, 8, 9)
z <- c("a", "b", "c", "d", "e", "f", "g", "h", "i")
x[3]
y[2]
x[c(1, 3, 5)]
z[1:4]
z[1]
y < 5
y[y < 5]
y[c(3, 4, 5, 7)]
l <- letters
l[20]
l[l == 't']
i <- y < 5
i = y < 5
i
y[i]
y[!i]
z[i]
x[i]
names(x) <- z
x
x["b"]
x['b']
x[c("b", "d", "g")]
x[- 1]
x[- c(1, 2, 3)]
x[3] <- 11
x['c'] <- NA
x
x['c'] <- 11
x[c("b", "d", "g")] <- 0
x
is.na(x)
x[is.na(x)]
x[!(is.na(x))]
# Operações
x <- c(5, 2, 4, 3, 2)
y <- c(1, 3, 2, 5, 2)
x + y
x - y
x > y
x[x > y]
x[x < y]
x == y
x[x == y]
x != y
x[x != y]
y + 1
x
library(tictoc)
x = 1:100000000
tic()
x2 <- x * 10
toc()
tic()
for (i in 1:length(x)) {
x2[i] <- x[i] * 10
}
toc()
# erro ao trabalhar com vetores com dimensões distintas
x <- c(1, 5, 7)
y <- c(1, 2, 3, 4, 5, 6, 7)
x + y
#------------------------------
# Matrizes
m <- matrix(1, nrow = 3, ncol = 3)
m
m[2, ]
m[, 3]
m[2, 3]
m[c(1, 3), c(1, 3)]
m[c(3, 1), c(3, 1)]
# juntar colunas
x <- c(7, 9, 8, 10, 1)
y <- c(9, 8, 10, 9, 3)
z <- c(10, 9, 9, 9, 2)
cbind(x, y, z)
# juntar linhas
rbind(x, y, z)
# nomes das colunas e nomes das linhas
m <- cbind(x, y, z)
m
colnames(m) <- c("Matematica", "Portugues", "Historia")
rownames(m) <- c("Helena", "Jose", "Maria", "Francisco", "Macunaima")
m
class(m)
# índices em uma matriz
# M[linhas, colunas]
m[5, 3]
m[1:3, 2]
m[1:3, c(1, 3)]
m[c(1, 4), 1]
m["Maria", c("Portugues", "Matematica")]
m[, "Historia"]
m[, ]
m["Macunaima", "Portugues"] <- 4
m["Macunaima", "Portugues"] = 4
m[5, 2] <- 4
m
m[, 1]
m[, 1] == 10
m[m[, 1] == 10, ]
nerd = m[, 1] == 10
m[nerd, ]
class(m)
rownames(m) <- c("Helena da Silva", "Jose Miranda", "Maria do Rosario", "Francisco Augusto", "Macunaima Matinho")
m
rownames(m) <- toupper(rownames(m))
m
# objeto[linhas, colunas]
# nomes das linhas e colunas
#
#------------------------------
# data frame
# carregar uma base de dados padrão do R
data(mtcars)
mtcars
ls()
# help para descrobrir quais são as variáveis
?mtcars
help('mtcars')
# [, 1] mpg Miles / (US) gallon
# [, 2] cyl Number of cylinders
# [, 3] disp Displacement (cu.in.)
# [, 4] hp Gross horsepower
# [, 5] drat Rear axle ratio
# [, 6] wt Weight (1000 lbs)
# [, 7] qsec 1 / 4 mile time
# [, 8] vs Engine (0 = V - shaped, 1 = straight)
# [, 9] am Transmission (0 = automatic, 1 = manual)
# [, 10] gear Number of forward gears
# [, 11] carb Number of carburetors
# 3 formas de escrever a mesma função
head(mtcars)
head(mtcars, n = 6)
head(mtcars, 6)
tail(mtcars, 6)
str(mtcars)
# other usefull functions on data frames:
nrow(mtcars) # number of rows
ncol(mtcars) # number of columns
names(mtcars) # names of the variables
row.names(mtcars)
head(mtcars) # first 6 observations
head(mtcars, 2) # first 2 observations
tail(mtcars) # last 6 observations
dim(mtcars)
### Referring to individual variables with '$'
mtcars$mpg
mtcars$mpg * 2
mtcars$mpg
mtcars[, 'mpg']
mtcars[, 1]
# mtcars[linhas, colunas ]
mtcars
mtcars[, ]
mtcars[1, 1]
mtcars[1, 1:3]
mtcars[1, c(1, 2, 3)]
mtcars[1, c(3, 2, 3)]
mtcars[1, c(1:3, 5)]
mtcars[, 'mpg']
mtcars[1:3, 'mpg']
mtcars[1:3, 1]
mtcars[14:18, ]
mtcars[14:18, 2:5]
mtcars[14:18, c('cyl', 'disp', 'hp', 'drat')]
mtcars['Fiat 128', c('cyl', 'disp', 'hp', 'drat')]
mtcars['Fiat 128', 2:5]
mtcars[c(1, 5, 8), ]
mtcars[c(1, 5, 8), 2:5]
mtcars[c(8, 1, 5), c(5, 2)]
mtcars[c('Fiat 128', 'Toyota Corolla'), 2:5]
# selecionar variáveis (colunas) e observações (linhas)
# dataframe[linhas, colunas]
# dataframe[nome ou número, nome ou número]
?sort
help('sort')
mtcars$mpg
sort(mtcars$mpg)
sort(mtcars$mpg, decreasing = FALSE)
sort(mtcars$mpg, decreasing = TRUE)
mtcars
mtcars$mpg
mtcars[, 'mpg']
mtcars[, 1]
mtcars[1:3, 1:7]
mtcars[1:3, 1:7]
mtcars[c(1, 3), c(1, 7)]
# ordenando toda a tabela
sort(mtcars$mpg)
order(mtcars$mpg)
mtcars[c(15, 16, 24, 7), ]
6 == 6
mtcars$cyl == 6
mtcars[mtcars$cyl == 6, ]
order(mtcars$mpg)
mtcars[order(mtcars$mpg), ]
mtcars[order(mtcars$mpg, decreasing = TRUE), ]
# Mean
mean(mtcars$mpg)
mean(mtcars[, 'mpg'])
mean(mtcars[, 1])
mean(mtcars$mpg[1:10])
table(mtcars$cyl)
table(mtcars[, 'cyl'])
table(mtcars[, 2])
table(mtcars[, 2], mtcars[, 'gear'])
table(mtcars$cyl, mtcars$gear)
addmargins(table(mtcars$cyl, mtcars$gear))
rownames(mtcars)
# milhas por galão
# Uma milha é igual a 1, 61 km
# Um galão é igual a 3, 79 litros
# 1, 61 ÷ 3, 79 = 0, 425. Isto significa que 0, 425 km / l é igual a 1 mpg
# para obter kml a partir de mpg basta multiplicar mpg * 0, 425
mtcars$mpg * 0.425
mtcars$kml <- mtcars$mpg * 0.425
mtcars$kml = mtcars$mpg * 0.425
mtcars$dai <- NA
mtcars$dai <- 'dai mestrado'
mtcars$dai <- NULL
mtcars <- mtcars[, names(mtcars) != 'dai']
head(mtcars)
names(mtcars)
names(mtcars)[names(mtcars) != 'dai']
names(mtcars)[names(mtcars) != 'kml']
mtcars <- mtcars[order(mtcars$kml, decreasing = TRUE), ]
write.csv(mtcars, 'mtcars.csv')
write.csv2(mtcars, 'mtcars.csv')
write.table(mtcars, 'mtcars.txt', sep = '|')
a <- read.csv2('mtcars.csv', sep = ';')
head(a)
#------------------------------
library(dplyr)
mtcars |>
tibble::as_tibble() |>
dplyr::select(mpg, cyl, disp, gear) |>
dplyr::filter(cyl == 4) |>
dplyr::arrange(mpg) |>
dplyr::rename(gear2 = gear) |>
dplyr::mutate(kml = mpg * 0.425) |>
dplyr::group_by(gear2) |>
dplyr::summarise(kml = mean(kml))
#------------------------------
# lista
numeros = 1:3
letras = c("a", "b", "c", "d")
logico = c(TRUE, FALSE)
list(numeros, letras, logico)
l <- list(numeros, letras, logico)
lista = list(mtcars = head(mtcars), iris = head(iris), lmin = letters, lmai = LETTERS)
lista
names(lista)
length(lista)
lista[2]
lista[[2]]
lista[[2]][1:3, ]
lista[['iris']][1:3, ]
list(lista, l)
lista <- list(numeros = 1:3,
letras = c("a", "b", "c", "d"),
logico = c(TRUE, FALSE))
lista
names(lista)
length(lista)
lista[1]
lista[[1]]
lista[[1]][[2]]
lista[2]
lista[[2]]
lista["letras"]
lista[["letras"]]
lista$letras
lista$letras[1]
lista$letras[3:4]
lista[3]
lista[1:2]
lista[c(1, 3)]
lista
lista[[2]]
class(lista[[2]])
toupper(lista[[2]])
lista[2]
class(lista[2])
toupper(lista[2])
names(lista)
lista$letras
class(lista$letras)
toupper(lista$letras)
lista[["letras"]]
class(lista[["letras"]])
toupper(lista[["letras"]])
lista["letras"]
class(lista["letras"])
toupper(lista["letras"])
# acessar a lista
# - posição do elemento [1]
# - nome do elemento ['nome']
# - [] retorna uma sub - lista
# - [[]] retorna o próprio elemento
# - $nome_do_elemento
lista
lista[2]
lista[[2]]
names(lista)
lista['letras']
lista[['letras']]
lista$letras
#nomes com espaço para nomear variáveis
lst <- list(c("aa", "bb", "cc"), c(1, 2, 3, 4), c(1.1, 1.2, 1.3))
names(lst) <- c("Texto", "Inteiros", "Numeros reais")
lst
lst$Texto
lst[['Texto']]
lst$Inteiros
lst[['Inteiros']]
lst$`Numeros reais`
lst[['Numeros reais']]