#LOAD PACKAGES
library(tidyverse)
#LOAD DATA
<- read.csv("https://github.com/weecology/PortalData/raw/main/Rodents/Portal_rodent.csv") portal_rodent
reshaping data with tidyr
The goal of tidyr
is to help you create tidy data.
Reshaping with Pivoting – Why?
Data frames are often described as wide or long.
Wide when a row has more than one observation, and the units of observation are on one row each
Long when a row has only one observation, but the units of observation are repeated down the column
portal
dataset
<- portal_rodent %>%
portal_wgt_summary filter(!is.na(wgt)) %>%
group_by(plot, species) %>%
summarize(mean_wgt = mean(wgt))
portal_wgt_summary
# A tibble: 480 × 3
# Groups: plot [25]
plot species mean_wgt
<int> <chr> <dbl>
1 1 BA 9.1
2 1 DM 43.5
3 1 DO 49.4
4 1 DS 129.
5 1 OL 33.2
6 1 OT 24.3
7 1 PB 32.0
8 1 PE 22.3
9 1 PF 7.12
10 1 PH 31.4
# ℹ 470 more rows
Pivot Wider
Practicing transforming this data from long to wide format:
<- portal_wgt_summary %>%
wide pivot_wider(names_from = species, values_from = mean_wgt)
wide
# A tibble: 25 × 28
# Groups: plot [25]
plot BA DM DO DS OL OT PB PE PF PH PI PL
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 9.1 43.5 49.4 129. 33.2 24.3 32.0 22.3 7.12 31.4 17.4 25.2
2 2 9.4 43.4 49.1 123. 31.7 24.7 33.3 21.9 7.18 32.5 16 25.2
3 3 8.66 43.1 49.5 128. 31.0 23.6 32.6 22.5 7.58 28 17.1 22.2
4 4 10.2 43.5 49.2 118. 31.8 24.1 30.7 21.1 7.85 NA 18.4 NA
5 5 9.14 43.8 49.8 111. 30.1 24.8 32.0 21.7 8.25 29 NA 24.6
6 6 9.33 42.6 48.8 114. 30.5 24.2 31.5 21.8 7.89 NA 17 25
7 7 10 44.4 49.3 126. 32.2 24.4 33.4 23.1 9 30 NA 25.2
8 8 10.2 43.4 48.8 127. 28.8 23.9 30.2 21.6 7.06 41 19.8 28
9 9 10.2 43.6 48.7 115. 30.4 23.6 30.0 21.6 7.22 NA 16.7 19
10 10 10 44.1 51.7 130 34.4 19.6 33.2 22.6 8 NA NA 20.8
# ℹ 15 more rows
# ℹ 15 more variables: PM <dbl>, PP <dbl>, RM <dbl>, RO <dbl>, SF <dbl>,
# SH <dbl>, `NA` <dbl>, OX <dbl>, PX <dbl>, RF <dbl>, SO <dbl>, RX <dbl>,
# DX <dbl>, SS <dbl>, SX <dbl>
Pivot Longer
Practicing transforming this data from wide to long format:
%>%
wide pivot_longer(names_to = "species", values_to = "mean_wgt", cols=2:28)
# A tibble: 675 × 3
# Groups: plot [25]
plot species mean_wgt
<int> <chr> <dbl>
1 1 BA 9.1
2 1 DM 43.5
3 1 DO 49.4
4 1 DS 129.
5 1 OL 33.2
6 1 OT 24.3
7 1 PB 32.0
8 1 PE 22.3
9 1 PF 7.12
10 1 PH 31.4
# ℹ 665 more rows
# or cols = - plot_id
Challenge
Reshape the rodents data frame with year as columns, plot as rows, and the number of species per plot as the values. You will need to summarize before reshaping, and use the function n_distinct()
to get the number of unique species within a particular chunk of data. It’s a powerful function! See ?n_distinct for more.
%>%
portal_rodent group_by(year, plot) %>%
summarize(unique_species = n_distinct(species)) %>%
pivot_wider(names_from = year, values_from = unique_species)
# A tibble: 25 × 50
plot `1977` `1978` `1979` `1980` `1981` `1982` `1983` `1984` `1985` `1986`
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 4 5 7 9 8 9 10 9 6 5
2 2 9 9 9 11 9 12 12 12 10 7
3 3 9 8 6 9 8 13 12 12 9 8
4 4 5 5 5 7 6 6 8 5 7 5
5 5 6 4 4 7 6 8 9 10 5 2
6 6 4 8 6 8 7 13 13 11 8 8
7 7 4 2 4 4 2 6 4 4 5 4
8 8 4 7 5 8 9 11 8 8 7 6
9 9 6 6 6 8 8 8 10 7 8 5
10 10 3 1 4 7 8 9 5 3 4 2
# ℹ 15 more rows
# ℹ 39 more variables: `1987` <int>, `1988` <int>, `1989` <int>, `1990` <int>,
# `1991` <int>, `1992` <int>, `1993` <int>, `1994` <int>, `1995` <int>,
# `1996` <int>, `1997` <int>, `1998` <int>, `1999` <int>, `2000` <int>,
# `2001` <int>, `2002` <int>, `2003` <int>, `2004` <int>, `2005` <int>,
# `2006` <int>, `2007` <int>, `2008` <int>, `2009` <int>, `2010` <int>,
# `2011` <int>, `2012` <int>, `2013` <int>, `2014` <int>, `2015` <int>, …