library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
This file walks through the process of generating a Sankey visualization of the metagenomes from the information output by MetaBAT. Thanks to Professor Blanchard for providing the starting code.
# Read the MetaBAT bin table and collapse the six taxonomy columns into a
# single pipe-delimited lineage string per bin, e.g.
#   d__<Domain>|p__<Phylum>|c__<Class>|o__<Order>|f__<Family>|g__<Genus>
# The result keeps two columns: `Bin ID` and `classification`.
BW_coassembly_GTDB <- read_tsv("data/metaG_coassembly_all_metabat.tsv") %>%
  select(`Bin ID`, Domain, Phylum, Class, Order, Family, Genus) %>%
  # Blank out missing ranks *before* adding the rank prefixes. Stripping the
  # text "NA" from the finished string would also delete "NA" from inside
  # real taxon names (e.g. alphanumeric placeholder genera).
  # as.character() guards against a rank column that is entirely missing and
  # was therefore parsed as logical.
  mutate(
    across(
      c(Domain, Phylum, Class, Order, Family, Genus),
      ~ replace_na(as.character(.x), "")
    )
  ) %>%
  mutate(
    Domain = paste0("d__", Domain),
    Phylum = paste0("|p__", Phylum),
    Class = paste0("|c__", Class),
    Order = paste0("|o__", Order),
    Family = paste0("|f__", Family),
    Genus = paste0("|g__", Genus)
  ) %>%
  # A missing rank still contributes its bare prefix (e.g. "|g__"), exactly as
  # before; only the handling of names containing "NA" has changed.
  unite(
    col = classification,
    Domain, Phylum, Class, Order, Family, Genus,
    sep = ""
  )
## Rows: 817 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (11): Bin ID, Bin Quality, Bin Lineage, Domain, Phylum, Class, Order, F...
## dbl (9): Completeness, Contamination, Total Number of Bases, 5s rRNA, 16s ...
## lgl (1): ...1
## date (1): Date Added
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Make one working copy of the bin table per taxonomic rank; each copy has its
# lineage string truncated to that rank below.
# `_s` (species) and `_id` are left untouched here. They are kept defined in
# case later chunks use them.
BW_coassembly_GTDB_s <- BW_coassembly_GTDB
BW_coassembly_GTDB_g <- BW_coassembly_GTDB
BW_coassembly_GTDB_f <- BW_coassembly_GTDB
BW_coassembly_GTDB_o <- BW_coassembly_GTDB
BW_coassembly_GTDB_c <- BW_coassembly_GTDB
BW_coassembly_GTDB_p <- BW_coassembly_GTDB
BW_coassembly_GTDB_d <- BW_coassembly_GTDB
BW_coassembly_GTDB_id <- BW_coassembly_GTDB

# Truncate each copy at the rank *below* the one it represents, so that e.g.
# the family-level copy ends with "|f__<Family>".
# The lineage built in this file stops at genus, so the "|s__" pattern matches
# nothing and the genus-level copy is the full string.
BW_coassembly_GTDB_g$classification <- sub("\\|s__.*", "", BW_coassembly_GTDB_g$classification)
BW_coassembly_GTDB_f$classification <- sub("\\|g__.*", "", BW_coassembly_GTDB_f$classification)
BW_coassembly_GTDB_o$classification <- sub("\\|f__.*", "", BW_coassembly_GTDB_o$classification)
BW_coassembly_GTDB_c$classification <- sub("\\|o__.*", "", BW_coassembly_GTDB_c$classification)
BW_coassembly_GTDB_p$classification <- sub("\\|c__.*", "", BW_coassembly_GTDB_p$classification)
BW_coassembly_GTDB_d$classification <- sub("\\|p__.*", "", BW_coassembly_GTDB_d$classification)

# Stack the per-rank copies and count how many bins fall under each lineage
# prefix. `BW_coassembly_GTDB_s` is deliberately NOT stacked: with no species
# rank in the string it is identical to the genus-level copy, and including
# both would count every genus-level lineage twice.
BW_coassembly_GTDB_allTaxa <- bind_rows(
  BW_coassembly_GTDB_g,
  BW_coassembly_GTDB_f,
  BW_coassembly_GTDB_o,
  BW_coassembly_GTDB_c,
  BW_coassembly_GTDB_p,
  BW_coassembly_GTDB_d
) %>%
  mutate(classification = as.factor(classification)) %>%
  count(classification) %>%
  # rename for Pavian format
  rename(
    `#SampleID` = classification,
    Metaphlan2_Analysis = n
  )
# Per-lineage bin counts, with the column headers set above for Pavian.
write_tsv(
  x = BW_coassembly_GTDB_allTaxa,
  file = "data/BW_coassembly_GTDB_pavian.txt"
)

# Per-bin table (`Bin ID` plus the full lineage string).
write_tsv(
  x = BW_coassembly_GTDB,
  file = "data/BW_coassembly_GTDB_with_ID.tsv"
)
Interactive HTML file with Pavian Sankey Plot for BW filter metagenomes