library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

This file walks through the process of generating a Sankey visualization of the metagenomes from the information output by MetaBAT. Thanks to Professor Blanchard for providing the starting code.

1 Pavian Sankey Plot

1.1 Make input for Pavian Sankey Plot (from GTDB format)

# Read the MetaBAT bin table and build one GTDB-style lineage string per bin
# ("d__...|p__...|c__...|o__...|f__...|g__..."), kept next to its bin ID.
BW_coassembly_GTDB <- read_tsv("data/metaG_coassembly_all_metabat.tsv") %>%
  select(`Bin ID`, Domain, Phylum, Class, Order, Family, Genus) %>%
  # Blank out missing ranks *before* the prefixes are attached. Scrubbing the
  # text "NA" from the finished string instead would also delete those two
  # letters from inside real taxon names.
  mutate(
    across(
      c(Domain, Phylum, Class, Order, Family, Genus),
      ~ replace_na(as.character(.x), "")
    )
  ) %>%
  # A missing rank keeps its (empty) prefix, e.g. "...|f__|g__".
  mutate(
    Domain = paste0("d__", Domain),
    Phylum = paste0("|p__", Phylum),
    Class = paste0("|c__", Class),
    Order = paste0("|o__", Order),
    Family = paste0("|f__", Family),
    Genus = paste0("|g__", Genus)
  ) %>%
  unite(
    col = classification,
    Domain, Phylum, Class, Order, Family, Genus,
    sep = ""
  )
## Rows: 817 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (11): Bin ID, Bin Quality, Bin Lineage, Domain, Phylum, Class, Order, F...
## dbl   (9): Completeness, Contamination, Total Number of Bases, 5s rRNA, 16s ...
## lgl   (1): ...1
## date  (1): Date Added
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Untouched copies of the full lineage table.
BW_coassembly_GTDB_s <- BW_coassembly_GTDB
BW_coassembly_GTDB_id <- BW_coassembly_GTDB

# One table per rank: everything from the next-lower rank prefix onwards is
# cut off the lineage string, so each table ends at the rank in its suffix.
# The lineage strings built above stop at genus, so the "|s__" pattern has
# nothing to match and the genus table equals the full table.
BW_coassembly_GTDB_g <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|s__.*", "", classification))
BW_coassembly_GTDB_f <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|g__.*", "", classification))
BW_coassembly_GTDB_o <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|f__.*", "", classification))
BW_coassembly_GTDB_c <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|o__.*", "", classification))
BW_coassembly_GTDB_p <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|c__.*", "", classification))
BW_coassembly_GTDB_d <- BW_coassembly_GTDB %>%
  mutate(classification = sub("\\|p__.*", "", classification))

# Stack the per-rank tables (genus up to domain) and count how many bins fall
# under each lineage string at each rank.
# BW_coassembly_GTDB_s is deliberately left out: the lineage strings stop at
# genus, so that table is identical to BW_coassembly_GTDB_g and binding both
# counted every genus-level lineage twice.
BW_coassembly_GTDB_allTaxa <- bind_rows(
  BW_coassembly_GTDB_g,
  BW_coassembly_GTDB_f,
  BW_coassembly_GTDB_o,
  BW_coassembly_GTDB_c,
  BW_coassembly_GTDB_p,
  BW_coassembly_GTDB_d
) %>%
  mutate(classification = as.factor(classification)) %>%
  count(classification) %>%
  # rename for Pavian format
  rename(`#SampleID` = classification, Metaphlan2_Analysis = n)
# Save the per-lineage counts (the Pavian input) and the per-bin lineage table.
BW_coassembly_GTDB_allTaxa %>%
  write_tsv(file = "data/BW_coassembly_GTDB_pavian.txt")
BW_coassembly_GTDB %>%
  write_tsv(file = "data/BW_coassembly_GTDB_with_ID.tsv")

Interactive HTML file with Pavian Sankey Plot for BW filter metagenomes

Pavian Sankey Plot for BW filter metagenomes
Pavian Sankey Plot for BW filter metagenomes