R code
# Load libraries
library(tidyverse)
library(DT)
This report is an analysis of the Barre Woods filter metagenomes from the individual assemblies. Assembly, binning, quality control, and classification were done on KBase (https://narrative.kbase.us/narrative/145971). The CheckV and VirSorter output files were downloaded to this project folder.
# Load libraries
library(tidyverse)
library(DT)

# VirSorter scores: keep only contigs whose top-scoring group is dsDNAphage,
# drop the per-group score columns, and rename the key so it matches CheckV.
virsorter <-
  read_tsv("data_individual_assembly/all-ind-virsorter-final-viral-score.tsv") |>
  filter(max_score_group == "dsDNAphage") |>
  select(!c(dsDNAphage, ssDNA, NCLDV, lavidaviridae)) |>
  rename(contig_id = seqname)

# CheckV quality summary, one table for all individual assemblies.
checkv <-
  read_tsv("data_individual_assembly/all-ind-checkv_quality_summary.tsv")

# Attach CheckV columns to every retained VirSorter contig; contigs with no
# CheckV record are kept and get NA in the CheckV columns.
virsorter_checkv <- virsorter |>
  left_join(checkv, by = "contig_id")

# Subset restricted to the two top CheckV quality tiers.
virsorter_checkv_HQ <- virsorter_checkv |>
  filter(checkv_quality %in% c("Complete", "High-quality"))

# write summary file
# Full joined table first, then the Complete/High-quality subset.
virsorter_checkv |>
  write_tsv("data_individual_assembly/all-ind-virsorter_checkv.tsv")
virsorter_checkv_HQ |>
  write_tsv("data_individual_assembly/all-ind-virsorter_checkv_HQ.tsv")

# Bar chart: number of contigs in each CheckV quality tier.
ggplot(virsorter_checkv, aes(x = checkv_quality)) +
  geom_bar()

# Contig count and share of the total per CheckV quality tier. as_factor()
# keeps the tiers in order of first appearance rather than alphabetical order.
virsorter_checkv |>
  mutate(checkv_quality = as_factor(checkv_quality)) |>
  count(checkv_quality) |>
  mutate(freq = n / sum(n))
# A tibble: 6 × 3
checkv_quality n freq
<fct> <int> <dbl>
1 High-quality 418 0.0209
2 Complete 13 0.000650
3 Medium-quality 506 0.0253
4 Low-quality 12703 0.635
5 Not-determined 6238 0.312
6 <NA> 127 0.00635
# Histogram of the `length` column for every contig in the joined
# VirSorter/CheckV table, with bars filled by CheckV quality tier.
# Bins are 10000 wide, in the units of `length`.
virsorter_checkv |>
  ggplot(aes(x = length, fill = checkv_quality)) +
  geom_histogram(colour = "black", binwidth = 10000) +
  ggtitle("Genome size of Phage") +
  xlab("Genome size") +
  theme(text = element_text(size = 20, color = "black")) +
  # This theme() must be chained with `+`. Without it the call is evaluated as
  # a separate expression: the x-axis rotation is never applied to the plot
  # and the theme object is just printed to the console.
  theme(axis.text.x = element_text(angle = 90))
# Same `length` histogram, restricted to the Complete/High-quality subset.
# Bars are filled by CheckV quality tier; bins are 10000 wide.
ggplot(virsorter_checkv_HQ, aes(x = length, fill = checkv_quality)) +
  geom_histogram(binwidth = 10000, colour = "black") +
  labs(title = "Genome size of Phage", x = "Genome size") +
  # Larger global text size intentionally left disabled for this plot:
  # theme(text = element_text(size = 20, color = "black"))
  theme(axis.text.x = element_text(angle = 45))