Download sequences
Genbank
## temporary output paths for the two download formats
tmpgb <- tempfile(fileext = '.gb')
tmpfa <- tempfile(fileext = '.fa')
## fetch accession AB115403 once as a genbank record and once as fasta
download_genbank(acc = 'AB115403', format = 'genbank', outfile = tmpgb)
download_genbank(acc = 'AB115403', format = 'fasta', outfile = tmpfa)
## readLines(tmpgb)[1:10]
## readLines(tmpfa)
File conversion
fasta and phylip conversion
## example fasta file shipped in the seqmagick package
fa_file <- system.file("extdata/HA.fas", package = "seqmagick")
## use the small subset to save compilation time of the vignette
fa2 <- tempfile(fileext = '.fa')
fa_read(fa_file) %>%
  bs_filter('ATGAAAGTAAAA', by = 'sequence') %>%
  fa_write(fa2, type = 'interleaved')
## align the subset and write the alignment to a new temporary fasta file
alnfas <- tempfile(fileext = ".fas")
fa_read(fa2) %>%
  bs_aln(quiet = TRUE) %>%
  fa_write(alnfas)
## phylip format is only for aligned sequences
tmpphy <- tempfile(fileext = ".phy")
fas2phy(alnfas, tmpphy, type = 'sequential')
`seqmagick` supports both `sequential` and `interleaved` formats; users can specify the format with the `type` parameter.
## convert the phylip file back to fasta, requesting the interleaved layout
## NOTE(review): alnfas is presumably the output path and gets overwritten here -- confirm
phy2fas(tmpphy, alnfas, type = 'interleaved')
interleaved and sequential format conversion
## re-write the interleaved fasta subset in sequential layout
tmpfas <- tempfile(fileext = '.fa')
fa_read(fa2) %>%
  fa_write(tmpfas, type = "sequential")
## re-write the phylip file in interleaved layout
tmpphy2 <- tempfile(fileext = '.phy')
phy_read(tmpphy) %>%
  phy_write(tmpphy2, type = "interleaved")
Sequence manipulation
## read the example sequences
bs <- fa_read(fa_file)
## keep only the sequences matching the pattern (result is printed)
bs_filter(bs, 'ATGAAAGTAAAA', by = 'sequence')
## align the filtered sequences, then derive a consensus from the alignment
aln <- bs_filter(bs, 'ATGAAAGTAAAA', by = 'sequence') %>%
  bs_aln(quiet = TRUE)
bs_consensus(aln)
Bugs/Feature requests
If you find a bug or have a feature request, please let me know. Thanks!
Session info
Here is the output of `sessionInfo()` on the system on which this document was compiled:
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Arch Linux
##
## Matrix products: default
## BLAS: /usr/lib/libblas.so.3.9.0
## LAPACK: /usr/lib/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] seqmagick_0.1.5 Biostrings_2.58.0 XVector_0.30.0
## [4] IRanges_2.24.1 S4Vectors_0.28.1 BiocGenerics_0.36.0
## [7] magrittr_2.0.1
##
## loaded via a namespace (and not attached):
## [1] knitr_1.32 zlibbioc_1.36.0 R6_2.5.0 rlang_0.4.10
## [5] stringr_1.4.0 tools_4.0.5 xfun_0.22 jquerylib_0.1.3
## [9] htmltools_0.5.1.1 yaml_2.2.1 digest_0.6.27 crayon_1.4.1
## [13] sass_0.3.1 prettydoc_0.4.1 evaluate_0.14 rmarkdown_2.7
## [17] stringi_1.5.3 compiler_4.0.5 bslib_0.2.4 jsonlite_1.7.2