rflashtext

R-CMD-check

rflashtext can be used to find and replace words in a given text with only one pass over the document.

It’s a pure R implementation of the FlashText algorithm, inspired by the Python library flashtext.

Installation

You can install the released version of rflashtext from CRAN with:

install.packages("rflashtext")

You can install the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("AbrJA/rflashtext")

Example

This is a basic example which shows you how to use the API:

New processor

library(rflashtext)

processor <- keyword_processor$new(ignore_case = FALSE, word_chars = c(letters, LETTERS))
processor$show_attrs(attrs = "dict_size")
#> $dict_size
#> [1] 0

Add key-word pairs to the processor

processor$add_keys_words(keys = c("NY", "LA"), words = c("New York", "Los Angeles"))
processor$show_attrs(attrs = c("dict", "dict_size"))
#> $dict
#> $dict$`_class_`
#> [1] "keyword_dictionary"
#> 
#> $dict$N
#> $dict$N$Y
#> $dict$N$Y$`_word_`
#> [1] "New York"
#> 
#> 
#> 
#> $dict$L
#> $dict$L$A
#> $dict$L$A$`_word_`
#> [1] "Los Angeles"
#> 
#> 
#> 
#> 
#> $dict_size
#> [1] 2

Find keys in a sentence

words_found <- processor$find_keys(sentence = "I live in LA and I like NY")
words_found
#> [[1]]
#> [[1]]$word
#> [1] "Los Angeles"
#> 
#> [[1]]$start
#> [1] 11
#> 
#> [[1]]$end
#> [1] 13
#> 
#> 
#> [[2]]
#> [[2]]$word
#> [1] "New York"
#> 
#> [[2]]$start
#> [1] 25
#> 
#> [[2]]$end
#> [1] 26
do.call(rbind, words_found)
#>      word          start end
#> [1,] "Los Angeles" 11    13 
#> [2,] "New York"    25    26

Replace keys in a sentence

processor$replace_keys(sentence = "I live in LA and I like NY")
#> [1] "I live in Los Angeles and I like New York"