This vignette demonstrates how to query the EDI repository for journal articles that cite a certain data package. These stats may be used in reports.
library(EDIutils)
library(dplyr)
library(tidyr)
Get all identifiers for a certain scope.
# Scope whose data packages are queried.
scope <- "edi"
# Identifiers of all data packages in that scope.
identifiers <- list_data_package_identifiers(scope)
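Build a full package ID for each identifier and retrieve all journal citations. Revision 1 is used for every identifier; with `list_all = TRUE` the citations are requested across all revisions of a data package.
The citation information is returned as one data frame per data package. These are then row-bound into a single data frame holding all records.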
# Build full package IDs of the form "<scope>.<identifier>.1", i.e. using
# revision 1 for every identifier.
identifiers <- paste0(scope, ".", identifiers, ".1")
# Query the citations of each package ID; `res` holds one result per package.
res <- lapply(identifiers, list_data_package_citations, list_all = TRUE)
# Row-bind the per-package results into a single data frame.
df_return_all <- do.call(rbind, res)
Various analyses may now be conducted on this data frame, e.g., the number of unique data packages being cited:
# Count the distinct data packages that are cited, ignoring the revision.
df_data_packages <- df_return_all %>%
  # Split "scope.datasetNum.revision" into its three components.
  separate(
    packageId,
    into = c("scope", "datasetNum", "revision"),
    sep = "\\."
  ) %>%
  # `scope` here is the column created by separate(), not the global variable.
  mutate(datasetId = paste(scope, datasetNum, sep = ".")) %>%
  distinct(datasetId)
print(paste("Unique data packages being cited:", nrow(df_data_packages)))
#> [1] "Unique data packages being cited: 375"
Or the number of unique articles citing data packages:
# Count the distinct citing articles, identified by their URL.
df_articles <- df_return_all %>%
  distinct(articleUrl)
print(paste("Number of articles:", nrow(df_articles)))
#> [1] "Number of articles: 503"