Getting started with the robis package

This package is a client for the OBIS API. It includes functions for data access, as well as a few helper functions for visualizing occurrence data and extracting nested MeasurementOrFact or DNADerivedData records.

First, load some packages:

library(robis)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)

Occurrence data

The occurrence() function provides access to raw occurrence data. For example, to fetch all occurrences by scientific name:

# Fetch all occurrence records for the species, then print the tibble.
occ <- occurrence(scientificname = "Abra aequalis")
occ
#> # A tibble: 767 x 103
#>    date_year scientificNameID          year  scientificName dynamicProperties   
#>        <int> <chr>                     <chr> <chr>          <chr>               
#>  1      1986 urn:lsid:marinespecies.o… 1986  Abra aequalis  temperature=10;     
#>  2      1976 urn:lsid:marinespecies.o… 1976  Abra aequalis  <NA>                
#>  3      1992 urn:lsid:marinespecies.o… 1992  Abra aequalis  <NA>                
#>  4      2015 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  5      2002 urn:lsid:marinespecies.o… 2002  Abra aequalis  <NA>                
#>  6      2013 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  7      2014 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  8      1978 urn:lsid:marinespecies.o… 1978  Abra aequalis  observedindividualc…
#>  9      1994 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#> 10      2013 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#> # … with 757 more rows, and 98 more variables: superfamilyid <int>,
#> #   individualCount <chr>, dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>,
#> #   subclassid <int>, phylumid <int>, familyid <int>, catalogNumber <chr>,
#> #   basisOfRecord <chr>, subterclassid <int>, maximumDepthInMeters <dbl>,
#> #   id <chr>, order <chr>, dataset_id <chr>, locality <chr>,
#> #   decimalLongitude <dbl>, collectionCode <chr>, date_end <dbl>,
#> #   speciesid <int>, superfamily <chr>, date_start <dbl>, genus <chr>,
#> #   subterclass <chr>, eventDate <chr>, superorder <chr>,
#> #   scientificNameAuthorship <chr>, absence <lgl>, superorderid <int>,
#> #   genusid <int>, originalScientificName <chr>, marine <lgl>,
#> #   minimumDepthInMeters <dbl>, infraclassid <int>, institutionCode <chr>,
#> #   date_mid <dbl>, infraclass <chr>, identifiedBy <chr>, class <chr>,
#> #   orderid <int>, kingdom <chr>, classid <int>, phylum <chr>, species <chr>,
#> #   subclass <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> #   sss <dbl>, depth <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>,
#> #   country <chr>, day <chr>, month <chr>, bibliographicCitation <chr>,
#> #   waterBody <chr>, recordedBy <chr>, coordinatePrecision <chr>, type <chr>,
#> #   taxonRemarks <chr>, occurrenceStatus <chr>, materialSampleID <chr>,
#> #   occurrenceID <chr>, ownerInstitutionCode <chr>, samplingProtocol <chr>,
#> #   taxonRank <chr>, datasetName <chr>, datasetID <chr>,
#> #   associatedReferences <chr>, fieldNumber <chr>, stateProvince <chr>,
#> #   preparations <chr>, county <chr>, modified <lgl>,
#> #   infraspecificEpithet <lgl>, recordNumber <chr>, higherGeography <chr>,
#> #   continent <chr>, typeStatus <lgl>, geodeticDatum <lgl>,
#> #   specificEpithet <chr>, georeferenceSources <lgl>, verbatimDepth <chr>,
#> #   occurrenceRemarks <chr>, collectionID <chr>, eventID <chr>,
#> #   dateIdentified <chr>, habitat <chr>, institutionID <chr>, organismID <chr>,
#> #   eventRemarks <chr>, taxonID <chr>, locationRemarks <chr>,
#> #   countryCode <chr>, locationID <chr>
# Bar chart of the number of occurrences per year. Records whose date_year is
# missing are dropped silently (na.rm = TRUE) instead of raising a ggplot2
# "Removed ... rows containing non-finite values" warning. stat = "count" is
# the geom_bar() default, so it is not spelled out.
ggplot(occ) +
  geom_bar(aes(date_year), width = 1, na.rm = TRUE)

Alternatively, occurrences can be fetched by AphiaID:

# 293683 is the AphiaID of Abra aequalis, so this returns the same records
# as the query by scientific name.
occurrence(taxonid = 293683)
#> # A tibble: 767 x 103
#>    date_year scientificNameID          year  scientificName dynamicProperties   
#>        <int> <chr>                     <chr> <chr>          <chr>               
#>  1      1986 urn:lsid:marinespecies.o… 1986  Abra aequalis  temperature=10;     
#>  2      1976 urn:lsid:marinespecies.o… 1976  Abra aequalis  <NA>                
#>  3      1992 urn:lsid:marinespecies.o… 1992  Abra aequalis  <NA>                
#>  4      2015 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  5      2002 urn:lsid:marinespecies.o… 2002  Abra aequalis  <NA>                
#>  6      2013 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  7      2014 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#>  8      1978 urn:lsid:marinespecies.o… 1978  Abra aequalis  observedindividualc…
#>  9      1994 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#> 10      2013 urn:lsid:marinespecies.o… <NA>  Abra aequalis  <NA>                
#> # … with 757 more rows, and 98 more variables: superfamilyid <int>,
#> #   individualCount <chr>, dropped <lgl>, aphiaID <int>, decimalLatitude <dbl>,
#> #   subclassid <int>, phylumid <int>, familyid <int>, catalogNumber <chr>,
#> #   basisOfRecord <chr>, subterclassid <int>, maximumDepthInMeters <dbl>,
#> #   id <chr>, order <chr>, dataset_id <chr>, locality <chr>,
#> #   decimalLongitude <dbl>, collectionCode <chr>, date_end <dbl>,
#> #   speciesid <int>, superfamily <chr>, date_start <dbl>, genus <chr>,
#> #   subterclass <chr>, eventDate <chr>, superorder <chr>,
#> #   scientificNameAuthorship <chr>, absence <lgl>, superorderid <int>,
#> #   genusid <int>, originalScientificName <chr>, marine <lgl>,
#> #   minimumDepthInMeters <dbl>, infraclassid <int>, institutionCode <chr>,
#> #   date_mid <dbl>, infraclass <chr>, identifiedBy <chr>, class <chr>,
#> #   orderid <int>, kingdom <chr>, classid <int>, phylum <chr>, species <chr>,
#> #   subclass <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> #   sss <dbl>, depth <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>,
#> #   country <chr>, day <chr>, month <chr>, bibliographicCitation <chr>,
#> #   waterBody <chr>, recordedBy <chr>, coordinatePrecision <chr>, type <chr>,
#> #   taxonRemarks <chr>, occurrenceStatus <chr>, materialSampleID <chr>,
#> #   occurrenceID <chr>, ownerInstitutionCode <chr>, samplingProtocol <chr>,
#> #   taxonRank <chr>, datasetName <chr>, datasetID <chr>,
#> #   associatedReferences <chr>, fieldNumber <chr>, stateProvince <chr>,
#> #   preparations <chr>, county <chr>, modified <lgl>,
#> #   infraspecificEpithet <lgl>, recordNumber <chr>, higherGeography <chr>,
#> #   continent <chr>, typeStatus <lgl>, geodeticDatum <lgl>,
#> #   specificEpithet <chr>, georeferenceSources <lgl>, verbatimDepth <chr>,
#> #   occurrenceRemarks <chr>, collectionID <chr>, eventID <chr>,
#> #   dateIdentified <chr>, habitat <chr>, institutionID <chr>, organismID <chr>,
#> #   eventRemarks <chr>, taxonID <chr>, locationRemarks <chr>,
#> #   countryCode <chr>, locationID <chr>

Other parameters include geometry, which accepts polygons in WKT format:

# Combine a scientific name filter with a polygon filter given as WKT.
occurrence(
  scientificname = "Abra alba",
  geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))"
)
#> # A tibble: 319 x 85
#>    date_year scientificNameID         year  scientificName superfamilyid dropped
#>        <int> <chr>                    <chr> <chr>                  <int> <lgl>  
#>  1      2016 urn:lsid:marinespecies.… 2016  Abra alba              14636 FALSE  
#>  2      2014 urn:lsid:marinespecies.… 2014  Abra alba              14636 FALSE  
#>  3      1995 urn:lsid:marinespecies.… 1995  Abra alba              14636 FALSE  
#>  4      2017 urn:lsid:marinespecies.… 2017  Abra alba              14636 FALSE  
#>  5      1995 urn:lsid:marinespecies.… 1995  Abra alba              14636 FALSE  
#>  6      2016 urn:lsid:marinespecies.… 2016  Abra alba              14636 FALSE  
#>  7      1997 urn:lsid:marinespecies.… 1997  Abra alba              14636 FALSE  
#>  8      1995 urn:lsid:marinespecies.… 1995  Abra alba              14636 FALSE  
#>  9      2009 urn:lsid:marinespecies.… 2009  Abra alba              14636 FALSE  
#> 10      2009 urn:lsid:marinespecies.… 2009  Abra alba              14636 FALSE  
#> # … with 309 more rows, and 79 more variables: fieldNumber <chr>,
#> #   aphiaID <int>, language <chr>, decimalLatitude <dbl>, subclassid <int>,
#> #   phylumid <int>, familyid <int>, catalogNumber <chr>,
#> #   occurrenceStatus <chr>, basisOfRecord <chr>, subterclassid <int>,
#> #   modified <chr>, id <chr>, day <chr>, order <chr>, dataset_id <chr>,
#> #   locality <chr>, decimalLongitude <dbl>, collectionCode <chr>,
#> #   date_end <dbl>, speciesid <int>, occurrenceID <chr>, superfamily <chr>,
#> #   date_start <dbl>, footprintSRS <chr>, month <chr>, genus <chr>,
#> #   subterclass <chr>, eventDate <chr>, eventID <chr>, superorder <chr>,
#> #   absence <lgl>, superorderid <int>, genusid <int>,
#> #   originalScientificName <chr>, marine <lgl>, infraclassid <int>,
#> #   date_mid <dbl>, infraclass <chr>, class <chr>, orderid <int>,
#> #   datasetName <chr>, geodeticDatum <chr>, kingdom <chr>, recordedBy <chr>,
#> #   classid <int>, phylum <chr>, species <chr>, subclass <chr>,
#> #   datasetID <chr>, family <chr>, kingdomid <int>, node_id <chr>, flags <chr>,
#> #   sss <dbl>, shoredistance <int>, sst <dbl>, bathymetry <dbl>,
#> #   verbatimDepth <chr>, country <chr>, references <chr>,
#> #   dynamicProperties <chr>, individualCount <chr>,
#> #   bibliographicCitation <chr>, continent <chr>,
#> #   scientificNameAuthorship <chr>, samplingEffort <chr>,
#> #   institutionCode <chr>, specificEpithet <chr>, lifeStage <chr>,
#> #   samplingProtocol <chr>, sex <chr>, maximumDepthInMeters <dbl>,
#> #   minimumDepthInMeters <dbl>, footprintWKT <chr>, depth <dbl>,
#> #   locationID <chr>, waterBody <chr>, occurrenceRemarks <chr>

WKT strings can be created by drawing on a map using the get_geometry() function.

A convenience function map_leaflet() is provided to visualize occurrences on an interactive map:

# Fetch the occurrences and hand them straight to the interactive map.
occurrence(scientificname = "Abra sibogai") %>%
  map_leaflet()

Checklists

The checklist() function returns all taxa observed for a given set of filters.

# List the taxa recorded for the family Semelidae and print the result.
cl <- checklist(scientificname = "Semelidae")
cl
#> # A tibble: 107 x 41
#>    scientificName   scientificNameAut… taxonID ncbi_id taxonRank taxonomicStatus
#>    <chr>            <chr>                <int>   <int> <chr>     <chr>          
#>  1 Abra alba        (W. Wood, 1802)     141433  399303 Species   accepted       
#>  2 Abra nitida      (O. F. Müller, 17…  141435  358434 Species   accepted       
#>  3 Scrobicularia p… (da Costa, 1778)    141424  665965 Species   accepted       
#>  4 Abra prismatica  (Montagu, 1808)     141436  183592 Species   accepted       
#>  5 Abra tenuis      (Montagu, 1803)     141439      NA Species   accepted       
#>  6 Abra             Lamarck, 1818       138474  121180 Genus     accepted       
#>  7 Abra segmentum   (Récluz, 1843)      141438      NA Species   accepted       
#>  8 Theora lubrica   Gould, 1861         233903 1230554 Species   accepted       
#>  9 Semelidae        Stoliczka, 1870 (…    1781  121179 Family    accepted       
#> 10 Abra aequalis    (Say, 1822)         293683 2175524 Species   accepted       
#> # … with 97 more rows, and 35 more variables: acceptedNameUsage <chr>,
#> #   acceptedNameUsageID <int>, is_marine <lgl>, is_brackish <lgl>,
#> #   kingdom <chr>, phylum <chr>, class <chr>, subclass <chr>, infraclass <chr>,
#> #   subterclass <chr>, superorder <chr>, order <chr>, superfamily <chr>,
#> #   family <chr>, kingdomid <int>, phylumid <int>, classid <int>,
#> #   subclassid <int>, infraclassid <int>, subterclassid <int>,
#> #   superorderid <int>, orderid <int>, superfamilyid <int>, familyid <int>,
#> #   records <int>, genus <chr>, genusid <int>, species <chr>, speciesid <int>,
#> #   bold_id <int>, is_freshwater <lgl>, is_terrestrial <lgl>, wrims <lgl>,
#> #   subspecies <chr>, subspeciesid <int>
# Count checklist rows per genus, skipping rows that have no genus, and draw
# the counts as horizontal bars.
cl %>%
  filter(!is.na(genus)) %>%
  ggplot(aes(genus)) +
  geom_bar() +
  coord_flip() +
  ylab("species count")

Just like the occurrence() function, checklist() accepts WKT geometries:

# Checklist of all taxa recorded within a WKT polygon.
checklist(
  geometry = "POLYGON ((2.59689 51.16772, 2.62436 51.14059, 2.76066 51.19225, 2.73216 51.20946, 2.59689 51.16772))"
)
#> # A tibble: 902 x 73
#>    scientificName      taxonID ncbi_id taxonomicStatus acceptedNameUsage  
#>    <chr>                 <int>   <int> <chr>           <chr>              
#>  1 Nematoda                799    6231 accepted        Nematoda           
#>  2 Abra alba            141433  399303 accepted        Abra alba          
#>  3 Sabatieria celtica   121360  319964 accepted        Sabatieria celtica 
#>  4 Sabatieria punctata  153130  320140 accepted        Sabatieria punctata
#>  5 Spiophanes bombyx    131187  696728 accepted        Spiophanes bombyx  
#>  6 Kurtiella bidentata  345281 1177057 accepted        Kurtiella bidentata
#>  7 Nephtys hombergii    130359   36121 accepted        Nephtys hombergii  
#>  8 Oligochaeta            2036      NA accepted        Oligochaeta        
#>  9 Cirratulidae            919   46590 accepted        Cirratulidae       
#> 10 Fabulina fabula      146907      NA accepted        Fabulina fabula    
#> # … with 892 more rows, and 68 more variables: acceptedNameUsageID <int>,
#> #   is_marine <lgl>, is_brackish <lgl>, is_freshwater <lgl>,
#> #   is_terrestrial <lgl>, records <int>, taxonRank <chr>, kingdom <chr>,
#> #   kingdomid <int>, phylum <chr>, phylumid <int>,
#> #   scientificNameAuthorship <chr>, class <chr>, classid <int>, subclass <chr>,
#> #   order <chr>, superfamily <chr>, family <chr>, subclassid <int>,
#> #   orderid <int>, superfamilyid <int>, familyid <int>, infraclass <chr>,
#> #   infraclassid <int>, subterclass <chr>, superorder <chr>,
#> #   subterclassid <int>, superorderid <int>, suborder <chr>, suborderid <int>,
#> #   subfamily <chr>, subfamilyid <int>, subphylum <chr>, subphylumid <int>,
#> #   superclass <chr>, superclassid <int>, subkingdom <chr>, infrakingdom <chr>,
#> #   subkingdomid <int>, infrakingdomid <int>, genus <chr>, genusid <int>,
#> #   infraphylum <chr>, infraphylumid <int>, hab <lgl>, bold_id <int>,
#> #   species <chr>, speciesid <int>, infraorder <chr>, parvorder <chr>,
#> #   infraorderid <int>, parvorderid <int>, tribe <chr>, tribeid <int>,
#> #   wrims <lgl>, subgenus <chr>, subgenusid <int>, category <chr>,
#> #   section <chr>, subsection <chr>, sectionid <int>, subsectionid <int>,
#> #   subspecies <chr>, subspeciesid <int>, variety <chr>, varietyid <int>,
#> #   forma <chr>, formaid <int>

MeasurementOrFact records

The package also provides access to MeasurementOrFact records associated with occurrences. When calling occurrence(), MeasurementOrFact records can be included by setting mof = TRUE.

# mof = TRUE requests the nested MeasurementOrFact records as well.
occ <- occurrence(scientificname = "Abra tenuis", mof = TRUE)

MeasurementOrFact records are nested in the occurrence, but the measurements() function allows you to extract them to a flat data frame. Use the fields parameter to indicate which occurrence fields need to be preserved in the measurements table.

# Flatten the nested measurement records into one table, carrying the listed
# occurrence fields along with every measurement row.
mof <- measurements(
  occ,
  fields = c("scientificName", "decimalLongitude", "decimalLatitude")
)
mof
#> # A tibble: 19,469 x 18
#>    id              scientificName decimalLongitude decimalLatitude measurementID
#>    <chr>           <chr>                     <dbl>           <dbl> <chr>        
#>  1 00037215-c2e8-… Abra tenuis               -1.22            45.9 <NA>         
#>  2 00037215-c2e8-… Abra tenuis               -1.22            45.9 <NA>         
#>  3 00037215-c2e8-… Abra tenuis               -1.22            45.9 <NA>         
#>  4 00037215-c2e8-… Abra tenuis               -1.22            45.9 <NA>         
#>  5 00037215-c2e8-… Abra tenuis               -1.22            45.9 <NA>         
#>  6 000e9ab4-bee5-… Abra tenuis               -1.20            46.3 <NA>         
#>  7 000e9ab4-bee5-… Abra tenuis               -1.20            46.3 <NA>         
#>  8 000e9ab4-bee5-… Abra tenuis               -1.20            46.3 <NA>         
#>  9 000e9ab4-bee5-… Abra tenuis               -1.20            46.3 <NA>         
#> 10 000e9ab4-bee5-… Abra tenuis               -1.20            46.3 <NA>         
#> # … with 19,459 more rows, and 13 more variables: occurrenceID <chr>,
#> #   measurementType <chr>, measurementTypeID <chr>, measurementValue <chr>,
#> #   measurementValueID <chr>, measurementAccuracy <chr>, measurementUnit <chr>,
#> #   measurementUnitID <chr>, measurementDeterminedDate <chr>,
#> #   measurementDeterminedBy <chr>, measurementMethod <chr>,
#> #   measurementRemarks <chr>, level <int>

Note that the MeasurementOrFact fields can be used as parameters to the occurrence() function. For example, to only get occurrences with associated biomass measurements:

library(dplyr)

# Request only occurrences that have a biomass measurement, then flatten
# their nested measurement records.
occurrence(
  scientificname = "Abra tenuis",
  mof = TRUE,
  measurementtype = "biomass"
) %>%
  measurements()
#> # A tibble: 44 x 15
#>    id        measurementID occurrenceID     measurementType measurementTypeID   
#>    <chr>     <chr>         <chr>            <chr>           <chr>               
#>  1 08269691… <NA>          476637_urn:lsid… individualCount http://vocab.nerc.a…
#>  2 08269691… <NA>          476637_urn:lsid… biomass         <NA>                
#>  3 08269691… <NA>          <NA>             sediment type   http://vocab.nerc.a…
#>  4 16af269f… <NA>          475202_urn:lsid… individualCount http://vocab.nerc.a…
#>  5 16af269f… <NA>          475202_urn:lsid… biomass         <NA>                
#>  6 16af269f… <NA>          <NA>             sediment type   http://vocab.nerc.a…
#>  7 2a20624e… <NA>          475273_urn:lsid… individualCount http://vocab.nerc.a…
#>  8 2a20624e… <NA>          475273_urn:lsid… biomass         <NA>                
#>  9 2a20624e… <NA>          <NA>             sediment type   http://vocab.nerc.a…
#> 10 2b78df21… <NA>          475141_urn:lsid… individualCount http://vocab.nerc.a…
#> # … with 34 more rows, and 10 more variables: measurementValue <chr>,
#> #   measurementValueID <chr>, measurementAccuracy <chr>, measurementUnit <chr>,
#> #   measurementUnitID <chr>, measurementDeterminedDate <chr>,
#> #   measurementDeterminedBy <chr>, measurementMethod <chr>,
#> #   measurementRemarks <chr>, level <int>

DNADerivedData records

Just like MeasurementOrFact records, nested DNADerivedData records can be extracted from the occurrence results.

# Query a single dataset for the taxon; dna = TRUE requests the nested
# DNADerivedData records (returned in the `dna` list column).
occ <- occurrence(
  scientificname = "Prymnesiophyceae",
  datasetid = "62b97724-da17-4ca7-9b26-b2a22aeaab51",
  dna = TRUE
)
occ
#> # A tibble: 1,136 x 54
#>    eventID   date_year scientificNameID   scientificName absence dropped genusid
#>    <chr>         <int> <chr>              <chr>          <lgl>   <lgl>     <int>
#>  1 28215c01…      2015 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  2 24416c01…      2016 urn:lsid:marinesp… Phaeocystaceae FALSE   FALSE        NA
#>  3 34916c01…      2016 urn:lsid:marinesp… Prymnesiaceae  FALSE   FALSE        NA
#>  4 11216c01…      2016 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  5 34916c01…      2016 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  6 24416c01…      2016 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  7 30214c01…      2014 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  8 14213c01…      2013 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#>  9 CANON16c…      2016 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#> 10 22013c01…      2013 urn:lsid:marinesp… Chrysochromul… FALSE   FALSE    115090
#> # … with 1,126 more rows, and 47 more variables: aphiaID <int>,
#> #   decimalLatitude <dbl>, taxonID <chr>, originalScientificName <chr>,
#> #   marine <lgl>, phylumid <int>, familyid <int>, basisOfRecord <chr>,
#> #   taxonConceptID <chr>, subkingdom <chr>, date_mid <dbl>,
#> #   identificationRemarks <chr>, nameAccordingTo <chr>, id <chr>, class <chr>,
#> #   order <chr>, identificationReferences <chr>, organismQuantity <chr>,
#> #   sampleSizeUnit <chr>, orderid <int>, dataset_id <chr>,
#> #   decimalLongitude <dbl>, date_end <dbl>, speciesid <int>,
#> #   occurrenceID <chr>, kingdom <chr>, subkingdomid <int>, date_start <dbl>,
#> #   classid <int>, phylum <chr>, genus <chr>, organismQuantityType <chr>,
#> #   species <chr>, associatedSequences <chr>, family <chr>, kingdomid <int>,
#> #   sampleSizeValue <chr>, eventDate <chr>, node_id <chr>, flags <chr>,
#> #   sss <dbl>, shoredistance <int>, sst <dbl>, bathymetry <int>, dna <list>,
#> #   hab <lgl>, brackish <lgl>
# Flatten the nested DNADerivedData records, keeping the scientific name of
# the occurrence alongside each record.
dna <- dna_records(occ, fields = "scientificName")

# Show only the taxon, the target gene and the sequence.
select(dna, scientificName, target_gene, DNA_sequence)
#> # A tibble: 1,136 x 3
#>    scientificName       target_gene DNA_sequence                                
#>    <chr>                <chr>       <chr>                                       
#>  1 Chrysochromulina st… 18S         GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACTGTGG…
#>  2 Phaeocystaceae       18S         GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGACGGATT…
#>  3 Prymnesiaceae        18S         GCTCCTACCGATTGGACGATCCGGTGAAGCCTCCGGACTGTAG…
#>  4 Chrysochromulina sc… 18S         GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACTGTGG…
#>  5 Chrysochromulina st… 18S         GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#>  6 Chrysochromulina     18S         GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGAATGGGA…
#>  7 Chrysochromulina     18S         GCTCCTACCGATTGAATGATCCGGTGAGGCCCCCGGAGTGGGA…
#>  8 Chrysochromulina sp… 18S         GCTCCTACCGATGGAAGGTTTAGGTGAGTTTTTCGGAGTTTTC…
#>  9 Chrysochromulina st… 18S         GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#> 10 Chrysochromulina st… 18S         GCTCCTACCGATTGAATGATCCGGTGAGCTTTTTGGACAGTGG…
#> # … with 1,126 more rows