When researchers request access to your data, they will in many cases be granted access only to a subset rather than to the whole dataset. On the MinIO fileserver access is regulated at the project level, so you will need to create a new project that contains a subset of the data. Below you can see the steps you need to take to create these subsets.
In order to access the files on the MinIO fileserver you need to log in using the URLs of the Armadillo server and the MinIO fileserver. A browser window will be opened where you can identify yourself with the ID provider.
armadillo.login("https://armadillo.test.molgenis.org",
"https://armadillo-minio.test.molgenis.org")
#> [1] "We're opening a browser so you can log in with code 5K2S2N"
A session will be created and the credentials are stored in the environment.
Let’s assume you are in a consortium which has core variables and outcome variables. You want to share a subset of the whole dataset with certain researchers who applied for access to your data.
First we will create a project, here called ‘subset1’, by calling `armadillo.create_project("subset1")`.
#list projects on the MinIO file server
Secondly we will explore the data and find the relevant variables to share.
armadillo.list_projects()
#> [1] "cohort1" "cohort2" "cohort3" "dnbc"
#> [5] "example-dictionary" "exampledictionary" "methylationclocks" "subset1"
#List the tables in a project
You want to share data from ‘cohort1’. We will list the available tables within this project.
armadillo.list_tables("cohort1")
#> [1] "1_1-outcome-1_0/nonrep" "1_1-outcome-1_0/yearlyrep" "2_1-core-1_0/monthlyrep"
#> [4] "2_1-core-1_0/nonrep" "2_1-core-1_0/trimesterrep" "2_1-core-1_0/yearlyrep"
## Making subsets of the data
Since the core and outcome data tables have the same names, we will first subset the outcome variables and afterwards subset the core variables.
# Subset the outcome variables
We will now download the outcome tables to the local environment
non_rep <- armadillo.load_table("cohort1", "1_1-outcome-1_0", "nonrep")
yearly_rep <- armadillo.load_table("cohort1", "1_1-outcome-1_0", "yearlyrep")
Next, we list their variables
colnames(non_rep)
#> [1] "row_id" "child_id" "pets_preg"
#> [4] "asthma_ever_CHICOS" "asthma_ever_MeDALL" "asthma_current_MeDALL"
#> [7] "asthma_current_ISAAC" "allergy_any_m" "allergy_inh_m"
#> [10] "anaphylaxis"
colnames(yearly_rep)
#> [1] "row_id" "child_id" "age_years"
#> [4] "whe_" "asthma_" "asthma_med_"
#> [7] "asthma_med_spec_" "FEV1_z_" "FVC_z_"
#> [10] "FEV1FVC_z_" "repro_" "FeNO_"
#> [13] "inh_all_sens_SPT_" "inh_all_sens_IgE_HDM_" "inh_all_sens_IgE_CAT_"
#> [16] "inh_all_sens_IgE_RYE_" "inh_all_sens_IgE_MOULD_"
Finally, we subset the variables that were requested (using `select()` from the dplyr package).
subset_outcome_non_rep <- non_rep %>% select(child_id, pets_preg, asthma_ever_CHICOS)
subset_outcome_yearly_rep <- yearly_rep %>% select(child_id, asthma_med_)
# Subset the core variables
We will now download the relevant core tables to the local environment
non_rep <- armadillo.load_table("cohort1", "2_1-core-1_0", "nonrep")
yearly_rep <- armadillo.load_table("cohort1", "2_1-core-1_0", "yearlyrep")
Next, we list their variables
colnames(non_rep)
#> [1] "row_id" "child_id" "mother_id" "cohort_id"
#> [5] "preg_no" "child_no" "coh_country" "recruit_age"
#> [9] "cob_m" "ethn1_m" "ethn2_m" "ethn3_m"
#> [13] "agebirth_m_y" "agebirth_m_d" "death_m" "death_m_age"
#> [17] "prepreg_weight" "prepreg_weight_mes" "prepreg_weight_ga" "latepreg_weight"
#> [21] "latepreg_weight_mes" "latepreg_weight_ga" "preg_gain" "preg_gain_mes"
#> [25] "height_m" "height_mes_m" "prepreg_dia" "preg_dia"
#> [29] "preg_thyroid" "preg_fever" "preeclam" "preg_ht"
#> [33] "asthma_m" "prepreg_psych" "preg_psych" "ppd"
#> [37] "prepreg_smk" "prepreg_cig" "preg_smk" "preg_cig"
#> [41] "prepreg_alc" "prepreg_alc_unit" "preg_alc" "preg_alc_unit"
#> [45] "folic_prepreg" "folic_preg12" "folic_post12" "parity_m"
#> [49] "preg_plan" "mar" "ivf" "outcome"
#> [53] "mode_delivery" "plac_abrup" "cob_p" "cob_p_fath"
#> [57] "ethn1_p" "ethn2_p" "ethn3_p" "ethn_p_fath"
#> [61] "agebirth_p_y" "agebirth_p_d" "agebirth_p_fath" "death_p"
#> [65] "death_p_age" "death_p_fath" "weight_f1" "weight_mes_f1"
#> [69] "weight_f1_fath" "height_f1" "height_mes_f1" "height_f1_fath"
#> [73] "dia_bf" "asthma_bf" "psych_bf" "smk_p"
#> [77] "smk_cig_p" "smk_fath" "birth_month" "birth_year"
#> [81] "apgar" "neo_unit" "sex" "plurality"
#> [85] "ga_lmp" "ga_us" "ga_mr" "ga_bj"
#> [89] "birth_weight" "birth_length" "birth_head_circum" "weight_who_ga"
#> [93] "plac_weight" "con_anomalies" "major_con_anomalies" "cer_palsy"
#> [97] "sibling_pos" "death_child" "death_child_age" "breastfed_excl"
#> [101] "breastfed_any" "breastfed_ever" "solid_food" "childcare_intro"
#> [105] "cats_preg" "dogs_preg" "cats_quant_preg" "dogs_quant_preg"
colnames(yearly_rep)
#> [1] "row_id" "child_id" "age_years" "cohab_"
#> [5] "occup_m_" "occupcode_m_" "edu_m_" "occup_f1_"
#> [9] "occup_f1_fath" "occup_f2_" "occup_f2_fath" "occupcode_f1_"
#> [13] "occupcode_f1_fath" "occupcode_f2_" "occupcode_f2_fath" "edu_f1_"
#> [17] "edu_f1_fath" "edu_f2_" "edu_f2_fath" "childcare_"
#> [21] "childcarerel_" "childcareprof_" "childcarecentre_" "smk_exp"
#> [25] "pets_" "cats_" "cats_quant_" "dogs_"
#> [29] "dogs_quant_" "mental_exp" "hhincome_" "fam_splitup"
#> [33] "famsize_child" "famsize_adult"
Finally, we subset the variables that were requested (using `select()` from the dplyr package).
subset_core_non_rep <- non_rep %>% select(child_id, asthma_m, breastfed_any)
subset_core_yearly_rep <- yearly_rep %>% select(child_id, pets_)
##Uploading the data subset
#Check the data subset before uploading
colnames(subset_outcome_non_rep)
#> [1] "child_id" "pets_preg" "asthma_ever_CHICOS"
colnames(subset_outcome_yearly_rep)
#> [1] "child_id" "asthma_med_"
colnames(subset_core_non_rep)
#> [1] "child_id" "asthma_m" "breastfed_any"
colnames(subset_core_yearly_rep)
#> [1] "child_id" "pets_"
#Upload the data subset
armadillo.upload_table("subset1", "1_1_outcome_1_0", subset_outcome_non_rep, "non_rep")
#> Compressing...
#>
|
| | 0%
|
|========================================================================================| 100%
#> Uploaded 1_1_outcome_1_0/non_rep
armadillo.upload_table("subset1", "1_1_outcome_1_0", subset_outcome_yearly_rep, "yearly_rep")
#> Compressing...
#>
|
| | 0%
|
|========================================================================================| 100%
#> Uploaded 1_1_outcome_1_0/yearly_rep
armadillo.upload_table("subset1", "2_1_core_1_0", subset_core_non_rep, "non_rep")
#> Compressing...
#>
|
| | 0%
|
|========================================================================================| 100%
#> Uploaded 2_1_core_1_0/non_rep
armadillo.upload_table("subset1", "2_1_core_1_0", subset_core_yearly_rep, "yearly_rep")
#> Compressing...
#>
|
| | 0%
|
|========================================================================================| 100%
#> Uploaded 2_1_core_1_0/yearly_rep
Now you can also take a look at the files in the user interface of the MinIO fileserver if you open the MinIO server URL in a browser window.