How to use RSDA 3.3

RSDA Package version 3.3

Oldemar Rodríguez R.

Installing the package

CRAN

install.packages("RSDA", dependencies=TRUE)

GitHub

devtools::install_github("PROMiDAT/RSDA")

How to read a Symbolic Table from a CSV file with RSDA?

ex3 <- read.sym.table(file = 'tsym1.csv', header=TRUE, sep=';',dec='.', row.names=1)
ex3
#> # A tibble: 7 × 7
#>      F1              F2      F3    F4        F5               F6              F7
#>   <dbl>      <symblc_n> <symbl> <dbl> <symblc_>       <symblc_n>      <symblc_n>
#> 1   2.8   [1.00 : 2.00]  <hist>   6       {a,d}   [0.00 : 90.00]  [9.00 : 24.00]
#> 2   1.4   [3.00 : 9.00]  <hist>   8     {b,c,d} [-90.00 : 98.00]  [-9.00 : 9.00]
#> 3   3.2  [-1.00 : 4.00]  <hist>  -7       {a,b}  [65.00 : 90.00] [65.00 : 70.00]
#> 4  -2.1   [0.00 : 2.00]  <hist>   0   {a,b,c,d}  [45.00 : 89.00] [25.00 : 67.00]
#> 5  -3   [-4.00 : -2.00]  <hist>  -9.5       {b}  [20.00 : 40.00]  [9.00 : 40.00]
#> 6   0.1 [10.00 : 21.00]  <hist>  -1       {a,d}    [5.00 : 8.00]   [5.00 : 8.00]
#> 7   9    [4.00 : 21.00]  <hist>   0.5       {a}    [3.14 : 6.76]   [4.00 : 6.00]

How to save a Symbolic Table in a CSV file with RSDA?

write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';',dec = '.',
                row.names = TRUE, col.names = TRUE)

Symbolic Data Frame Example in RSDA

data(example3)
example3
#> # A tibble: 7 × 7
#>      F1              F2                      F3    F4        F5               F6
#>   <dbl>      <symblc_n>              <symblc_m> <dbl> <symblc_>       <symblc_n>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}    [3.14 : 6.76]
#> # … with 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#>      F1            F2                      F3    F4         F5               F6
#>   <dbl>    <symblc_n>              <symblc_m> <dbl> <symblc_s>       <symblc_n>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8  {a,b,c,d} [-90.00 : 98.00]
#> # … with 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#>                        F3
#>                <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#>           F5
#>   <symblc_s>
#> 1  {a,b,c,d}
#> 2  {2,b,1,c}
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generate a symbolic data table from a classic data table in RSDA?

data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

The classic.to.sym function converts a traditional (classic) data table into a symbolic one. To do this, we must indicate its parameters, as shown in the examples below.

Example 1

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#>           county         group            age
#>       <symblc_n>    <symblc_n>     <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

We can also add new variables, indicating the symbolic type we want each one to be (for example, a histogram of age).

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#>     age_hist         county         group            age
#>   <symblc_h>     <symblc_n>    <symblc_n>     <symblc_n>
#> 1     <hist> [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2     <hist>  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3     <hist> [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4     <hist>  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5     <hist> [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6     <hist> [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

Example 2

data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1     8    1       0.19          0.33         0.02         0.90         0.12
#> 2    53    1       0.00          0.16         0.12         0.74         0.45
#> 3    24    1       0.00          0.42         0.49         0.56         0.17
#> 4    34    1       0.04          0.77         1.00         0.08         0.12
#> 5    42    1       0.01          0.55         0.02         0.95         0.09
#> 6     6    1       0.02          0.28         0.06         0.54         1.00
#>   racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1        0.17        0.34        0.47        0.29       0.32      0.20      1.0
#> 2        0.07        0.26        0.59        0.35       0.27      0.02      1.0
#> 3        0.04        0.39        0.47        0.28       0.32      0.00      0.0
#> 4        0.10        0.51        0.50        0.34       0.21      0.06      1.0
#> 5        0.05        0.38        0.38        0.23       0.36      0.02      0.9
#> 6        0.25        0.31        0.48        0.27       0.37      0.04      1.0
#>   medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1      0.37     0.72         0.34       0.60       0.29        0.15       0.43
#> 2      0.31     0.72         0.11       0.45       0.25        0.29       0.39
#> 3      0.30     0.58         0.19       0.39       0.38        0.40       0.84
#> 4      0.58     0.89         0.21       0.43       0.36        0.20       0.82
#> 5      0.50     0.72         0.16       0.68       0.44        0.11       0.71
#> 6      0.52     0.68         0.20       0.61       0.28        0.15       0.25
#>   medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1      0.39      0.40        0.39        0.32         0.27        0.27
#> 2      0.29      0.37        0.38        0.33         0.16        0.30
#> 3      0.28      0.27        0.29        0.27         0.07        0.29
#> 4      0.51      0.36        0.40        0.39         0.16        0.25
#> 5      0.46      0.43        0.41        0.28         0.00        0.74
#> 6      0.62      0.72        0.76        0.77         0.28        0.52
#>   OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1        0.36       0.41        0.08           0.19            0.10
#> 2        0.22       0.35        0.01           0.24            0.14
#> 3        0.28       0.39        0.01           0.27            0.27
#> 4        0.36       0.44        0.01           0.10            0.09
#> 5        0.51       0.48        0.00           0.06            0.25
#> 6        0.48       0.60        0.01           0.12            0.13
#>   PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1         0.18        0.48          0.27      0.68        0.23            0.41
#> 2         0.24        0.30          0.27      0.73        0.57            0.15
#> 3         0.43        0.19          0.36      0.58        0.32            0.29
#> 4         0.25        0.31          0.33      0.71        0.36            0.45
#> 5         0.30        0.33          0.12      0.65        0.67            0.38
#> 6         0.12        0.80          0.10      0.65        0.19            0.77
#>   PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1         0.25             0.52           0.68           0.40         0.75
#> 2         0.42             0.36           1.00           0.63         0.91
#> 3         0.49             0.32           0.63           0.41         0.71
#> 4         0.37             0.39           0.34           0.45         0.49
#> 5         0.42             0.46           0.22           0.27         0.20
#> 6         0.06             0.91           0.49           0.57         0.61
#>   TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1        0.75       0.35       0.55        0.59             0.61        0.56
#> 2        1.00       0.29       0.43        0.47             0.60        0.39
#> 3        0.70       0.45       0.42        0.44             0.43        0.43
#> 4        0.44       0.75       0.65        0.54             0.83        0.65
#> 5        0.21       0.51       0.91        0.91             0.89        0.85
#> 6        0.58       0.44       0.62        0.69             0.87        0.53
#>   PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1                0.74       0.76     0.04     0.14     0.03           0.24
#> 2                0.46       0.53     0.00     0.24     0.01           0.52
#> 3                0.71       0.67     0.01     0.46     0.00           0.07
#> 4                0.85       0.86     0.03     0.33     0.02           0.11
#> 5                0.40       0.60     0.00     0.06     0.00           0.03
#> 6                0.30       0.43     0.00     0.11     0.04           0.30
#>   PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1         0.27         0.37          0.39           0.07         0.07
#> 2         0.62         0.64          0.63           0.25         0.27
#> 3         0.06         0.15          0.19           0.02         0.02
#> 4         0.20         0.30          0.31           0.05         0.08
#> 5         0.07         0.20          0.27           0.01         0.02
#> 6         0.35         0.43          0.47           0.50         0.50
#>   PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1         0.08          0.08             0.89                0.06
#> 2         0.25          0.23             0.84                0.10
#> 3         0.04          0.05             0.88                0.04
#> 4         0.11          0.11             0.81                0.08
#> 5         0.04          0.05             0.88                0.05
#> 6         0.56          0.57             0.45                0.28
#>   PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1            0.14              0.13             0.33              0.39
#> 2            0.16              0.10             0.17              0.29
#> 3            0.20              0.20             0.46              0.52
#> 4            0.56              0.62             0.85              0.77
#> 5            0.16              0.19             0.59              0.60
#> 6            0.25              0.19             0.29              0.53
#>   PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1               0.28            0.55             0.09           0.51      0.5
#> 2               0.17            0.26             0.20           0.82      0.0
#> 3               0.43            0.42             0.15           0.51      0.5
#> 4               1.00            0.94             0.12           0.01      0.5
#> 5               0.37            0.89             0.02           0.19      0.5
#> 6               0.18            0.39             0.26           0.73      0.0
#>   HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1       0.21         0.71          0.52             0.05           0.26
#> 2       0.02         0.79          0.24             0.02           0.25
#> 3       0.01         0.86          0.41             0.29           0.30
#> 4       0.01         0.97          0.96             0.60           0.47
#> 5       0.01         0.89          0.87             0.04           0.55
#> 6       0.02         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1             0.46                  0.25          0.04         0           0.12
#> 2             0.32                  0.18          0.00         0           0.21
#> 3             0.39                  0.28          0.00         0           0.14
#> 4             0.51                  0.47          0.00         0           0.19
#> 5             0.51                  0.21          0.00         0           0.11
#> 6             0.59                  0.11          0.00         0           0.70
#>   PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1             0.42           0.50          0.51           0.64     0.12    0.26
#> 2             0.50           0.34          0.60           0.52     0.02    0.12
#> 3             0.49           0.54          0.67           0.56     0.01    0.21
#> 4             0.30           0.73          0.64           0.65     0.02    0.39
#> 5             0.72           0.64          0.61           0.53     0.04    0.09
#> 6             0.42           0.49          0.73           0.64     0.01    0.58
#>   PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1           0.20                0.32                0.20
#> 2           0.45                0.00                0.67
#> 3           0.02                0.00                0.43
#> 4           0.28                0.00                0.12
#> 5           0.02                0.00                0.03
#> 6           0.10                0.00                0.14
result  <- classic.to.sym(x = USCrime,
                          concept = state, 
                          variables= c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                          ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                   breaks = pretty(USCrime$ViolentCrimesPerPop,5)))
result
#> # A tibble: 46 × 4
#>    ViolentCrimesPerPop_hist NumInShelters      NumImmig ViolentCrimesPerPop
#>                  <symblc_h>    <symblc_n>    <symblc_n>          <symblc_n>
#>  1                   <hist> [0.00 : 0.32] [0.00 : 0.04]       [0.01 : 1.00]
#>  2                   <hist> [0.01 : 0.18] [0.01 : 0.09]       [0.05 : 0.36]
#>  3                   <hist> [0.00 : 1.00] [0.00 : 0.57]       [0.05 : 0.57]
#>  4                   <hist> [0.00 : 0.08] [0.00 : 0.02]       [0.02 : 1.00]
#>  5                   <hist> [0.00 : 1.00] [0.00 : 1.00]       [0.01 : 1.00]
#>  6                   <hist> [0.00 : 0.68] [0.00 : 0.23]       [0.07 : 0.75]
#>  7                   <hist> [0.00 : 0.79] [0.00 : 0.14]       [0.00 : 0.94]
#>  8                   <hist> [0.01 : 0.01] [0.01 : 0.01]       [0.37 : 0.37]
#>  9                   <hist> [1.00 : 1.00] [0.39 : 0.39]       [1.00 : 1.00]
#> 10                   <hist> [0.00 : 0.52] [0.00 : 1.00]       [0.06 : 1.00]
#> # … with 36 more rows

Example 3

data("ex_mcfa1") 
head(ex_mcfa1)
#>   suspect age     hair    eyes    region
#> 1       1  42    h_red e_brown     Bronx
#> 2       2  20  h_black e_green     Bronx
#> 3       3  64  h_brown e_brown  Brooklyn
#> 4       4  55 h_blonde e_brown     Bronx
#> 5       5   4  h_brown e_green Manhattan
#> 6       6  61 h_blonde e_green     Bronx
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            variables=c(hair,
                                        eyes,
                                        region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#>                  hair              eyes               region
#>            <symblc_s>        <symblc_s>           <symblc_s>
#>  1            {h_red} {e_brown,e_black}              {Bronx}
#>  2 {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # … with 90 more rows

Example 4

We can modify the function that will be applied by default to the categorical variables, using the default.categorical argument:

sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # … with 90 more rows

Converting SODAS 1.0 *.SDS files to RSDA files

hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#>                             R3101                 RNINO12
#>                        <symblc_m>              <symblc_m>
#>  1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#>  2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#>  3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#>  4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#>  5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#>  6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#>  7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#>  8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#>  9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # … with 22 more rows, and 4 more variables: RNINO3 <symblc_m>,
#> #   RNINO4 <symblc_m>, RNINO34 <symblc_m>, RSOI <symblc_m>
# We can save the converted table as a CSV file in RSDA format as follows:
write.sym.table(hani3101,
                file='hani3101.csv',
                sep=';',
                dec='.',
                row.names=TRUE,
                col.names=TRUE)

Converting SODAS 2.0 *.XML files to RSDA files

abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#>           LENGTH      DIAMETER        HEIGHT  WHOLE_WEIGHT SHUCKED_WEIGHT
#>       <symblc_n>    <symblc_n>    <symblc_n>    <symblc_n>     <symblc_n>
#>  1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37]  [0.03 : 0.64]
#>  2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25]  [0.06 : 1.16]
#>  3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66]  [0.07 : 1.49]
#>  4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51]  [0.11 : 1.23]
#>  5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20]  [0.12 : 0.84]
#>  6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53]  [0.16 : 0.93]
#>  7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12]  [0.16 : 0.82]
#>  8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81]  [0.32 : 0.71]
#>  9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07]  [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89]  [0.00 : 0.50]
#> # … with 14 more rows, and 2 more variables: VISCERA_WEIGHT <symblc_n>,
#> #   SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file='abalone.csv',
                sep=';',
                dec='.',
                row.names = TRUE,
                col.names = TRUE)

Basic statistics

Symbolic Mean

data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

Symbolic median

median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

Variance and standard deviation

var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226

Symbolic correlation

cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#>           [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041

Radar plot for intervals

library(ggpolypath)
#> Loading required package: ggplot2

data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE

#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
#> caller; using TRUE


res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2,3,4,5))

res
#> $frequency
#> [1] 25 49  1 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1
#> [4,]  4.3

res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))

res
#> $frequency
#> [1] 50 25 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1

Distances for intervals

Gowda-Diday

data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

Ichino

DM <- sym.dist.interval(sym.data= oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

Hausdorff

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)

Linear regression for intervals

Training

data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#> 
#> Call:
#> stats::lm(formula = formula, data = centers)
#> 
#> Coefficients:
#> (Intercept)       lcavol      lweight          age         lbph          svi  
#>    0.411537     0.579327     0.614128    -0.018659     0.143918     0.730937  
#>         lcp      gleason        pgg45  
#>   -0.205536    -0.030924     0.009507

Prediction

pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

Testing

RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964

LASSO regression for intervals

data(int_prost_train)
data(int_prost_test)

Training

res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.lasso)

plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.6945169
RMSE.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.6914335
R2.L(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5409863
R2.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.544571
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4857015

RIDGE regression for intervals

Training

data(int_prost_train)
data(int_prost_test)

res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.ridge)

plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.703543
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7004145
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5286114
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5322683
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4808652

PCA for intervals

Example 1

data("oils")
res <- sym.pca(oils,'centers')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 2

res <- sym.pca(oils,'tops')
plot(res, choix = "ind")

Example 3

res <- sym.pca(oils, 'principal.curves')
plot(res, choix = "ind")

Example 4

res <- sym.pca(oils,'optimized.distance')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 5

res <- sym.pca(oils,'optimized.variance')
plot(res, choix = "ind")

plot(res, choix = "var")

Symbolic Multiple Correspondence Analysis

Example 1

data("ex_mcfa1") 
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn
# Build a symbolic table from the classic table `ex_mcfa1`: `suspect` is
# passed as the concept, and `sym.set` is the default representation for
# categorical variables. The bare name on the last line auto-prints the result.
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # … with 90 more rows
# Run sym.mcfa() on columns 2 and 3 of the symbolic table, then draw the
# scatterplot from columns 2 and 3 of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Repeat the analysis with column 4 of the symbolic table included as well.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)

Symbolic UMAP

Oils Example

# Use the `oils` symbolic table as input; the bare name on the second line
# auto-prints it.
# NOTE(review): `data` is also the name of the utils::data() function; a more
# specific variable name might read better.
data <- oils
data
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
# Embed the symbolic table with sym.umap() and print the resulting
# two-column coordinates (V1, V2).
# NOTE(review): the printed result has 128 rows for 8 symbolic objects with 4
# interval variables, presumably one row per hypercube vertex (8 * 2^4) -
# confirm against the sym.umap() documentation.
# NOTE(review): no seed is set before this call; if sym.umap() is stochastic,
# the coordinates will differ between runs - consider set.seed().
coords <- sym.umap(data)
coords
#>            V1            V2
#> 1   -2.115430  -5.932068954
#> 2   -1.954026  -6.093473703
#> 3   -1.802698  -6.244936180
#> 4   -1.911227  -6.137141003
#> 5   -2.055794  -5.991380366
#> 6   -2.155173  -5.891868069
#> 7   -2.018848  -6.028642365
#> 8   -1.840133  -6.207315883
#> 9   -7.043801 -11.914521092
#> 10  -6.810658 -11.929515381
#> 11  -7.057139 -11.829152600
#> 12  -7.104821 -11.803920375
#> 13  -6.899722 -12.114140843
#> 14  -6.806376 -12.250424660
#> 15  -7.026197 -12.151660743
#> 16  -7.124126 -12.073180632
#> 17  -8.408478 -12.090026527
#> 18  -8.313943 -12.592761074
#> 19  -8.204598 -12.468866768
#> 20  -8.056253 -12.573297470
#> 21  -8.328437 -12.229818158
#> 22  -8.249429 -12.294366841
#> 23  -8.490012 -12.168744756
#> 24  -8.245589 -12.453569166
#> 25  -7.839864 -12.249897454
#> 26  -7.931728 -12.027331945
#> 27  -7.775770 -12.510457438
#> 28  -7.900560 -12.090919420
#> 29  -7.636157 -12.392316066
#> 30  -7.543132 -12.331554460
#> 31  -7.500538 -12.343282751
#> 32  -7.752418 -12.220507897
#> 33  -5.235231   1.767590170
#> 34  -4.972326   1.622102703
#> 35  -5.689030   1.882306605
#> 36  -5.836856   2.005416012
#> 37  -5.006563   1.334982998
#> 38  -4.980298   1.090985551
#> 39  -5.336948   1.290530224
#> 40  -5.356591   1.346105714
#> 41  -6.103772   0.189964672
#> 42  -6.099633   0.149992699
#> 43  -6.685271  -0.006012316
#> 44  -6.625425   0.101780131
#> 45  -5.864915  -0.076967025
#> 46  -5.820806   0.035541375
#> 47  -6.060941  -0.155246194
#> 48  -6.128711  -0.204963764
#> 49  -4.664122   1.812260227
#> 50  -4.360073   1.685271920
#> 51  -4.848387   1.716976975
#> 52  -4.392929   1.655983220
#> 53  -4.617946   1.422627612
#> 54  -4.518084   1.313231479
#> 55  -4.847020   1.522515869
#> 56  -4.310571   1.403768546
#> 57  -5.526307   0.625535169
#> 58  -4.881434   0.549848950
#> 59  -5.553716   0.711274734
#> 60  -4.804018   0.445893500
#> 61  -5.167961   0.542516307
#> 62  -4.936037   0.640029722
#> 63  -5.281246   0.461077016
#> 64  -4.889400   0.528984955
#> 65  -2.103998  21.635644516
#> 66  -2.156375  21.342605553
#> 67  -2.990322  19.774950870
#> 68  -2.998939  19.783198202
#> 69  -2.123680  21.395502588
#> 70  -2.038553  21.524883540
#> 71  -3.007471  19.791290630
#> 72  -3.030844  19.810974883
#> 73  -1.998866  21.605077506
#> 74  -2.287929  21.329298323
#> 75  -2.705131  19.487057256
#> 76  -2.745367  19.524048302
#> 77  -2.156892  21.462006209
#> 78  -1.875270  21.682769745
#> 79  -2.995182  19.777431035
#> 80  -2.772396  19.547678008
#> 81  -6.504910   2.156184623
#> 82  -6.336243   2.356432263
#> 83  -6.819049   2.022376084
#> 84  -6.802843   2.065996465
#> 85  -6.299863   2.181479097
#> 86  -6.120538   2.188138154
#> 87  -6.849061   2.004239979
#> 88  -6.816935   2.101729542
#> 89  -6.993410   0.540555205
#> 90  -7.099367   0.423528160
#> 91  -7.204375   0.687581959
#> 92  -7.288208   0.819067773
#> 93  -7.036797   0.375369604
#> 94  -6.991133   0.378727570
#> 95  -7.069539   0.776693065
#> 96  -7.276018   0.769030333
#> 97  16.031371  -1.852578461
#> 98  15.712515  -1.622235040
#> 99  15.520256  -1.512869213
#> 100 15.530231  -1.637371831
#> 101 15.895129  -1.814920926
#> 102 15.969025  -1.838762759
#> 103 15.471363  -1.989783215
#> 104 15.622516  -1.731147437
#> 105 16.315830  -0.603047599
#> 106 16.071306  -0.623885520
#> 107 15.743349  -0.825479754
#> 108 15.629294  -0.698731776
#> 109 16.299508  -0.475051499
#> 110 16.040455  -0.733048306
#> 111 15.963306  -0.706051623
#> 112 15.497287  -0.852238111
#> 113 16.476197  -1.626630416
#> 114 16.452892  -1.535980293
#> 115 16.129286  -1.990077217
#> 116 16.024939  -1.848716539
#> 117 16.718104  -1.951063613
#> 118 16.833524  -1.926957622
#> 119 16.411540  -1.908083140
#> 120 16.564387  -1.761240903
#> 121 16.494093  -0.701665965
#> 122 16.865679  -0.856889004
#> 123 16.475344  -0.340839133
#> 124 16.258653  -0.502463745
#> 125 16.953888  -0.830180010
#> 126 17.141119  -0.578451129
#> 127 16.795344  -0.631074201
#> 128 16.893419  -0.594875751
# Plot the embedding coordinates (presumably a V1-versus-V2 scatterplot).
plot(coords)

Cardiological Example

# Use the `Cardiological` symbolic table as input; the bare name on the
# second line auto-prints it.
data <- Cardiological
data
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
# Embed the symbolic table with sym.umap() and print the resulting
# two-column coordinates (V1, V2).
# NOTE(review): the printed result has 88 rows for 11 symbolic objects with 3
# interval variables, presumably one row per hypercube vertex (11 * 2^3) -
# confirm against the sym.umap() documentation.
# NOTE(review): no seed is set before this call; if sym.umap() is stochastic,
# the coordinates will differ between runs - consider set.seed().
coords <- sym.umap(data)
coords
#>             V1           V2
#> 1   0.88225165 -4.833915487
#> 2   1.41329096 -4.576491508
#> 3   0.97374232 -4.936444010
#> 4   1.44650637 -4.543790803
#> 5   0.56354796 -4.265549719
#> 6   1.15280707 -3.480044038
#> 7   0.67785069 -4.404144661
#> 8   1.18835857 -3.252497089
#> 9   0.85709528 -3.766104580
#> 10  1.41652921 -3.406536337
#> 11  1.15813042 -1.370668265
#> 12  1.27501459 -1.507290295
#> 13  0.62709175 -3.369186881
#> 14  0.69542489 -3.111179120
#> 15  0.54378160 -0.987756383
#> 16  0.82393495 -0.926021684
#> 17  0.66022343 -0.599905430
#> 18 -0.69964552  2.855966376
#> 19  0.80351468 -0.030899773
#> 20  0.40077986  2.797968996
#> 21  0.40990622 -0.630207736
#> 22 -1.28342274  2.464611435
#> 23  0.64803946  0.035632638
#> 24  0.46333561  2.653775793
#> 25  1.07363247 -2.502261402
#> 26 -1.33774588  4.063220271
#> 27  1.20481470 -0.915162602
#> 28 -0.60349378  4.005837681
#> 29 -0.57340687 -0.865182666
#> 30 -1.89848533  3.355329306
#> 31 -0.02868254 -0.395835447
#> 32 -1.48935536  3.047579283
#> 33  0.80525849 -4.637748286
#> 34  1.50619607 -4.500812501
#> 35  0.91680520 -4.421646215
#> 36  1.36792646 -4.745336088
#> 37  0.60741803 -4.121619436
#> 38  1.24054422 -3.103163942
#> 39  0.73311912 -4.018410364
#> 40  1.14717677 -2.908740471
#> 41  1.06784148 -1.166887079
#> 42 -0.95725600  3.973265441
#> 43  1.11304832 -0.232400414
#> 44 -0.24790331  3.745543366
#> 45 -0.28048149 -0.768862209
#> 46 -1.77963058  2.968322467
#> 47  0.19358927 -0.002892163
#> 48 -1.07215350  2.785552421
#> 49 -1.84855852 -1.635630261
#> 50 -1.77680790 -1.503670653
#> 51 -1.68725918 -1.121903429
#> 52 -1.62630121 -1.174339701
#> 53 -1.63201128 -1.379039238
#> 54 -1.80542518 -1.355786863
#> 55 -1.39709701 -1.223233227
#> 56 -1.35142994 -1.338095912
#> 57  0.98209534 -1.242628213
#> 58 -0.93991344  4.308015339
#> 59  1.39887220 -0.169960163
#> 60 -0.18360634  3.694380388
#> 61  0.67436461 -0.937795946
#> 62 -1.24722238  3.574870839
#> 63  1.19620570 -0.256273965
#> 64 -0.52929620  3.453932143
#> 65  1.25006443 -2.596170244
#> 66 -1.33334773  4.131258564
#> 67  1.31261237  0.538464790
#> 68  0.17551875  3.395772456
#> 69 -0.71499954 -0.693004059
#> 70 -1.90134952  2.936716200
#> 71  0.71287295  2.248425211
#> 72  0.60282280  2.840592922
#> 73 -0.75214691  2.260350570
#> 74 -1.12652598  3.319003698
#> 75  0.76807775  2.593855237
#> 76  0.34663038  3.173571827
#> 77 -1.27928516  2.185295188
#> 78 -1.53317089  2.588498241
#> 79  0.46805675  2.500199912
#> 80  0.69444383  3.040741038
#> 81  1.53384820 -2.308468417
#> 82 -1.30855935  4.005632699
#> 83 -0.39739783  2.478021227
#> 84 -0.43124136  3.929644492
#> 85 -1.39598260  1.497955800
#> 86 -1.79577573  3.263241684
#> 87 -0.96036416  2.410098042
#> 88 -0.96827599  3.120451395
# Plot the embedding coordinates (presumably a V1-versus-V2 scatterplot).
plot(coords)

Symbolic t-SNE

Oils Example

# Use the `oils` symbolic table as input; the bare name on the second line
# auto-prints it.
# NOTE(review): `data` is also the name of the utils::data() function; a more
# specific variable name might read better.
data <- oils
data
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
# Embed the symbolic table with sym.tsne(). The call prints a sigma summary
# and the error every 100 iterations, up to iteration 1000.
# NOTE(review): no seed is set before this call; if sym.tsne() is stochastic,
# the coordinates will differ between runs - consider set.seed().
coords <- sym.tsne(data)
#> sigma summary: Min. : 0.427189106149214 |1st Qu. : 0.51201012887553 |Median : 0.690989987731773 |Mean : 0.665050802005762 |3rd Qu. : 0.74957300862727 |Max. : 0.982930144429843 |
#> Epoch: Iteration #100 error is: 11.2701178134774
#> Epoch: Iteration #200 error is: 0.070490641651848
#> Epoch: Iteration #300 error is: 0.0678287140596689
#> Epoch: Iteration #400 error is: 0.0677567451754522
#> Epoch: Iteration #500 error is: 0.0677560994371542
#> Epoch: Iteration #600 error is: 0.0677560598810392
#> Epoch: Iteration #700 error is: 0.0677560513310311
#> Epoch: Iteration #800 error is: 0.0677560488125753
#> Epoch: Iteration #900 error is: 0.0677560481385896
#> Epoch: Iteration #1000 error is: 0.0677560479046974
# Auto-print the two-column coordinates (V1, V2).
# NOTE(review): 128 rows are printed for 8 symbolic objects with 4 interval
# variables, presumably one row per hypercube vertex (8 * 2^4) - confirm.
coords
#>              V1         V2
#> 1   -0.57660522 -5.8085965
#> 2   -0.74371383 -5.9446565
#> 3   -0.81360435 -6.3490141
#> 4   -0.93039562 -6.4541146
#> 5   -0.03754516 -5.8857410
#> 6   -0.15214944 -5.9920592
#> 7   -0.21890265 -6.3913564
#> 8   -0.38559644 -6.5291022
#> 9   -7.49699975  3.8477600
#> 10  -7.68437568  3.9068359
#> 11  -8.07689006  4.5781666
#> 12  -8.24464071  4.6842053
#> 13  -6.93120710  4.3258483
#> 14  -7.06057097  4.4171411
#> 15  -7.27753811  5.1201137
#> 16  -7.46922908  5.3916896
#> 17  -6.72690934  8.0831559
#> 18  -6.14817186  9.2997799
#> 19  -6.70515136  8.2164809
#> 20  -6.12326456  9.3586967
#> 21  -7.81367463  8.1533272
#> 22  -6.86467032  9.0954464
#> 23  -7.76325500  8.2477608
#> 24  -6.83293074  9.2466504
#> 25  -7.02673412  7.5575709
#> 26  -6.09261528  8.5885644
#> 27  -6.98025106  7.7059761
#> 28  -6.04666798  8.6745615
#> 29  -7.75189111  7.4729313
#> 30  -7.33736224  8.7620645
#> 31  -7.74119570  7.5828899
#> 32  -7.32818206  8.9190979
#> 33  -1.42603731  0.8162364
#> 34  -0.90346632  0.9369104
#> 35   0.62959488  1.9844183
#> 36   1.18951606  2.2772908
#> 37  -3.06481631  2.7684941
#> 38  -2.66097838  2.9161572
#> 39  -1.31503716  3.7490573
#> 40  -0.73127398  3.7534241
#> 41  -2.72355512  1.1360299
#> 42  -2.37642537  1.1592711
#> 43  -0.85456081  2.3160877
#> 44  -0.32523964  2.6586690
#> 45  -3.40465924  1.7303141
#> 46  -3.15163716  1.7471943
#> 47  -1.98705488  2.3770976
#> 48  -1.59054688  2.5626944
#> 49  -0.25536299  1.1614961
#> 50   1.94495821  2.7245040
#> 51   0.67800373  2.0318839
#> 52   2.39423836  2.7684270
#> 53  -2.06913207  3.4395127
#> 54   0.78999717  3.1308767
#> 55  -1.12103204  3.6618639
#> 56   1.23579688  3.2515624
#> 57  -1.23270919  1.6129120
#> 58   1.40325902  3.7705367
#> 59  -0.40574384  2.2961146
#> 60   1.84778690  3.7918933
#> 61  -2.39461714  2.3406144
#> 62   0.13647044  3.5575580
#> 63  -1.64427359  2.8660051
#> 64   0.61471452  3.9232376
#> 65  -7.24860065 -2.7297749
#> 66  -6.96631905 -2.5764302
#> 67  -4.99474146 -1.9544539
#> 68  -4.72769196 -1.7861336
#> 69  -7.35658321 -2.7248809
#> 70  -7.02437146 -2.5943384
#> 71  -5.06372493 -1.9515523
#> 72  -4.77400649 -1.7934323
#> 73  -7.60885343 -2.1747003
#> 74  -7.30044545 -2.0144300
#> 75  -5.40940951 -1.3828091
#> 76  -5.12496742 -1.2434583
#> 77  -7.66573510 -2.2022813
#> 78  -7.38429966 -2.0111150
#> 79  -5.45431030 -1.3742906
#> 80  -5.20237350 -1.2320000
#> 81   2.40593281  1.5133704
#> 82   3.45619792  2.0750549
#> 83   4.12114384  2.5106469
#> 84   4.82898851  2.9039922
#> 85   1.51545530  1.6264780
#> 86   2.94945832  2.3240403
#> 87   3.82200123  2.6318289
#> 88   4.62397398  2.9098121
#> 89   1.15697194  1.0717549
#> 90   2.89142543  3.4807016
#> 91   3.52900879  3.7342162
#> 92   4.44907163  3.7200538
#> 93  -0.16204851  1.9127094
#> 94   2.29506135  3.7684400
#> 95   3.14264303  4.1236218
#> 96   4.14235510  3.8866158
#> 97   7.88526412 -6.7059189
#> 98   8.01617262 -4.5634297
#> 99   7.33386278 -4.9831400
#> 100  7.39049887 -3.3832380
#> 101  7.76039940 -6.9335704
#> 102  7.93126046 -4.7626759
#> 103  7.25428679 -5.2768833
#> 104  7.41681507 -3.4641314
#> 105  8.44950452 -7.2792853
#> 106  8.66659271 -4.9331854
#> 107  8.11401929 -5.4513954
#> 108  7.84919314 -3.4421261
#> 109  8.22042521 -7.5313088
#> 110  8.64919453 -5.2044505
#> 111  8.07554312 -5.7214128
#> 112  7.89833626 -3.5278743
#> 113  8.29722294 -9.1522999
#> 114  8.47496432 -8.3223251
#> 115  7.48025103 -7.0446095
#> 116  7.45816335 -5.9357718
#> 117  7.54982919 -9.6412674
#> 118  7.58298879 -8.9515906
#> 119  6.81252100 -7.9654339
#> 120  6.75995288 -7.0226752
#> 121  8.04745397 -9.5712198
#> 122  8.31225778 -8.8370475
#> 123  7.76162869 -7.8259790
#> 124  8.55625866 -6.4993638
#> 125  7.39885470 -9.8910989
#> 126  7.35419928 -9.4316070
#> 127  6.79288620 -8.5100307
#> 128  6.74470601 -7.7813293
# Plot the embedding coordinates (presumably a V1-versus-V2 scatterplot).
plot(coords)

Cardiological Example

# Use the `Cardiological` symbolic table as input; the bare name on the
# second line auto-prints it.
data <- Cardiological
data
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
# Embed the symbolic table with sym.tsne(). The call prints a sigma summary
# and the error every 100 iterations, up to iteration 1000.
# NOTE(review): in the log below the error is still decreasing at iteration
# 1000 (0.31 -> 0.29), so the run may not have fully converged - confirm.
# NOTE(review): no seed is set before this call; if sym.tsne() is stochastic,
# the coordinates will differ between runs - consider set.seed().
coords <- sym.tsne(data)
#> sigma summary: Min. : 0.519788650406116 |1st Qu. : 0.624854575274286 |Median : 0.674709191007808 |Mean : 0.686729097668156 |3rd Qu. : 0.736954182050816 |Max. : 0.940417289391571 |
#> Epoch: Iteration #100 error is: 16.5909741445595
#> Epoch: Iteration #200 error is: 1.50624602779076
#> Epoch: Iteration #300 error is: 0.876586150256424
#> Epoch: Iteration #400 error is: 0.660093868282571
#> Epoch: Iteration #500 error is: 0.55668936801157
#> Epoch: Iteration #600 error is: 0.507748824251466
#> Epoch: Iteration #700 error is: 0.483970301892018
#> Epoch: Iteration #800 error is: 0.411382463902802
#> Epoch: Iteration #900 error is: 0.310704029322917
#> Epoch: Iteration #1000 error is: 0.29376574438089
# Auto-print the two-column coordinates (V1, V2).
# NOTE(review): 88 rows are printed for 11 symbolic objects with 3 interval
# variables, presumably one row per hypercube vertex (11 * 2^3) - confirm.
coords
#>             V1          V2
#> 1  -272.086722  -79.335055
#> 2  -241.652093   37.832428
#> 3  -243.967933  -71.045164
#> 4  -266.458838   16.662256
#> 5  -216.805524  -97.786254
#> 6  -174.396427   40.584094
#> 7  -190.350174 -111.541903
#> 8  -170.239564    7.150869
#> 9  -190.722829  -22.473556
#> 10 -184.080425   77.266449
#> 11  -69.738482   20.661015
#> 12  -94.629587   -6.144006
#> 13 -144.156270  -55.347750
#> 14 -131.921174   26.275593
#> 15  -61.277238  -85.162090
#> 16  -20.018135  -39.906431
#> 17  -46.911579 -112.313494
#> 18  139.650820   60.721695
#> 19  -37.622810   92.406621
#> 20   66.511306   29.947946
#> 21  -20.424841 -125.236826
#> 22  155.513811   29.152490
#> 23  -29.234498  118.095229
#> 24  129.439258  -50.626212
#> 25 -136.007351  -13.459552
#> 26  238.253066  214.127112
#> 27  -35.235264   -1.882379
#> 28  255.813161  173.513094
#> 29   25.311379  -59.978299
#> 30  209.725278  177.280757
#> 31   20.341493 -110.676414
#> 32  213.516317  140.399070
#> 33 -242.218003  -23.683630
#> 34 -247.845145   71.392379
#> 35 -269.546826  -39.016589
#> 36 -274.633253   49.472769
#> 37 -205.402745  -51.792913
#> 38 -203.627781   57.440814
#> 39 -182.902334  -72.254798
#> 40 -199.803468   22.024338
#> 41  -61.615301  -22.115729
#> 42  242.471471  113.949166
#> 43    2.784803   21.759220
#> 44  127.399678  135.064874
#> 45    4.429684  -86.946924
#> 46  220.935271   68.491729
#> 47   53.382310 -118.190699
#> 48  192.637893    1.151124
#> 49 -112.887473 -239.286907
#> 50 -103.621104 -273.511583
#> 51  -31.521993 -201.169540
#> 52  -44.909397 -238.948561
#> 53  -85.006403 -235.582023
#> 54  -75.387440 -270.360553
#> 55  -59.395892 -202.235110
#> 56  -18.300773 -234.330283
#> 57  -88.346691  -35.056025
#> 58  259.643994  137.302103
#> 59  -20.443563   35.712527
#> 60   99.615369  147.748466
#> 61  -41.437027  -51.654864
#> 62  203.977305  106.856926
#> 63   13.872593   -6.692383
#> 64  135.845459   99.228344
#> 65 -145.768123   64.435739
#> 66  173.876363  226.546751
#> 67   10.997909   64.512104
#> 68   53.854649  109.707035
#> 69   49.579342  -49.491146
#> 70  176.064305  143.855952
#> 71   95.116566 -116.224241
#> 72  175.945740  -73.838870
#> 73  119.891472   39.303022
#> 74  171.072546   92.088527
#> 75  135.455568  -85.220724
#> 76   62.556413   64.272518
#> 77  117.189514   -2.679359
#> 78  206.301038   35.751057
#> 79  101.154619  -74.835926
#> 80  167.646522  -45.062345
#> 81 -120.621790   98.538214
#> 82  202.701074  219.323843
#> 83  100.376654   71.969794
#> 84  103.407546  115.932759
#> 85   76.861148  -12.832275
#> 86  175.128094  173.876946
#> 87  147.317250   -2.250455
#> 88  179.614234   58.394079
# Plot the embedding coordinates (presumably a V1-versus-V2 scatterplot).
plot(coords)