Code
library(tidyverse)
# Load the element-coexistence table from the CSV file into `result`.
result <- read.csv(file = "total_elements_mindat.csv")
Describing the dataset
Jiyin Zhang
January 26, 2023
This dataset is generated from the element coexistence counts in the Mindat.org database. The original data was retrieved via the Mindat API and stored in JSON format. After the data preprocessing and data cleaning steps, the retrieved data was curated and stored in CSV format. The dataset can be accessed in the GitHub repository under the name total_elements_mindat.csv.
The dataset was retrieved via the Mindat API as a JSON file. In the data preprocessing step, the element information was extracted and statistically recorded in a new JSON file. I then reorganized the data into 72 spreadsheets in CSV format.
I’m going to use the built-in read.csv
function to import the CSV file.
The glimpse
function in the tidyverse
collection of packages is a nice way to summarize the data frame:
Rows: 5,184
Columns: 74
$ zaxis <chr> "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H",…
$ yaxis <chr> "H", "Li", "Be", "B", "C", "N", "O", "F", "Na", "Mg", "Al", "Si"…
$ H <int> 3228, 51, 66, 193, 330, 128, 3205, 141, 643, 472, 817, 1023, 524…
$ Li <int> 51, 51, 4, 8, 2, 0, 51, 4, 19, 6, 25, 39, 9, 1, 1, 6, 8, 0, 6, 0…
$ Be <int> 66, 4, 66, 3, 1, 0, 66, 1, 13, 4, 8, 31, 27, 0, 0, 4, 34, 0, 0, …
$ B <int> 193, 8, 3, 193, 13, 3, 192, 10, 50, 55, 52, 73, 3, 8, 24, 9, 89,…
$ C <int> 330, 2, 1, 13, 330, 23, 317, 12, 65, 55, 50, 46, 17, 30, 19, 11,…
$ N <int> 128, 0, 0, 3, 23, 128, 118, 5, 23, 13, 22, 10, 18, 48, 23, 6, 8,…
$ O <int> 3205, 51, 66, 192, 317, 118, 3205, 138, 643, 472, 817, 1021, 524…
$ F <int> 141, 4, 1, 10, 12, 5, 138, 141, 54, 22, 79, 58, 30, 24, 8, 14, 5…
$ Na <int> 643, 19, 13, 50, 65, 23, 643, 54, 643, 106, 182, 356, 89, 102, 3…
$ Mg <int> 472, 6, 4, 55, 55, 13, 472, 22, 106, 472, 147, 161, 86, 61, 28, …
$ Al <int> 817, 25, 8, 52, 50, 22, 817, 79, 182, 147, 817, 401, 169, 135, 3…
$ Si <int> 1023, 39, 31, 73, 46, 10, 1021, 58, 356, 161, 401, 1023, 15, 36,…
$ P <int> 524, 9, 27, 3, 17, 18, 524, 30, 89, 86, 169, 15, 524, 21, 1, 32,…
$ S <int> 531, 1, 0, 8, 30, 48, 531, 24, 102, 61, 135, 36, 21, 531, 27, 53…
$ Cl <int> 210, 1, 0, 24, 19, 23, 204, 8, 39, 28, 33, 36, 1, 27, 210, 26, 5…
$ K <int> 287, 6, 4, 9, 11, 6, 287, 14, 82, 50, 81, 162, 32, 53, 26, 287, …
$ Ca <int> 918, 8, 34, 89, 97, 8, 918, 58, 187, 131, 269, 424, 134, 84, 53,…
$ Sc <int> 8, 0, 0, 0, 0, 0, 8, 0, 1, 3, 0, 4, 3, 0, 0, 0, 5, 8, 0, 0, 0, 0…
$ Ti <int> 145, 6, 0, 3, 2, 0, 145, 18, 81, 11, 9, 116, 9, 2, 2, 30, 32, 0,…
$ V <int> 181, 0, 0, 3, 6, 8, 180, 2, 28, 24, 31, 27, 14, 12, 3, 10, 55, 0…
$ Cr <int> 40, 0, 0, 4, 5, 0, 40, 2, 7, 11, 4, 13, 2, 6, 3, 3, 11, 0, 1, 2,…
$ Mn <int> 406, 4, 9, 15, 17, 2, 406, 14, 81, 42, 70, 185, 98, 23, 17, 28, …
$ Fe <int> 663, 11, 9, 18, 22, 18, 663, 15, 130, 78, 136, 216, 176, 122, 32…
$ Co <int> 35, 0, 0, 0, 4, 1, 35, 0, 3, 0, 1, 1, 1, 12, 2, 0, 4, 0, 0, 1, 0…
$ Ni <int> 63, 0, 0, 0, 12, 4, 62, 0, 2, 1, 6, 7, 2, 15, 5, 1, 4, 0, 0, 2, …
$ Cu <int> 359, 1, 0, 4, 36, 10, 358, 6, 16, 16, 39, 29, 40, 87, 61, 16, 50…
$ Zn <int> 208, 0, 3, 0, 15, 1, 208, 1, 11, 16, 23, 32, 44, 46, 7, 7, 29, 0…
$ Ga <int> 4, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0…
$ Ge <int> 10, 0, 0, 0, 1, 0, 10, 0, 0, 0, 3, 1, 0, 4, 0, 0, 2, 0, 0, 0, 0,…
$ As <int> 397, 0, 3, 2, 4, 5, 397, 3, 36, 40, 42, 17, 9, 21, 14, 12, 112, …
$ Se <int> 26, 0, 0, 0, 0, 0, 26, 0, 2, 1, 4, 1, 0, 3, 2, 1, 3, 0, 0, 0, 0,…
$ Br <int> 3, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Rb <int> 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Sr <int> 77, 1, 0, 9, 12, 0, 77, 6, 23, 4, 30, 30, 15, 5, 2, 4, 20, 0, 8,…
$ Y <int> 54, 1, 1, 2, 27, 0, 54, 5, 18, 1, 13, 21, 6, 6, 2, 2, 18, 0, 2, …
$ Zr <int> 70, 1, 3, 2, 9, 0, 70, 3, 56, 2, 1, 58, 9, 1, 10, 10, 31, 0, 1, …
$ Nb <int> 54, 0, 0, 0, 4, 0, 54, 4, 26, 4, 1, 32, 4, 1, 2, 12, 17, 0, 7, 1…
$ Mo <int> 49, 0, 0, 0, 0, 1, 49, 0, 6, 4, 3, 0, 6, 5, 0, 4, 9, 0, 0, 0, 0,…
$ Ru <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Rh <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Pd <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Ag <int> 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0…
$ Cd <int> 12, 0, 0, 0, 0, 0, 12, 1, 0, 0, 1, 0, 2, 8, 0, 1, 0, 0, 0, 0, 0,…
$ In <int> 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Sn <int> 21, 0, 2, 1, 0, 1, 20, 0, 2, 2, 2, 8, 0, 1, 2, 0, 7, 1, 0, 0, 0,…
$ Sb <int> 32, 0, 0, 0, 0, 0, 32, 0, 4, 2, 5, 2, 0, 10, 3, 2, 5, 0, 2, 0, 0…
$ Te <int> 52, 0, 0, 0, 2, 0, 52, 0, 0, 8, 1, 0, 0, 9, 7, 0, 6, 0, 0, 0, 1,…
$ I <int> 12, 0, 0, 0, 0, 1, 12, 0, 3, 3, 0, 0, 0, 1, 3, 2, 3, 0, 0, 0, 2,…
$ Cs <int> 12, 0, 1, 1, 0, 0, 12, 2, 4, 0, 2, 8, 1, 0, 0, 1, 4, 0, 2, 1, 0,…
$ Ba <int> 134, 2, 3, 4, 12, 0, 134, 16, 42, 11, 34, 76, 32, 5, 9, 8, 18, 1…
$ La <int> 29, 0, 0, 0, 7, 0, 29, 0, 3, 1, 14, 15, 3, 0, 0, 0, 10, 0, 1, 1,…
$ Ce <int> 77, 0, 2, 5, 16, 0, 77, 8, 17, 10, 25, 47, 8, 4, 1, 1, 28, 0, 13…
$ Nd <int> 19, 0, 1, 0, 8, 0, 19, 1, 1, 0, 3, 5, 3, 2, 0, 0, 5, 0, 0, 0, 0,…
$ Sm <int> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Gd <int> 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
$ Dy <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Er <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Yb <int> 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Hf <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Ta <int> 5, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
$ W <int> 22, 0, 0, 0, 1, 0, 22, 0, 5, 1, 2, 3, 1, 0, 0, 0, 7, 0, 0, 1, 0,…
$ Re <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Os <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Ir <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Pt <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Au <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ Hg <int> 11, 0, 0, 0, 3, 4, 10, 0, 0, 0, 1, 0, 1, 1, 4, 0, 0, 0, 0, 0, 0,…
$ Tl <int> 7, 0, 0, 0, 0, 0, 7, 0, 0, 0, 1, 0, 0, 4, 2, 2, 0, 0, 0, 2, 0, 0…
$ Pb <int> 229, 0, 1, 5, 28, 3, 228, 6, 3, 7, 27, 32, 27, 54, 39, 3, 13, 0,…
$ Bi <int> 42, 0, 0, 0, 0, 2, 41, 0, 1, 1, 1, 1, 11, 4, 1, 0, 1, 0, 0, 5, 1…
$ Th <int> 15, 0, 0, 1, 2, 0, 15, 2, 2, 0, 2, 4, 5, 0, 0, 1, 3, 0, 1, 0, 0,…
$ U <int> 272, 1, 0, 0, 41, 12, 272, 8, 33, 24, 21, 20, 51, 61, 2, 27, 54,…
The dataset is stored as one large flat table: the rows are organized by each of the 72 elements, and the cells hold the coexistence counts of element triplets from the corresponding sheets.
# Symbols of the 72 elements covered by the dataset.
c(
  "H", "Li", "Be", "B", "C", "N", "O", "F", "Na", "Mg", "Al", "Si",
  "P", "S", "Cl", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co",
  "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Rb", "Sr", "Y", "Zr",
  "Nb", "Mo", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",
  "Cs", "Ba", "La", "Ce", "Nd", "Sm", "Gd", "Dy", "Er", "Yb", "Hf", "Ta",
  "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Th", "U"
)
[1] "H" "Li" "Be" "B" "C" "N" "O" "F" "Na" "Mg" "Al" "Si" "P" "S" "Cl"
[16] "K" "Ca" "Sc" "Ti" "V" "Cr" "Mn" "Fe" "Co" "Ni" "Cu" "Zn" "Ga" "Ge" "As"
[31] "Se" "Br" "Rb" "Sr" "Y" "Zr" "Nb" "Mo" "Ru" "Rh" "Pd" "Ag" "Cd" "In" "Sn"
[46] "Sb" "Te" "I" "Cs" "Ba" "La" "Ce" "Nd" "Sm" "Gd" "Dy" "Er" "Yb" "Hf" "Ta"
[61] "W" "Re" "Os" "Ir" "Pt" "Au" "Hg" "Tl" "Pb" "Bi" "Th" "U"
The attributes of the data are recorded in a 2-dimensional format; therefore, the data frame rows will look similar to the output of the glimpse
function.
zaxis yaxis H Li Be B C N O F Na Mg Al Si P S Cl K
1 H H 3228 51 66 193 330 128 3205 141 643 472 817 1023 524 531 210 287
2 H Li 51 51 4 8 2 0 51 4 19 6 25 39 9 1 1 6
3 H Be 66 4 66 3 1 0 66 1 13 4 8 31 27 0 0 4
4 H B 193 8 3 193 13 3 192 10 50 55 52 73 3 8 24 9
5 H C 330 2 1 13 330 23 317 12 65 55 50 46 17 30 19 11
6 H N 128 0 0 3 23 128 118 5 23 13 22 10 18 48 23 6
Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Rb Sr Y Zr Nb Mo Ru
1 918 8 145 181 40 406 663 35 63 359 208 4 10 397 26 3 1 77 54 70 54 49 0
2 8 0 6 0 0 4 11 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0
3 34 0 0 0 0 9 9 0 0 0 3 0 0 3 0 0 0 0 1 3 0 0 0
4 89 0 3 3 4 15 18 0 0 4 0 0 0 2 0 0 1 9 2 2 0 0 0
5 97 0 2 6 5 17 22 4 12 36 15 0 1 4 0 0 0 12 27 9 4 0 0
6 8 0 0 8 0 2 18 1 4 10 1 0 0 5 0 1 0 0 0 0 0 1 0
Rh Pd Ag Cd In Sn Sb Te I Cs Ba La Ce Nd Sm Gd Dy Er Yb Hf Ta W Re Os Ir
1 0 0 3 12 2 21 32 52 12 12 134 29 77 19 1 1 0 0 1 0 5 22 0 0 0
2 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 2 0 0 0 1 3 0 2 1 0 0 0 0 1 0 0 0 0 0 0
4 0 0 0 0 0 1 0 0 0 1 4 0 5 0 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 2 0 0 12 7 16 8 0 1 0 0 0 0 0 1 0 0 0
6 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Pt Au Hg Tl Pb Bi Th U
1 0 0 11 7 229 42 15 272
2 0 0 0 0 0 0 0 1
3 0 0 0 0 1 0 0 0
4 0 0 0 0 5 0 1 0
5 0 0 3 0 28 0 2 41
6 0 0 4 0 3 2 0 12