add std_dev extension for building USDA sqlite
author Shane Jaroch <chown_tee@proton.me>
Sat, 24 Feb 2024 16:21:37 +0000 (11:21 -0500)
committer Shane Jaroch <chown_tee@proton.me>
Sat, 24 Feb 2024 16:21:37 +0000 (11:21 -0500)
.gitignore
docs/usda.svg
sql/extensions/fetch.sh [new file with mode: 0755]
sql/tables_aux.sql

index 94196e748e93055ab6e1683732fd57a4ff477b7e..8ee6227427d1260d0a696e30246495b9bb57a0f6 100644 (file)
@@ -11,6 +11,7 @@
 # Our files
 .env*
 *.swp
+*.so
 
 # Database files, csv, intermediates
 *.db
index 13672a76f6cd865475ed45b401725d6581099de0..cf688fca940afc0b80c0766b2b3a3de1b8237ca1 100644 (file)
@@ -4,26 +4,26 @@
 <!-- Generated by graphviz version 2.43.0 (0)
  -->
 <!-- Title: undefined Pages: 1 -->
-<svg width="1003pt" height="1501pt"
- viewBox="0.00 0.00 1002.60 1500.60" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="1016pt" height="1501pt"
+ viewBox="0.00 0.00 1015.60 1500.60" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(28.8 1450.8)">
 <title>undefined</title>
-<polygon fill="white" stroke="transparent" points="-28.8,49.8 -28.8,-1450.8 973.8,-1450.8 973.8,49.8 -28.8,49.8"/>
-<text text-anchor="start" x="435" y="12.6" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">usda.sqlite</text>
+<polygon fill="white" stroke="transparent" points="-28.8,49.8 -28.8,-1450.8 986.8,-1450.8 986.8,49.8 -28.8,49.8"/>
+<text text-anchor="start" x="441.5" y="12.6" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">usda.sqlite</text>
 <!-- version -->
 <g id="node1" class="node">
 <title>version</title>
-<path fill="none" stroke="black" d="M813.24,-356.64C813.24,-356.64 933.24,-356.64 933.24,-356.64 939.24,-356.64 945.24,-362.64 945.24,-368.64 945.24,-368.64 945.24,-448.64 945.24,-448.64 945.24,-454.64 939.24,-460.64 933.24,-460.64 933.24,-460.64 813.24,-460.64 813.24,-460.64 807.24,-460.64 801.24,-454.64 801.24,-448.64 801.24,-448.64 801.24,-368.64 801.24,-368.64 801.24,-362.64 807.24,-356.64 813.24,-356.64"/>
-<text text-anchor="start" x="846.24" y="-440.24" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="13.00">version</text>
-<polyline fill="none" stroke="black" points="801.24,-430.64 945.24,-430.64 "/>
-<text text-anchor="start" x="808.24" y="-417.04" font-family="Helvetica,sans-Serif" font-size="12.00">id* </text>
-<text text-anchor="start" x="829.24" y="-417.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">integer</text>
-<text text-anchor="start" x="808.24" y="-400.04" font-family="Helvetica,sans-Serif" font-size="12.00">version </text>
-<text text-anchor="start" x="856.24" y="-400.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
-<text text-anchor="start" x="808.24" y="-383.04" font-family="Helvetica,sans-Serif" font-size="12.00">created </text>
-<text text-anchor="start" x="858.24" y="-383.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">timestamp</text>
-<text text-anchor="start" x="808.24" y="-366.04" font-family="Helvetica,sans-Serif" font-size="12.00">notes </text>
-<text text-anchor="start" x="845.24" y="-366.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
+<path fill="none" stroke="black" d="M742.24,-1022.64C742.24,-1022.64 862.24,-1022.64 862.24,-1022.64 868.24,-1022.64 874.24,-1028.64 874.24,-1034.64 874.24,-1034.64 874.24,-1114.64 874.24,-1114.64 874.24,-1120.64 868.24,-1126.64 862.24,-1126.64 862.24,-1126.64 742.24,-1126.64 742.24,-1126.64 736.24,-1126.64 730.24,-1120.64 730.24,-1114.64 730.24,-1114.64 730.24,-1034.64 730.24,-1034.64 730.24,-1028.64 736.24,-1022.64 742.24,-1022.64"/>
+<text text-anchor="start" x="775.24" y="-1106.24" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="13.00">version</text>
+<polyline fill="none" stroke="black" points="730.24,-1096.64 874.24,-1096.64 "/>
+<text text-anchor="start" x="737.24" y="-1083.04" font-family="Helvetica,sans-Serif" font-size="12.00">id* </text>
+<text text-anchor="start" x="758.24" y="-1083.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">integer</text>
+<text text-anchor="start" x="737.24" y="-1066.04" font-family="Helvetica,sans-Serif" font-size="12.00">version </text>
+<text text-anchor="start" x="785.24" y="-1066.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
+<text text-anchor="start" x="737.24" y="-1049.04" font-family="Helvetica,sans-Serif" font-size="12.00">created </text>
+<text text-anchor="start" x="787.24" y="-1049.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">timestamp</text>
+<text text-anchor="start" x="737.24" y="-1032.04" font-family="Helvetica,sans-Serif" font-size="12.00">notes </text>
+<text text-anchor="start" x="774.24" y="-1032.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
 </g>
 <!-- nutr_def -->
 <g id="node2" class="node">
 <text text-anchor="start" x="298.64" y="-1115.56" font-family="Helvetica,sans-Serif" font-size="12.00">num_data_pts </text>
 <text text-anchor="start" x="386.64" y="-1115.56" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">int</text>
 <text text-anchor="start" x="298.64" y="-1098.56" font-family="Helvetica,sans-Serif" font-size="12.00">std_err </text>
-<text text-anchor="start" x="343.64" y="-1098.56" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">float</text>
+<text text-anchor="start" x="344.64" y="-1098.56" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">float</text>
 <text text-anchor="start" x="298.64" y="-1081.56" font-family="Helvetica,sans-Serif" font-size="12.00">src_cd </text>
 <text text-anchor="start" x="340.64" y="-1081.56" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
 <text text-anchor="start" x="298.64" y="-1064.56" font-family="Helvetica,sans-Serif" font-size="12.00">deriv_cd </text>
 <path fill="none" stroke="black" stroke-width="0.9" d="M524.81,-837.26C459.17,-828.85 361.7,-825.93 288.44,-864.96 230.35,-895.91 190.16,-963.41 168.79,-1007.92"/>
 <polygon fill="black" stroke="black" stroke-width="0.9" points="166.14,-1006.97 165.26,-1015.41 171.2,-1009.36 166.14,-1006.97"/>
 </g>
+<!-- nutrients_overview -->
+<g id="node15" class="node">
+<title>nutrients_overview</title>
+<path fill="none" stroke="black" d="M814.74,-730.64C814.74,-730.64 945.74,-730.64 945.74,-730.64 951.74,-730.64 957.74,-736.64 957.74,-742.64 957.74,-742.64 957.74,-890.64 957.74,-890.64 957.74,-896.64 951.74,-902.64 945.74,-902.64 945.74,-902.64 814.74,-902.64 814.74,-902.64 808.74,-902.64 802.74,-896.64 802.74,-890.64 802.74,-890.64 802.74,-742.64 802.74,-742.64 802.74,-736.64 808.74,-730.64 814.74,-730.64"/>
+<text text-anchor="start" x="810.24" y="-882.24" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="13.00">nutrients_overview</text>
+<polyline fill="none" stroke="black" points="802.74,-872.64 957.74,-872.64 "/>
+<text text-anchor="start" x="815.24" y="-859.04" font-family="Helvetica,sans-Serif" font-size="12.00">id </text>
+<text text-anchor="start" x="830.24" y="-859.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">int</text>
+<text text-anchor="start" x="815.24" y="-842.04" font-family="Helvetica,sans-Serif" font-size="12.00">rda </text>
+<text text-anchor="start" x="839.24" y="-842.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">real</text>
+<text text-anchor="start" x="815.24" y="-825.04" font-family="Helvetica,sans-Serif" font-size="12.00">unit </text>
+<text text-anchor="start" x="842.24" y="-825.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
+<text text-anchor="start" x="815.24" y="-808.04" font-family="Helvetica,sans-Serif" font-size="12.00">tagname </text>
+<text text-anchor="start" x="873.24" y="-808.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
+<text text-anchor="start" x="815.24" y="-791.04" font-family="Helvetica,sans-Serif" font-size="12.00">nutr_desc </text>
+<text text-anchor="start" x="878.24" y="-791.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">text</text>
+<text text-anchor="start" x="815.24" y="-774.04" font-family="Helvetica,sans-Serif" font-size="12.00">n_foods </text>
+<text text-anchor="start" x="866.24" y="-774.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">int</text>
+<text text-anchor="start" x="815.24" y="-757.04" font-family="Helvetica,sans-Serif" font-size="12.00">avg_val </text>
+<text text-anchor="start" x="864.24" y="-757.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">real</text>
+<text text-anchor="start" x="815.24" y="-740.04" font-family="Helvetica,sans-Serif" font-size="12.00">std_dev </text>
+<text text-anchor="start" x="865.24" y="-740.04" font-family="Helvetica,sans-Serif" font-weight="bold" font-size="12.00">real</text>
+</g>
 </g>
 </svg>
diff --git a/sql/extensions/fetch.sh b/sql/extensions/fetch.sh
new file mode 100755 (executable)
index 0000000..bbf5b95
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Download the sqlean extension bundle (Linux x86 archive, release 0.21.10)
+# and unpack it next to this script.
+#
+# Takes no arguments. Needs wget and unzip on PATH, plus network access.
+
+# Enable errexit here rather than on the shebang line, so it still applies
+# when the script is started as "bash fetch.sh".
+set -e
+
+# Work in the script's own directory: the cleanup globs and the download must
+# not depend on (or delete files from) the caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Remove earlier downloads so wget does not save a numbered duplicate.
+rm -f ./*.so ./*.zip
+wget https://github.com/nalgeon/sqlean/releases/download/0.21.10/sqlean-linux-x86.zip
+# Name the archive explicitly instead of relying on a glob matching one file.
+unzip sqlean-linux-x86.zip
index ca1e720873108017a79e1a80d4657ca66896e9ad..4aae5892b13105f2313660adf615a1a1eb535cf6 100644 (file)
@@ -1,3 +1,5 @@
+-- NOTE: run extensions/fetch.sh first; it downloads the extension loaded below (needed for stddev)
+.load ./extensions/stats
 -- Saves time intensive query in new table
 CREATE TABLE nutrients_overview AS
 SELECT
@@ -6,8 +8,9 @@ SELECT
   unit,
   tagname,
   nutr_desc,
-  COUNT(nut_data.nutr_id) AS n_foods,
-  ROUND(avg(nut_data.nutr_val), 3) AS avg_val
+  CAST(COUNT(nut_data.nutr_id) AS INTEGER) AS n_foods,
+  CAST(ROUND(avg(nut_data.nutr_val), 3) AS real) AS avg_val,
+  CAST(ROUND(stddev(nut_data.nutr_val), 2) AS real) AS std_dev
 FROM
   nutr_def
   INNER JOIN nut_data ON nut_data.nutr_id = id