From: Shane Jaroch Date: Sat, 19 Sep 2020 23:14:12 +0000 (-0400) Subject: wip X-Git-Url: https://git.nutra.tk/v1?a=commitdiff_plain;h=7d403a209734a1d20fbbf908bf2312e4d0dc38ac;p=nutratech%2Fusda-sqlite.git wip --- diff --git a/README.rst b/README.rst index 9972bca..d5e543c 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ See CLI: https://github.com/nutratech/cli Pypi page: https://pypi.org/project/nutra -Building the database +Building USDA database ######################### 1. Install ``access2csv`` dependency, @@ -26,7 +26,7 @@ Building the database .. code-block:: bash - cd data + cd usda/data bash setup.sh python3 process.py @@ -46,8 +46,8 @@ Building the database .. code-block:: bash - cd ../sql - sqlite3 nutra.db + cd .. + sqlite3 usda.db NOTE: FOLLOW STEPS 5 and 6 FROM INSIDE THE SQL SHELL @@ -63,9 +63,9 @@ Or alternatively from the bash shell (outside SQL) .. code-block:: bash - sqlite3 nutra.db -init init.sql + sqlite3 usda.db -init init.sql -6. Verify the tables (again inside the SQL shell :code:`sqlite nutra.db`), +6. Verify the tables (again inside the SQL shell :code:`sqlite usda.db`), .. code-block:: sql @@ -75,14 +75,14 @@ Or alternatively from the bash shell (outside SQL) SELECT * FROM nt_ver; .exit -7. If everything looks good, compress into :code:`nutra-X.X.X.db.tar.xz` and upload to binary host. +7. If everything looks good, compress into :code:`usda-X.X.X.db.tar.xz` and upload to binary host. Tables (Relational Design) ########################## -See :code:`sql/tables.sql` for details. +See :code:`usda/sql/tables.sql` and :code:`nt/*/sql/tables.sql` for details. This is frequently updated, see :code:`docs/` for more info. -.. image:: docs/nutra.svg +.. 
image:: docs/usda.svg diff --git a/data/access2csv b/data/access2csv deleted file mode 160000 index 2edd3cb..0000000 --- a/data/access2csv +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2edd3cba957024bcde1c78c69f2c53443a155595 diff --git a/data/rda.csv b/data/rda.csv deleted file mode 100644 index 27329c5..0000000 --- a/data/rda.csv +++ /dev/null @@ -1,189 +0,0 @@ -id,rda,units,tagname,nutr_desc,anti_nutrient,weight -203,60,g,PRO,Protein,, -204,90,g,FAT,Total lipid (fat),, -205,230,g,CARB,"Carbohydrate, by difference",, -207,,g,ASH,Ash,, -208,1850,kcal,CAL,Energy,t,-0.4 -209,,g,STARCH,Starch,, -210,,g,SUCS,Sucrose,, -211,,g,GLUS,Glucose (dextrose),, -212,,g,FRUS,Fructose,, -213,,g,LACS,Lactose,, -214,,g,MALS,Maltose,, -221,,g,ALC,"Alcohol, ethyl",, -255,,g,WATER,Water,, -262,,mg,CAFFN,Caffeine,, -263,,mg,THEBRN,Theobromine,, -268,,kJ,ENERC_KJ,Energy,, -269,80,g,SUGAR,"Sugars, total",t,-0.5 -287,,g,GALS,Galactose,, -291,30,g,FIBTG,"Fiber, total dietary",, -301,1300,mg,CA,"Calcium, Ca",, -303,18,mg,FE,"Iron, Fe",, -304,420,mg,MG,"Magnesium, Mg",, -305,1250,mg,P,"Phosphorus, P",, -306,4700,mg,K,"Potassium, K",, -307,1500,mg,NA,"Sodium, Na",t,-0.5 -309,11,mg,ZN,"Zinc, Zn",, -312,0.9,mg,CU,"Copper, Cu",, -313,,µg,FL,"Fluoride, F",t,-0.5 -315,2.3,mg,MN,"Manganese, Mn",, -317,55,µg,SE,"Selenium, Se",, -318,5000,IU,VITA_IU,"Vitamin A, IU",, -319,,µg,RETOL,Retinol,, -320,900,µg,VITA_RAE,"Vitamin A, RAE",, -321,,µg,CARTB,"Carotene, beta",, -322,,µg,CARTA,"Carotene, alpha",, -323,,mg,TOCPHA,Vitamin E (alpha-tocopherol),, -324,800,IU,VITD_IU,Vitamin D,, -325,,µg,ERGCAL,Vitamin D2 (ergocalciferol),, -326,,µg,CHOCAL,Vitamin D3 (cholecalciferol),, -328,20,µg,VITD,Vitamin D (D2 + D3),, -334,,µg,CRYPX,"Cryptoxanthin, beta",, -337,3000,µg,LYCPN,Lycopene,, -338,7000,µg,LUTZEA,Lutein + zeaxanthin,, -341,,mg,TOCPHB,"Tocopherol, beta",, -342,,mg,TOCPHG,"Tocopherol, gamma",, -343,,mg,TOCPHD,"Tocopherol, delta",, -344,,mg,TOCTRA,"Tocotrienol, alpha",, -345,,mg,TOCTRB,"Tocotrienol, beta",, 
-346,,mg,TOCTRG,"Tocotrienol, gamma",, -347,,mg,TOCTRD,"Tocotrienol, delta",, -401,90,mg,VITC,"Vitamin C, total ascorbic acid",, -404,1.2,mg,B1,Thiamin,, -405,1.3,mg,B2,Riboflavin,, -406,16,mg,B3,Niacin,, -410,5,mg,B5,Pantothenic acid,, -415,1.7,mg,B6,Vitamin B-6,, -417,400,µg,B9,"Folate, total",, -418,2.4,µg,B12,Vitamin B-12,, -421,550,mg,CHO,"Choline, total",, -428,,µg,MK4,Menaquinone-4,, -429,,µg,VITK1D,Dihydrophylloquinone,, -430,120,µg,VITK,Vitamin K (phylloquinone),, -431,,µg,FOLAC,Folic acid,, -432,,µg,FOLFD,"Folate, food",, -435,,µg,FOLDFE,"Folate, DFE",, -454,,mg,BETN,Betaine,, -501,0.3,g,TRP_G,Tryptophan,, -502,1,g,THR_G,Threonine,, -503,1.4,g,ILE_G,Isoleucine,, -504,2.7,g,LEU_G,Leucine,, -505,2.1,g,LYS_G,Lysine,, -506,0.8,g,MET_G,Methionine,, -507,,g,CYS_G,Cystine,, -508,1.2,g,PHE_G,Phenylalanine,, -509,1.4,g,TYR_G,Tyrosine,, -510,1.8,g,VAL_G,Valine,, -511,1,g,ARG_G,Arginine,, -512,0.8,g,HISTN_G,Histidine,, -513,1,g,ALA_G,Alanine,, -514,1,g,ASP_G,Aspartic acid,, -515,2,g,GLU_G,Glutamic acid,, -516,0.8,g,GLY_G,Glycine,, -517,0.6,g,PRO_G,Proline,, -518,1.6,g,SER_G,Serine,, -521,,g,HYP,Hydroxyproline,, -573,,mg,VITE_ADD,"Vitamin E, added",, -578,,µg,B12_ADD,"Vitamin B-12, added",, -601,200,mg,CHOLEST,Cholesterol,t,-0.4 -605,,g,FATRN,"Fatty acids, total trans",t,-1.2 -606,30,g,FASAT,"Fatty acids, total saturated",t,-0.7 -607,,g,F4D0,4:0,, -608,,g,F6D0,6:0,, -609,,g,F8D0,8:0,, -610,,g,F10D0,10:0,, -611,,g,F12D0,12:0,, -612,,g,F14D0,14:0,, -613,,g,F16D0,16:0,, -614,,g,F18D0,18:0,, -615,,g,F20D0,20:0,, -617,,g,F18D1,18:1 undifferentiated,, -618,,g,F18D2,18:2 undifferentiated,, -619,,g,F18D3,18:3 undifferentiated,, -620,,g,F20D4,20:4 undifferentiated,, -621,0.2,g,F22D6,22:6 n-3 (DHA),, -624,,g,F22D0,22:0,, -625,,g,F14D1,14:1,, -626,,g,F16D1,16:1 undifferentiated,, -627,,g,F18D4,18:4,, -628,,g,F20D1,20:1,, -629,0.1,g,F20D5,20:5 n-3 (EPA),, -630,,g,F22D1,22:1 undifferentiated,, -631,,g,F22D5,22:5 n-3 (DPA),, -636,,mg,PHYSTR,Phytosterols,, 
-638,,mg,STID7,Stigmasterol,, -639,,mg,CAMD5,Campesterol,, -641,,mg,SITSTR,Beta-sitosterol,, -645,35,g,FAMS,"Fatty acids, total monounsaturated",, -646,25,g,FAPU,"Fatty acids, total polyunsaturated",, -652,,g,F15D0,15:0,, -653,,g,F17D0,17:0,, -654,,g,F24D0,24:0,, -662,,g,F16D1T,16:1 t,, -663,,g,F18D1T,18:1 t,, -664,,g,F22D1T,22:1 t,, -665,,g,F18D2NFD,18:2 t not further defined,, -666,,g,F18D2I,18:2 i,, -669,,g,F18D2TT,"18:2 t,t",, -670,,g,F18D2CLA,18:2 CLAs,, -671,,g,F24D1C,24:1 c,, -672,,g,F20D2CN6,"20:2 n-6 c,c",, -673,,g,F16D1C,16:1 c,, -674,,g,F18D1C,18:1 c,, -675,,g,F18D2CN6,"18:2 n-6 c,c",, -676,,g,F22D1C,22:1 c,, -685,,g,F18D3CN6,"18:3 n-6 c,c,c",, -687,,g,F17D1,17:1,, -689,,g,F20D3,20:3 undifferentiated,, -693,,g,FATRNM,"Fatty acids, total trans-monoenoic",, -695,,g,FATRNP,"Fatty acids, total trans-polyenoic",, -696,,g,F13D0,13:0,, -697,,g,F15D1,15:1,, -851,2,g,F18D3CN3,"18:3 n-3 c,c,c (ALA)",, -852,,g,F20D3N3,20:3 n-3,, -853,,g,F20D3N6,20:3 n-6,, -855,,g,F20D4N6,20:4 n-6,, -856,,g,F18D3I,18:3i,, -857,,g,F21D5,21:5,, -858,,g,F22D4,22:4,, -859,,g,F18D1TN7,18:1-11 t (18:1t n-7),, -710,30,mg,DAID,Daidzein,, -711,15,mg,GENI,Genistein,, -712,,mg,GLYC,Glycitein,, -713,45,mg,TOTISO,Total isoflavones,, -714,,mg,BIOC,Biochanin A,, -715,,mg,FORM,Formononetin,, -716,,mg,COUM,Coumestrol,, -734,5,mg,PAdimer,Proanthocyanidin dimers,, -735,5,mg,PAtrimer,Proanthocyanidin trimers,, -736,20,mg,PA4-6mer,Proanthocyanidin 4-6mers,, -737,20,mg,PA7-10mer,Proanthocyanidin 7-10mers,, -738,60,mg,PApolymer,Proanthocyanidin polymers (>10mers),, -731,20,mg,CYAD,Cyanidin,, -740,,mg,PETUNIDIN,Petunidin,, -741,,mg,DELPH,Delphinidin ,, -742,,mg,MALVIDIN,Malvidin,, -743,,mg,PELA,Pelargonidin,, -745,,mg,PEONIDIN,Peonidin,, -749,40,mg,CATE,(+)-Catechin,, -750,40,mg,EPICATEGC,(-)-Epigallocatechin,, -751,40,mg,EPICATEC,(-)-Epicatechin,, -752,40,mg,EPICATECG3,(-)-Epicatechin 3-gallate,, -753,60,mg,EGCG,(-)-Epigallocatechin 3-gallate,, -755,,mg,THEAF,Theaflavin,, -756,,mg,THEAR,Thearubigins,, 
-758,,mg,ERIOD,Eriodictyol,, -759,80,mg,HESPT,Hesperetin,, -762,60,mg,NARING,Naringenin,, -770,30,mg,APIGEN,Apigenin,, -773,20,mg,LUTEOL,Luteolin,, -785,5,mg,ISORHAM,Isorhamnetin,, -786,5,mg,KAEMF,Kaempferol,, -788,2,mg,MYRIC,Myricetin,, -789,20,mg,QUERCE,Quercetin,, -791,,mg,THEA3DI,"Theaflavin-3,3'-digallate",, -792,,mg,THEA3GP,Theaflavin-3'-gallate,, -793,,mg,THEA3G,Theaflavin-3-gallate,, -794,,mg,GALCATEGC,(+)-Gallocatechin,, -2000,1.5,mg,B,"Boron, B",, diff --git a/docs/nutra.svg b/docs/nutra.svg deleted file mode 100644 index 306ea19..0000000 --- a/docs/nutra.svg +++ /dev/null @@ -1,363 +0,0 @@ - - - - - - -undefined - -nutra.db - - -version - -version - -id* -integer -version -text -created -timestamp - - - -sqlite_sequence - -sqlite_sequence - -name - -seq - - - - -nutr_def - -nutr_def - -id* -integer -rda -float -unit -text -tagname -text -nutr_desc -text -anti_nutrient -boolean -num_dec -int -sr_order -int -flav_class -text - - - -fdgrp - -fdgrp - -id* -integer -fdgrp_desc -text - - - -food_des - -food_des - -id* -integer -fdgrp_id -int -long_desc -text -shrt_desc -text -com_name -text -manufac_name -text -survey -text -ref_desc -text -refuse -int -sci_name -text -n_factor -float -pro_factor -float -fat_factor -float -cho_factor -float - - - -food_des->fdgrp - - - - - -src_cd - -src_cd - -id* -text -description -text - - - -deriv_cd - -deriv_cd - -id* -text -description -text - - - -nut_data - -nut_data - -food_id -int -nutr_id -int -nutr_val -float -num_data_pts -int -std_err -float -src_cd -text -deriv_cd -text -ref_food_id -int -add_nutr_mark -text -num_studies -int -min -float -max -float -df -long -low_eb -float -up_eb -float -stat_cmt -text -add_mod_date -date -cc -text - - - -nut_data->nutr_def - - - - - -nut_data->food_des - - - - - -nut_data->src_cd - - - - - -nut_data->deriv_cd - - - - - -lang_desc - -lang_desc - -id* -text -description -text - - - -langual - -langual - -food_id -int -factor_id -text - - - -langual->food_des - - - - - 
-langual->lang_desc - - - - - -data_src - -data_src - -id* -text -authors -text -title -text -year -text -journal -text -vol_city -text -issue_state -text -start_page -text -end_page -text - - - -datsrcln - -datsrcln - -food_id -int -nutr_id -int -data_src_id -text - - - -datsrcln->nutr_def - - - - - -datsrcln->food_des - - - - - -datsrcln->data_src - - - - - -footnote - -footnote - -food_id -int -footnt_no -int -footnt_typ -text -nutr_id -int -footnt_txt -text - - - -footnote->nutr_def - - - - - -footnote->food_des - - - - - -serv_desc - -serv_desc - -id* -integer -msre_desc -text - - - -serving - -serving - -food_id -int -msre_id -int -grams -float -num_data_pts -int -std_dev -float - - - -serving->food_des - - - - - -serving->serv_desc - - - - - diff --git a/docs/sqleton.sh b/docs/sqleton.sh index e5e52c1..0457620 100755 --- a/docs/sqleton.sh +++ b/docs/sqleton.sh @@ -2,6 +2,6 @@ # cd to script's directory cd "$(dirname "$0")" -cd ../sql +cd ../usda -sqleton -o ../docs/nutra.svg nutra.db +sqleton -o ../docs/usda.svg usda.db diff --git a/docs/usda.svg b/docs/usda.svg new file mode 100644 index 0000000..ad21bc5 --- /dev/null +++ b/docs/usda.svg @@ -0,0 +1,361 @@ + + + + + + +undefined + +usda.db + + +version + +version + +id* +integer +version +text +created +timestamp + + + +sqlite_sequence + +sqlite_sequence + +name + +seq + + + + +nutr_def + +nutr_def + +id* +integer +rda +float +unit +text +tagname +text +nutr_desc +text +num_dec +int +sr_order +int +flav_class +text + + + +fdgrp + +fdgrp + +id* +integer +fdgrp_desc +text + + + +food_des + +food_des + +id* +integer +fdgrp_id +int +long_desc +text +shrt_desc +text +com_name +text +manufac_name +text +survey +text +ref_desc +text +refuse +int +sci_name +text +n_factor +float +pro_factor +float +fat_factor +float +cho_factor +float + + + +food_des->fdgrp + + + + + +src_cd + +src_cd + +id* +text +description +text + + + +deriv_cd + +deriv_cd + +id* +text +description +text + + + +nut_data + +nut_data + +food_id 
+int +nutr_id +int +nutr_val +float +num_data_pts +int +std_err +float +src_cd +text +deriv_cd +text +ref_food_id +int +add_nutr_mark +text +num_studies +int +min +float +max +float +df +long +low_eb +float +up_eb +float +stat_cmt +text +add_mod_date +date +cc +text + + + +nut_data->nutr_def + + + + + +nut_data->food_des + + + + + +nut_data->src_cd + + + + + +nut_data->deriv_cd + + + + + +lang_desc + +lang_desc + +id* +text +description +text + + + +langual + +langual + +food_id +int +factor_id +text + + + +langual->food_des + + + + + +langual->lang_desc + + + + + +data_src + +data_src + +id* +text +authors +text +title +text +year +text +journal +text +vol_city +text +issue_state +text +start_page +text +end_page +text + + + +datsrcln + +datsrcln + +food_id +int +nutr_id +int +data_src_id +text + + + +datsrcln->nutr_def + + + + + +datsrcln->food_des + + + + + +datsrcln->data_src + + + + + +footnote + +footnote + +food_id +int +footnt_no +int +footnt_typ +text +nutr_id +int +footnt_txt +text + + + +footnote->nutr_def + + + + + +footnote->food_des + + + + + +serv_desc + +serv_desc + +id* +integer +msre_desc +text + + + +serving + +serving + +food_id +int +msre_id +int +grams +float +num_data_pts +int +std_dev +float + + + +serving->food_des + + + + + +serving->serv_desc + + + + + diff --git a/sql/tables.sql b/nt/biometrics/tables.sql similarity index 100% copy from sql/tables.sql copy to nt/biometrics/tables.sql diff --git a/sql/build.sh b/nt/build.sh similarity index 100% copy from sql/build.sh copy to nt/build.sh diff --git a/sql/format-sql.sh b/nt/format-sql.sh similarity index 100% copy from sql/format-sql.sh copy to nt/format-sql.sh diff --git a/sql/init.sql b/nt/init.sql similarity index 100% copy from sql/init.sql copy to nt/init.sql diff --git a/sql/tables.sql b/nt/plates/tables.sql similarity index 100% copy from sql/tables.sql copy to nt/plates/tables.sql diff --git a/sql/tables.sql b/nt/recipes/tables.sql similarity index 100% copy from sql/tables.sql 
copy to nt/recipes/tables.sql diff --git a/sql/version.csv b/sql/version.csv deleted file mode 100644 index 316748e..0000000 --- a/sql/version.csv +++ /dev/null @@ -1,6 +0,0 @@ -id,version,created -1,0.0.0,Wed 05 Aug 2020 07:09:35 PM EDT -2,0.0.1,Wed 05 Aug 2020 08:14:52 PM EDT -3,0.0.2,Thu 06 Aug 2020 11:39:54 AM EDT -4,0.0.3,Tue 11 Aug 2020 09:37:50 PM EDT -5,0.0.4,Mon 07 Sep 2020 10:01:45 AM EDT diff --git a/sql/build.sh b/usda/build.sh similarity index 72% rename from sql/build.sh rename to usda/build.sh index ffeb8bd..5e0df3c 100755 --- a/sql/build.sh +++ b/usda/build.sh @@ -10,20 +10,20 @@ fi cd "$(dirname "$0")" # printf "\\n\e[1;31m\e[0m\\n" -rm_cmd="rm -f nutra.db" +rm_cmd="rm -f usda.db" printf "\\n\e[1;31m${rm_cmd}\e[0m\\n\n" $rm_cmd -pack_msg="==> Pack nutra.db-$VERSION" +pack_msg="==> Pack usda.db-$VERSION" printf "\\n\\x1b[32m${pack_msg}\x1b[0m\n\n" # Create SQL file -pack_cmd="sqlite3 nutra.db \".read init.sql\"" +pack_cmd="sqlite3 usda.db \".read init.sql\"" printf "\\n\e[1;31m${pack_cmd}\e[0m\\n" bash -exec "$pack_cmd" # Compress xzip -tar_cmd="tar cJvf nutra.db-$VERSION.tar.xz nutra.db" +tar_cmd="tar cJvf usda.db-$VERSION.tar.xz usda.db" printf "\\n\e[1;31m${tar_cmd}\e[0m\\n" $tar_cmd @@ -31,6 +31,6 @@ $tar_cmd # printf "\\n\e[1;31m${rm_cmd}\e[0m\\n" # $rm_cmd -mv_cmd="mv nutra.db-$VERSION.tar.xz dist" +mv_cmd="mv usda.db-$VERSION.tar.xz dist" printf "\\n\e[1;31m${mv_cmd}\e[0m\\n\n" $mv_cmd diff --git a/usda/data/access2csv/.gitignore b/usda/data/access2csv/.gitignore new file mode 100644 index 0000000..5b13aa4 --- /dev/null +++ b/usda/data/access2csv/.gitignore @@ -0,0 +1,7 @@ +/access2csv.jar +/bin +/target/ +.classpath +.settings +.project +/data diff --git a/usda/data/access2csv/.travis.yml b/usda/data/access2csv/.travis.yml new file mode 100644 index 0000000..8ff9659 --- /dev/null +++ b/usda/data/access2csv/.travis.yml @@ -0,0 +1,10 @@ +language: java +sudo: false +cache: + directories: + - $HOME/.m2 +jdk: + - oraclejdk7 + - oraclejdk8 
+after_success: + - mvn clean test jacoco:report coveralls:report diff --git a/usda/data/access2csv/Dockerfile b/usda/data/access2csv/Dockerfile new file mode 100644 index 0000000..1f56a8d --- /dev/null +++ b/usda/data/access2csv/Dockerfile @@ -0,0 +1,16 @@ +FROM openjdk:8-jdk-alpine as build + +RUN apk update && apk add git maven + +ADD ./ /app + +WORKDIR /app + +RUN mvn clean install + +FROM openjdk:8-jre-alpine + +COPY --from=build /app/target /app +WORKDIR /app + +ENTRYPOINT [ "/app/appassembler/bin/access2csv" ] \ No newline at end of file diff --git a/usda/data/access2csv/LICENSE b/usda/data/access2csv/LICENSE new file mode 100644 index 0000000..5df17bb --- /dev/null +++ b/usda/data/access2csv/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013 Accelerated Data Works, Ryan Davis + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/usda/data/access2csv/README.md b/usda/data/access2csv/README.md new file mode 100644 index 0000000..bac3c4a --- /dev/null +++ b/usda/data/access2csv/README.md @@ -0,0 +1,67 @@ +# access2csv + +Simple program to extract data from Access databases into CSV files. + +## Features + + * view the schema of the database + * export all tables to csv files named after the table + * export one table + +## Examples + +Dumping a schema: + + $ ./access2csv myfile.accdb --schema + CREATE TABLE Test( + Id INT, + Name TEXT, + ) + CREATE TABLE Test2( + Id INT, + Name TEXT + ) + +Exporting all tables: + + $ ./access2csv myfile.accdb + Exporting 'Test' to /home/ryepup/Test.csv + 2 rows exported + Exporting 'Test2' to /home/ryepup/Test2.csv + 100000 rows exported + +Export one table: + + $ ./access2csv myfile.accdb Test + 1,"foo" + 2,"bar" + +## Installation + +Binaries are available at +https://github.com/AccelerationNet/access2csv/releases; download a jar +file from there, then use it as shown above. + +### Compile from source + + $ git clone https://github.com/AccelerationNet/access2csv.git + $ cd access2csv + $ mvn clean install + +Now you should have an `access2csv.jar` in the target directory, ready to go. + +Note (December 2017): things have changed a little. If nothing else works, then (after compiling with `mvn clean install`) try running something +like this (example of Windows batch file) in the root of the repository (replace the path\to\file): +
 ".\target\appassembler\bin\access2csv.bat" --input ".\path\to\file" --output . --write-null NULL --quote-all false --schema --with-header 
+ + +## Dependencies + + * [Jackcess](http://jackcess.sourceforge.net/) - a pure Java library + for reading from and writing to MS Access databases + * [opencsv](http://opencsv.sourceforge.net/) - CSV library + +## Contributing + +Use https://github.com/AccelerationNet/access2csv to open issues or +pull requests. diff --git a/usda/data/access2csv/access2csv b/usda/data/access2csv/access2csv new file mode 100755 index 0000000..a78eb1a --- /dev/null +++ b/usda/data/access2csv/access2csv @@ -0,0 +1,13 @@ +#!/bin/bash +# This script runs the Access 2 CSV code. +# Before running this script for the first time +# you may need to run: +# chmod +x access2csv +# + +if [ ! -d "target/appassembler/bin" ]; then + mvn -quiet clean install -DskipTests -Djetty.skip +fi + +chmod u+x target/appassembler/bin/* +target/appassembler/bin/access2csv "$@" diff --git a/usda/data/access2csv/pom.xml b/usda/data/access2csv/pom.xml new file mode 100755 index 0000000..40d59e5 --- /dev/null +++ b/usda/data/access2csv/pom.xml @@ -0,0 +1,399 @@ + + + 4.0.0 + com.github.AccelerationNet.access2csv + access2csv + 0.2-SNAPSHOT + Access to CSV + Converter from Access to CSV + jar + + https://github.com/AccelerationNet/access2csv/ + + + MIT License + https://github.com/AccelerationNet/access2csv/blob/master/LICENSE + repo + + + + git@github.com:AccelerationNet/access2csv.git + scm:git:git@github.com:AccelerationNet/access2csv.git + scm:git:git@github.com:AccelerationNet/access2csv.git + + + + Ryan Davis + + + Peter Ansell + + + + + UTF-8 + UTF-8 + + 4.12 + 1.7.16 + + + 3.0.5 + + + + + com.opencsv + opencsv + + + com.healthmarketscience.jackcess + jackcess + + + net.sf.jopt-simple + jopt-simple + + + org.slf4j + jcl-over-slf4j + + + org.slf4j + slf4j-simple + + + junit + junit + + + + + + + com.opencsv + opencsv + 3.7 + + + com.healthmarketscience.jackcess + jackcess + 2.1.3 + + + commons-logging + commons-logging + + + + + net.sf.jopt-simple + jopt-simple + 4.9 + + + junit + junit + ${junit.version} +
test + + + commons-logging + commons-logging + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + jcl-over-slf4j + ${slf4j.version} + runtime + + + org.slf4j + slf4j-simple + ${slf4j.version} + runtime + + + commons-lang + commons-lang + 2.6 + + + org.apache.commons + commons-lang3 + 3.4 + + + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + + package + + assemble + + + + + + + access2csv.Driver + access2csv + + + + + + org.jacoco + jacoco-maven-plugin + + + prepare-agent + + prepare-agent + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + make-assembly + package + + single + + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.6 + + + bin + jar-with-dependencies + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.5.1 + + 1.6 + 1.6 + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.3 + + + org.apache.maven.plugins + maven-deploy-plugin + 2.8.2 + + + org.apache.maven.plugins + maven-resources-plugin + 2.7 + + + org.apache.maven.plugins + maven-install-plugin + 2.5.2 + + + org.apache.maven.plugins + maven-clean-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + org.apache.maven.plugins + maven-jar-plugin + 2.6 + + + + test-jar + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.4 + + + attach-source + + jar + + + + attach-test-sources + + test-jar-no-fork + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.19.1 + + + org.codehaus.mojo + appassembler-maven-plugin + 1.10 + + + + org.eluder.coveralls + coveralls-maven-plugin + 3.0.1 + + + org.jacoco + jacoco-maven-plugin + 0.7.4.201502262128 + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.jacoco + + jacoco-maven-plugin + + + [0.7.2.201409121644,) + + + prepare-agent + + + + + + + + + + + + + + + + + sonatype-nexus-snapshots + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots + + false + + + true + + + + + + + + sonatype-nexus-snapshots 
+ Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + sonatype-nexus-staging + Nexus Release Repository + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + + sonatype-oss-release + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-gpg-plugin + + + sign-artifacts + verify + + sign + + + + + + + + + + diff --git a/usda/data/access2csv/src/access2csv/Driver.java b/usda/data/access2csv/src/access2csv/Driver.java new file mode 100644 index 0000000..65f32d6 --- /dev/null +++ b/usda/data/access2csv/src/access2csv/Driver.java @@ -0,0 +1,189 @@ +package access2csv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.Writer; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; + +import com.opencsv.CSVWriter; + +import com.healthmarketscience.jackcess.*; + +public class Driver { + + static int export(Database db, String tableName, Writer csv, boolean withHeader) throws IOException{ + Table table = db.getTable(tableName); + String[] buffer = new String[table.getColumnCount()]; + CSVWriter writer = new CSVWriter(new BufferedWriter(csv)); + int rows = 0; + try{ + if (withHeader) { + int x = 0; + for(Column col : table.getColumns()){ + buffer[x++] = col.getName(); + } + writer.writeNext(buffer); + } + + for(Row row : table){ + int i = 0; + for (Object object : row.values()) { + buffer[i++] = object == null ? 
null : object.toString(); + } + writer.writeNext(buffer); + rows++; + } + }finally{ + writer.close(); + } + return rows; + } + + static Database openDatabase( String filename, String db_passwd ) throws IOException{ + if( db_passwd.equals("")) { + Database db = DatabaseBuilder.open(new File(filename)); + return( db ) ; + } else { + Database db = new DatabaseBuilder(new File(filename)) + .setCodecProvider(new CryptCodecProvider(db_passwd)) + .open(); + return( db ) ; + } + } + + static void export(String filename, String tableName, boolean withHeader, String db_passwd) throws IOException{ + Database db = openDatabase( filename, db_passwd ) ; + + try{ + export(db, tableName, new PrintWriter(System.out), withHeader); + }finally{ + db.close(); + } + } + + static void schema(String filename, String db_passwd) throws IOException{ + Database db = openDatabase( filename, db_passwd ) ; + + try{ + for(String tableName : db.getTableNames()){ + Table table = db.getTable(tableName); + System.out.println(String.format("CREATE TABLE %s (", tableName)); + for(Column col : table.getColumns()){ + System.out.println(String.format(" %s %s,", + col.getName(), col.getType())); + } + System.out.println(")"); + } + }finally{ + db.close(); + } + + } + + static void exportAll(String filename, boolean withHeader, String db_passwd) throws IOException{ + Database db = openDatabase( filename, db_passwd ) ; + + try{ + for(String tableName : db.getTableNames()){ + String csvName = tableName + ".csv"; + Writer csv = new FileWriter(csvName); + try{ + System.out.println(String.format("Exporting '%s' to %s/%s", + tableName, System.getProperty("user.dir"), csvName)); + int rows = export(db, tableName, csv, withHeader); + System.out.println(String.format("%d rows exported", rows)); + }finally{ + try{ + csv.flush(); + csv.close(); + }catch(IOException ex){} + } + } + }finally{ + db.close(); + } + + } + + static void printUsage(){ + System.out.println("Usage:"); + System.out.println(" java -jar 
access2csv.jar [ACCESS FILE] [OPTIONS]"); + System.out.println(""); + System.out.println("Options:"); + System.out.println(""); + System.out.println(" * if no options are provided, all tables will be exported to CSV files,"); + System.out.println(" one file per table. Output file paths will be printed to stdout"); + System.out.println(" * '--password password' - tries to open an encrypted database with the specified password"); + System.out.println(" * '--schema' - prints the database schema"); + System.out.println(" * '--with-header' - export the header with the field names"); + System.out.println(" * [TABLENAME] - prints the given table as CSV to stdout"); + } + + /** + * @param args + * @throws IOException + */ + public static void main(String[] cmdLineArgs) throws IOException { + List helpCommands = Arrays.asList(new String[]{"-h", "--help", "-H", "/?"}); + List passwdCommands = Arrays.asList(new String[]{"-p", "--password", "--passwd"}); + + // Make a copy of the command line args and then + // process them to remove and record any passwords + // and note any --with-header options + String password = "" ; + boolean includeHeaders = false ; // the default + List argList = new ArrayList<>() ; + for( int j=0 ; j < cmdLineArgs.length ; j++ ) { + if( passwdCommands.contains(cmdLineArgs[j]) ) { + if( ++j < cmdLineArgs.length ) { + password = cmdLineArgs[j] ; + } else { + printUsage(); + System.exit(0); + } + } + else if( cmdLineArgs[j].equals("--with-header") ) { + includeHeaders = true ; + } + else { + // Save the arg to handle later + argList.add( cmdLineArgs[j] ) ; + } + } + + // Copy argList into an array so that the block below will work + String[] args = argList.toArray(new String[argList.size()]) ; + + // Handle remaining args + if( args.length == 1 ) { + if( helpCommands.contains(args[0]) ){ + printUsage(); + } + else { + exportAll(args[0], includeHeaders, password); + } + + System.exit(0); + } + else if( args.length == 2 ) { + if( args[1].equals("--schema") 
){ + schema(args[0], password) ; + } + else { + export(args[0], args[1], includeHeaders, password); + } + + System.exit(0) ; + } + else { + System.err.println("Invalid arguments."); + printUsage(); + System.exit(1); + } + } +} diff --git a/usda/data/access2csv/src/main/java/access2csv/Driver.java b/usda/data/access2csv/src/main/java/access2csv/Driver.java new file mode 100644 index 0000000..8b5dbd2 --- /dev/null +++ b/usda/data/access2csv/src/main/java/access2csv/Driver.java @@ -0,0 +1,174 @@ +package access2csv; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.Writer; +import java.util.Arrays; +import java.util.List; + +import com.opencsv.CSVWriter; + +import joptsimple.OptionException; +import joptsimple.OptionParser; +import joptsimple.OptionSet; +import joptsimple.OptionSpec; + +import com.healthmarketscience.jackcess.*; + +public class Driver { + + + static int export(Database db, String tableName, Writer csv, boolean withHeader, boolean applyQuotesToAll, String nullText) throws IOException{ + Table table = db.getTable(tableName); + String[] buffer = new String[table.getColumnCount()]; + CSVWriter writer = new CSVWriter(new BufferedWriter(csv), CSVWriter.DEFAULT_SEPARATOR, CSVWriter.DEFAULT_QUOTE_CHARACTER); + int rows = 0; + try{ + if (withHeader) { + int x = 0; + for(Column col : table.getColumns()){ + buffer[x++] = col.getName(); + } + writer.writeNext(buffer, applyQuotesToAll); + } + + for(Row row : table){ + int i = 0; + for (Object object : row.values()) { + buffer[i++] = object == null ? 
nullText : object.toString(); + } + writer.writeNext(buffer, applyQuotesToAll); + rows++; + } + }finally{ + writer.close(); + } + return rows; + } + + static void export(File inputFile, String tableName, boolean withHeader, File outputDir, String csvPrefix, boolean applyQuotesToAll, String nullText) throws IOException{ + Database db = DatabaseBuilder.open(inputFile); + try{ + export(db, tableName, new FileWriter(new File(outputDir, csvPrefix + tableName + ".csv")), withHeader, applyQuotesToAll, nullText); + }finally{ + db.close(); + } + } + + static void schema(File inputFile) throws IOException{ + + Database db = DatabaseBuilder.open(inputFile); + try{ + for(String tableName : db.getTableNames()){ + Table table = db.getTable(tableName); + System.out.println(String.format("CREATE TABLE %s (", tableName)); + for(Column col : table.getColumns()){ + System.out.println(String.format(" %s %s,", + col.getName(), col.getType())); + } + System.out.println(")"); + } + }finally{ + db.close(); + } + + } + + static void exportAll(File inputFile, boolean withHeader, File outputDir, String csvPrefix, boolean applyQuotesToAll, String nullText) throws IOException{ + Database db = DatabaseBuilder.open(inputFile); + try{ + for(String tableName : db.getTableNames()){ + String csvName = csvPrefix + tableName + ".csv"; + File outputFile = new File(outputDir, csvName); + Writer csv = new FileWriter(outputFile); + try{ + System.out.println(String.format("Exporting '%s' to %s", + tableName, outputFile.toString())); + int rows = export(db, tableName, csv, withHeader, applyQuotesToAll, nullText); + System.out.println(String.format("%d rows exported", rows)); + }finally{ + try{ + csv.flush(); + csv.close(); + }catch(IOException ex){} + } + } + }finally{ + db.close(); + } + + } + + public static void main(String[] args) throws Exception { + final OptionParser parser = new OptionParser(); + + final OptionSpec help = parser.acceptsAll(Arrays.asList("help")).forHelp(); + final OptionSpec schema 
= parser.accepts("schema").withOptionalArg() + .describedAs("The schema is written to standard output."); + final OptionSpec withHeader = parser.accepts("with-header").withOptionalArg() + .describedAs("When with-header is included, a header line of column names is written to each data file."); + final OptionSpec input = parser.accepts("input").withRequiredArg().ofType(File.class).required() + .describedAs("The input accdb file."); + final OptionSpec table = parser.accepts("table").withRequiredArg().ofType(String.class).describedAs("The table name to export, or all if it is not specified."); + final OptionSpec output = parser.accepts("output").requiredUnless("schema").withRequiredArg().ofType(File.class) + .describedAs("The output directory for data files. This is required for writing data output. This not required for schema output."); + final OptionSpec csvPrefix = parser.accepts("csv-prefix").withRequiredArg().ofType(String.class).defaultsTo("").describedAs("A prefix to add to all of the generated CSV file names"); + final OptionSpec quoteAll = parser.accepts("quote-all").withOptionalArg().ofType(Boolean.class).defaultsTo(true) + .describedAs("Set quote-all to true if all values are to be quoted. " + + "Set to false if quotes are only to be applied to values which contain " + + "the separator, secape, quote, or new line characters. The default is true."); + final OptionSpec writeNull = parser.accepts("write-null").withOptionalArg().ofType(String.class).defaultsTo("") + .describedAs("The text to write when entry is NULL. Defaults to empty output if not specified or if no argument supplied. 
" + + "If quote-all is set to true then the value for write-null is also quoted."); + + + OptionSet options = null; + + try { + options = parser.parse(args); + } catch (final OptionException e) { + System.out.println(e.getMessage()); + parser.printHelpOn(System.out); + throw e; + } + + if (options.has(help)) { + parser.printHelpOn(System.out); + return; + } + + File inputFile = input.value(options); + if(!inputFile.exists()) { + throw new FileNotFoundException("Could not find input file: " + inputFile.toString()); + } + + File outputDir = null; + if (options.has(output)) { + outputDir = output.value(options); + if(!outputDir.exists()) { + outputDir.mkdirs(); + } + } + + boolean applyQuotesToAll = quoteAll.value(options); + String nullText = writeNull.value(options); + + if (options.has(schema)) { + schema(inputFile); + } + + if (null != outputDir) { + if (options.has(table)){ + export(inputFile, table.value(options), options.has(withHeader), outputDir, csvPrefix.value(options), applyQuotesToAll, nullText); + } + else { + exportAll(inputFile, options.has(withHeader), outputDir, csvPrefix.value(options), applyQuotesToAll, nullText); + } + } + } + +} diff --git a/usda/data/access2csv/src/test/java/access2csv/DriverTest.java b/usda/data/access2csv/src/test/java/access2csv/DriverTest.java new file mode 100644 index 0000000..a6c7d3f --- /dev/null +++ b/usda/data/access2csv/src/test/java/access2csv/DriverTest.java @@ -0,0 +1,103 @@ +/** + * + */ +package access2csv; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileNotFoundException; + +import org.junit.After; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +/** + * Tests for {@link Driver}. 
+ * + * @author Peter Ansell p_ansell@yahoo.com + */ +public class DriverTest { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Rule + public TemporaryFolder tempDir = new TemporaryFolder(); + + private File testDir; + + @Before + public void setUp() throws Exception { + testDir = tempDir.newFolder(); + // TODO: Copy test file into testDir. Note, it will be deleted automatically by TemporaryFolder after each test method + } + + /** + * Test method for {@link access2csv.Driver#export(com.healthmarketscience.jackcess.Database, java.lang.String, java.io.Writer, boolean)}. + */ + @Ignore("TODO: Crerate sample file") + @Test + public final void testExportDatabaseStringWriterBoolean() throws Exception { + fail("Not yet implemented"); // TODO + } + + /** + * Test method for {@link access2csv.Driver#export(java.lang.String, java.lang.String)}. + */ + @Ignore("TODO: Crerate sample file") + @Test + public final void testExportStringString() throws Exception { + fail("Not yet implemented"); // TODO + } + + /** + * Test method for {@link access2csv.Driver#schema(java.lang.String)}. + */ + @Ignore("TODO: Crerate sample file") + @Test + public final void testSchema() throws Exception { + fail("Not yet implemented"); // TODO + } + + /** + * Test method for {@link access2csv.Driver#exportAll(java.lang.String, boolean)}. + */ + @Ignore("TODO: Crerate sample file") + @Test + public final void testExportAll() throws Exception { + fail("Not yet implemented"); // TODO + } + + /** + * Test method for {@link access2csv.Driver#main(java.lang.String[])}. + */ + @Test + public final void testMainHelp() throws Exception { + Driver.main(new String[] { "--help" }); + Driver.main(new String[] { "-h" }); + } + + /** + * Test method for {@link access2csv.Driver#main(java.lang.String[])}. 
+ */ + @Test + public final void testMainSchemaMissingDatabase() throws Exception { + thrown.expect(FileNotFoundException.class); + Driver.main(new String[] { "--input", "does-not-exist.accdb", "--schema" , "--output", new File(testDir, "dir-does-not-exist").toString()}); + } + + /** + * Test method for {@link access2csv.Driver#main(java.lang.String[])}. + */ + @Test + public final void testHeaderMissingDatabase() throws Exception { + thrown.expect(FileNotFoundException.class); + Driver.main(new String[] { "--input", "does-not-exist.accdb", "--with-header" , "--output", new File(testDir, "dir-does-not-exist").toString()}); + } + +} diff --git a/data/dep-graph.yml b/usda/data/dep-graph.yml similarity index 100% rename from data/dep-graph.yml rename to usda/data/dep-graph.yml diff --git a/data/process.py b/usda/data/process.py similarity index 100% rename from data/process.py rename to usda/data/process.py diff --git a/data/setup.sh b/usda/data/setup.sh similarity index 100% rename from data/setup.sh rename to usda/data/setup.sh diff --git a/sql/format-sql.sh b/usda/format-sql.sh similarity index 100% rename from sql/format-sql.sh rename to usda/format-sql.sh diff --git a/sql/import.sql b/usda/import.sql similarity index 54% rename from sql/import.sql rename to usda/import.sql index 8716ce4..e6e74d6 100644 --- a/sql/import.sql +++ b/usda/import.sql @@ -18,26 +18,26 @@ .import '| tail -n +2 version.csv' version -.import '| tail -n +2 ../data/nt/nutr_def.csv' nutr_def -.import '| tail -n +2 ../data/nt/fdgrp.csv' fdgrp -.import '| tail -n +2 ../data/nt/food_des.csv' food_des +.import '| tail -n +2 ./data/nt/nutr_def.csv' nutr_def +.import '| tail -n +2 ./data/nt/fdgrp.csv' fdgrp +.import '| tail -n +2 ./data/nt/food_des.csv' food_des -.import '| tail -n +2 ../data/nt/src_cd.csv' src_cd -.import '| tail -n +2 ../data/nt/deriv_cd.csv' deriv_cd -.import '| tail -n +2 ../data/nt/nut_data.csv' nut_data +.import '| tail -n +2 ./data/nt/src_cd.csv' src_cd +.import '| tail -n 
+2 ./data/nt/deriv_cd.csv' deriv_cd +.import '| tail -n +2 ./data/nt/nut_data.csv' nut_data -.import '| tail -n +2 ../data/nt/lang_desc.csv' lang_desc -.import '| tail -n +2 ../data/nt/langual.csv' langual +.import '| tail -n +2 ./data/nt/lang_desc.csv' lang_desc +.import '| tail -n +2 ./data/nt/langual.csv' langual -.import '| tail -n +2 ../data/nt/data_src.csv' data_src -.import '| tail -n +2 ../data/nt/datsrcln.csv' datsrcln +.import '| tail -n +2 ./data/nt/data_src.csv' data_src +.import '| tail -n +2 ./data/nt/datsrcln.csv' datsrcln -.import '| tail -n +2 ../data/nt/serv_desc.csv' serv_desc -.import '| tail -n +2 ../data/nt/serving.csv' serving +.import '| tail -n +2 ./data/nt/serv_desc.csv' serv_desc +.import '| tail -n +2 ./data/nt/serving.csv' serving -.import '| tail -n +2 ../data/nt/footnote.csv' footnote +.import '| tail -n +2 ./data/nt/footnote.csv' footnote .header on .mode column diff --git a/sql/init.sql b/usda/init.sql similarity index 100% rename from sql/init.sql rename to usda/init.sql diff --git a/sql/tables.sql b/usda/tables.sql similarity index 100% rename from sql/tables.sql rename to usda/tables.sql