(:
  file:    count_distinct_unitids_all_years_v2.xq
  date:    22-June-2010
  author:  Gary Lewis

  purpose: Combines unitids from all 25 directory files and then
           determines the overall distinct count of unitids.

  use:     Program can be run from any directory (all inputs are
           referenced by absolute path).

  notes:   1. This program replaces:
                gen_xq_distinct_unitids_all_years.xq,
                count_distinct_unitids_all_years.xq, and
                the 25 files called distinct_unitids_by_year.xml
                (1 file per year in the time series).
           2. This program obsoletes the following:
                gen_xq_distinct_unitids.xq,
                distinct_unitids_by_year.sh,
                distinct_unitids_by_year.xq.

  example: zorba -f -q count_distinct_unitids_all_years_v2.xq -z indent=yes

  result:  A single integer: the number of distinct @unitid values found
           across every joined data file.

  NOTE(review): an earlier revision enclosed the whole query in "{ ... }",
  which is only legal inside an element constructor; the name of that
  wrapper element is not recoverable from the file, so the bare count is
  returned. Re-wrap the final expression if downstream tooling expects an
  XML element.
:)

(: Metadata lookups are loop-invariant, so resolve each of them once. :)
let $dataFiles :=
  fn:doc("/home/gml/rwebdb/projects/ipeds/xml/metadata/ipedsData.xml")/data/file[@type = "xml"]
let $dirFiles :=
  fn:doc("/home/gml/rwebdb/projects/ipeds/xml/metadata/ipedsFiles.xml")/ipedsFiles/file[@dir_ind = "1"]

(: Stage 1: join the two metadata files. A data file matches a directory
   entry when its name, minus the extension, equals the entry's csvName.
   Each match yields one element carrying the year and the location of the
   xml data file (path and name concatenated). :)
let $tmp1 :=
  for $i in $dataFiles
  for $j in $dirFiles
  let $name        := $i/@name,
      $path        := $i/@path,
      $xmlFile     := concat($path, $name),
      $name_wo_ext := substring-before($name, "."),
      $csvName     := $j/@csvName,
      $year        := $j/@year
  where $csvName = $name_wo_ext   (: join between the two xml metadata files :)
  order by $year
  return <file year="{$year}" xmlFile="{$xmlFile}"/>

(: Stage 2: gather every record's unitid from every joined data file.
   The attributes are collected directly; no per-record intermediate
   element is needed just to count distinct values. :)
let $unitids :=
  for $k in $tmp1
  return fn:doc($k/@xmlFile)/data/record/@unitid

(: Stage 3: distinct-values atomizes the attributes and removes duplicates. :)
return count(distinct-values($unitids))