Query library¶
Welcome to the SQL query library for ClickHouse.
Introduction¶
The schema schema and base table name base in queries must be updated to match the relevant environment. Note that variables are specified using Jinja syntax.
DHIS2¶
Metadata¶
Data elements with data set membership¶
SELECT
de."Name" as "DataElementName",
arrayStringConcat(groupArray(ds."Name"), ', ') AS "DataSetName"
FROM
{{ schema }}.{{ base }}_datasetelement dse
INNER JOIN {{ schema }}.{{ base }}_dataset ds on dse."DataSetID" = ds."ID"
INNER JOIN {{ schema }}.{{ base }}_dataelement de on dse."DataElementID" = de."ID"
GROUP BY
de."Name"
ORDER BY
de."Name";
Programs, program stages and data elements¶
SELECT
pr."ID" as "ProgramID",
pr."Name" AS "ProgramName",
ps."ID" as "ProgramStageID",
ps."Name" AS "ProgramStageName",
de."ID" as "DataElementID",
de."Name" AS "DataElementName",
de."ValueType" as "DataElementValueType",
os."ID" as "OptionSetID",
os."Name" as "OptionSetName"
FROM
{{ schema }}.{{ base }}_program AS pr
INNER JOIN {{ schema }}.{{ base }}_programstage AS ps ON pr."ID" = ps."ProgramID"
INNER JOIN {{ schema }}.{{ base }}_programstagedataelement AS psde ON ps."ID" = psde."ProgramStageID"
INNER JOIN {{ schema }}.{{ base }}_dataelement AS de ON psde."DataElementID" = de."ID"
LEFT JOIN {{ schema }}.{{ base }}_optionset AS os on de."OptionSetID" = os."ID";
Data value count by data element¶
SELECT
de."Name" AS "DataElementName",
ds."Name" AS "DataSetName",
count(dv.*) AS "DataValueCount"
FROM
{{ schema }}.{{ base }}_datavalue AS dv
INNER JOIN {{ schema }}.{{ base }}_dataelement de on dv."DataElementID" = de."ID"
INNER JOIN {{ schema }}.{{ base }}_datasetelement dse on de."ID" = dse."DataElementID"
INNER JOIN {{ schema }}.{{ base }}_dataset ds on dse."DataSetID" = ds."ID"
GROUP BY
"DataElementName",
"DataSetName"
ORDER BY
"DataValueCount" DESC;
Data value count by data set¶
SELECT
ds."Name" as "DataSetName",
count(dv.*) AS "DataValueCount"
FROM
{{ schema }}.{{ base }}_dataset AS ds
INNER JOIN {{ schema }}.{{ base }}_datasetelement AS dse on ds."ID" = dse."DataSetID"
INNER JOIN {{ schema }}.{{ base }}_datavalue AS dv ON dse."DataElementID" = dv."DataElementID"
GROUP BY
"DataSetName"
ORDER BY
"DataValueCount" DESC;
Metadata integrity¶
Category combos with identical categories¶
WITH
fingerprint AS (
SELECT
"CategoryComboID",
arraySort (groupArray ("CategoryID")) AS "CategoryFingerprint"
FROM
{{ schema }}.{{ base }}_categorycombos_categories
group by
"CategoryComboID"
)
SELECT
"CategoryFingerprint",
count() AS "Occurrence",
groupArray ("CategoryComboID") AS "DuplicateCategoryCombos"
FROM
fingerprint
GROUP BY
"CategoryFingerprint"
HAVING
"Occurrence" > 1;
Category combos without categories¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categorycombo
WHERE
"ID" NOT IN (
SELECT
"CategoryComboID"
FROM
{{ schema }}.{{ base }}_categorycombos_categories
);
Category combos without category option combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categorycombo
WHERE
"ID" NOT IN (
SELECT
"CategoryComboID"
FROM
{{ schema }}.{{ base }}_categorycombos_optioncombo
);
Categories with identical category options¶
WITH
fingerprint AS (
SELECT
"CategoryID",
arraySort(groupArray("CategoryOptionID")) AS "CategoryOptionFingerprint"
FROM
{{ schema }}.{{ base }}_categories_categoryoptions
GROUP BY
"CategoryID"
)
SELECT
"CategoryOptionFingerprint",
count() AS "Occurrence",
groupArray("CategoryID") AS "DuplicateCategories"
FROM
fingerprint
GROUP BY
"CategoryOptionFingerprint"
HAVING
"Occurrence" > 1;
Categories with a single category option¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_data_category
WHERE
"Name" != 'default'
AND "ID" IN (
SELECT
"CategoryID"
FROM
{{ schema }}.{{ base }}_categories_categoryoptions
GROUP BY
"CategoryID"
HAVING
count(*) = 1
);
Categories without category options¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_category
WHERE
"ID" NOT IN(
SELECT
"CategoryID"
FROM
{{ schema }}.{{ base }}_categories_categoryoptions
);
Categories without category combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_category
WHERE
"ID" NOT IN(
SELECT
"CategoryID"
FROM
{{ schema }}.{{ base }}_categorycombos_categories
);
Category options without categories¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categoryoption
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionID"
FROM
{{ schema }}.{{ base }}_categories_categoryoptions
);
Category options without category option combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categoryoption
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionID"
FROM
{{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
);
Category option combos with identical category options¶
WITH
fingerprint AS (
SELECT
"CategoryOptionComboID",
arraySort(groupArray("CategoryOptionID")) AS "OptionFingerprint"
FROM
{{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
GROUP BY
"CategoryOptionComboID"
)
SELECT
"OptionFingerprint",
count() AS "Occurrence",
groupArray("CategoryOptionComboID") AS "DuplicateCategoryOptionCombos"
FROM
fingerprint
GROUP BY
"OptionFingerprint"
HAVING
"Occurrence" > 1;
Category option combos without category options¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categoryoptioncombo
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionComboID"
FROM
{{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
);
Category option combos without category combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.{{ base }}_categoryoptioncombo
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionComboID"
FROM
{{ schema }}.{{ base }}_categorycombos_optioncombos
);
Org unit count by level¶
SELECT
"Name" as "LevelName",
"Level" as "Level",
count(*) As "OrgUnitCount"
FROM
{{ schema }}.{{ base }}_orgunitlevel AS oul
INNER JOIN {{ schema }}.{{ base }}_orgunit as ou on oul."Level" = ou."HierarchyLevel"
GROUP BY
"LevelName",
"Level"
ORDER BY
"Level" ASC;
Analytics¶
Aggregation of data items by quarterly periods and region org unit level¶
SELECT
"PT Quarterly",
"OU Region",
"DataItem",
sum("Value") AS "Value"
FROM
{{ schema }}.{{ base }} ax
WHERE
"DataItem" LIKE '%TX_CURR%'
AND "PT Yearly" IN ('2024Q1', '2024Q2', '2024Q3', '2024Q4')
GROUP BY
"PT Quarterly",
"OU Region",
"DataItem";
Aggregation of data items for last 5 years order by aggregate value¶
SELECT
"PT Quarterly",
"OU Region",
"DataItem",
sum("Value") AS "Value"
FROM
{{ schema }}.{{ base }}
WHERE
"DataItem" LIKE '%TX_CURR%'
AND "PT Yearly" IN (
SELECT
"ID"
FROM
{{ schema }}."{{ base }}_relative_yearly_period"
WHERE
"Index" <= 5
)
GROUP BY
"PT Quarterly",
"OU Region",
"DataItem"
ORDER BY
"Value" DESC
Aggregation of events into 5 year age groups for age in years integer column¶
SELECT
"PT Monthly",
"OU District",
intDiv("Age in Years" / 5) * 5 AS "Age Group 5y",
count("EventID") AS "Events"
FROM
{{ schema }}.{{ table }}
GROUP BY
"PT Monthly",
"OU District",
"Age Group 5y"
ORDER BY"
"PT Monthly",
"OU District";
Convert date of birth date column to age in years integer column¶
SELECT
dx."Date of Birth",
age('year', "Date of Birth", today()) AS "Age in Years"
FROM
{{ schema }}.{{ table }};
Aggregation of data with relative periods for last 12 months¶
SELECT
dx."PT Monthly",
dx."OU District",
dx."DataItem",
sum(dx."Value") AS "Value"
FROM
{{ schema }}.{{ base }} AS dx
INNER JOIN
-- Join with monthly relative period table
{{ schema }}.{{ base }}_relative_monthly_period AS pe ON dx."PT Monthly" = pe."ID"
WHERE
-- Filter for last 12 months
p."Index" <= 12
GROUP BY
dx."PT Monthly",
dx."OU District",
dx."DataItem",
ORDER BY
dx."PT Monthly",
dx."OU District",
dx."DataItem";
Indicator expression calculation¶
SELECT
-- Dimensions
"OU Federal",
"PT Yearly",
-- Indicator "Indicator A"
-- Numerator: "#{aBqcjyRWnjv.BDXPvhUlGvb}"
sum(CASE
WHEN "DataItemID" = 'aBqcjyRWnjv'
AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
ELSE 0
END) AS "Num Indicator A",
-- Denominator: "#{aBqcjyRWnjv.FpgtJFWJWK2}"
sum(CASE
WHEN "DataItemID" = 'aBqcjyRWnjv'
AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
ELSE 0
END) AS "Denom Indicator A",
-- Expression calculation with division by zero protection
round(IF(
"Denom Indicator A" != 0, (
"Num Indicator A" / "Denom Indicator A") * 100, 0), 2) AS "Indicator A",
-- Indicator "Indicator B"
-- Numerator: "#{IEpn79MpSo3.BDXPvhUlGvb}"
sum(CASE
WHEN "DataItemID" = 'IEpn79MpSo3'
AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
ELSE 0
END) AS "Num Indicator B",
-- Denominator: "#{IEpn79MpSo3.FpgtJFWJWK2}"
sum(CASE
WHEN "DataItemID" = 'IEpn79MpSo3' THEN "Value"
AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
ELSE 0
END) AS "Denom Indicator B",
-- Expression calculation with division by zero protection
round(IF(
"Denom Indicator B" != 0, (
"Num Indicator B" / "Denom Indicator B") * 100, 0), 2) AS "Indicator B",
FROM
{{ schema }}.{{ base }}
-- Data element filter
WHERE
"DataItemID" IN ('aBqcjyRWnjv', 'IEpn79MpSo3')
-- Group by dimensions
GROUP BY
"OU Federal",
"PT Yearly";