Query library¶
Welcome to the SQL query library for ClickHouse.
Introduction¶
The schema schema and base table name base in queries must be updated to match the relevant environment. Note that variables are specified using Jinja syntax.
DHIS2¶
Metadata¶
Data elements with data set membership¶
SELECT
de."Name" as "DataElementName",
arrayStringConcat(groupArray(ds."Name"), ', ') AS "DataSetName"
FROM
{{ schema }}.datasetelement dse
INNER JOIN {{ schema }}.dataset ds on dse."DataSetID" = ds."ID"
INNER JOIN {{ schema }}.dataelement de on dse."DataElementID" = de."ID"
GROUP BY
de."Name"
ORDER BY
de."Name";
Data value count by data element¶
SELECT
de."Name" AS "DataElementName",
ds."Name" AS "DataSetName",
count(dv."Value") AS "DataValueCount"
FROM
{{ schema }}.datavalue AS dv
INNER JOIN {{ schema }}.dataelement de on dv."DataElementID" = de."ID"
INNER JOIN {{ schema }}.datasetelement dse on de."ID" = dse."DataElementID"
INNER JOIN {{ schema }}.dataset ds on dse."DataSetID" = ds."ID"
GROUP BY
"DataElementName",
"DataSetName"
ORDER BY
"DataValueCount" DESC;
Data value count by data set¶
SELECT
ds."Name" as "DataSetName",
count(dv."Value") AS "DataValueCount"
FROM
{{ schema }}.dataset AS ds
INNER JOIN {{ schema }}.datasetelement AS dse on ds."ID" = dse."DataSetID"
INNER JOIN {{ schema }}.datavalue AS dv ON dse."DataElementID" = dv."DataElementID"
GROUP BY
"DataSetName"
ORDER BY
"DataValueCount" DESC;
Programs, program stages and data elements¶
SELECT
pr."ID" as "ProgramID",
pr."Name" AS "ProgramName",
ps."ID" as "ProgramStageID",
ps."Name" AS "ProgramStageName",
de."ID" as "DataElementID",
de."Name" AS "DataElementName",
de."ValueType" as "DataElementValueType",
os."ID" as "OptionSetID",
os."Name" as "OptionSetName"
FROM
{{ schema }}.program AS pr
INNER JOIN {{ schema }}.programstage AS ps ON pr."ID" = ps."ProgramID"
INNER JOIN {{ schema }}.programstagedataelement AS psde ON ps."ID" = psde."ProgramStageID"
INNER JOIN {{ schema }}.dataelement AS de ON psde."DataElementID" = de."ID"
LEFT JOIN {{ schema }}.optionset AS os on de."OptionSetID" = os."ID";
Metadata integrity¶
Category combos with identical categories¶
WITH
fingerprint AS (
SELECT
"CategoryComboID",
arraySort (groupArray ("CategoryID")) AS "CategoryFingerprint"
FROM
{{ schema }}.categorycombos_categories
group by
"CategoryComboID"
)
SELECT
"CategoryFingerprint",
count() AS "Occurrence",
groupArray ("CategoryComboID") AS "DuplicateCategoryCombos"
FROM
fingerprint
GROUP BY
"CategoryFingerprint"
HAVING
"Occurrence" > 1;
Category combos without categories¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categorycombo
WHERE
"ID" NOT IN (
SELECT
"CategoryComboID"
FROM
{{ schema }}.categorycombos_categories
);
Category combos without category option combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categorycombo
WHERE
"ID" NOT IN (
SELECT
"CategoryComboID"
FROM
{{ schema }}.categorycombos_optioncombo
);
Categories with identical category options¶
WITH
fingerprint AS (
SELECT
"CategoryID",
arraySort(groupArray("CategoryOptionID")) AS "CategoryOptionFingerprint"
FROM
{{ schema }}.categories_categoryoptions
GROUP BY
"CategoryID"
)
SELECT
"CategoryOptionFingerprint",
count() AS "Occurrence",
groupArray("CategoryID") AS "DuplicateCategories"
FROM
fingerprint
GROUP BY
"CategoryOptionFingerprint"
HAVING
"Occurrence" > 1;
Categories with a single category option¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.data_category
WHERE
"Name" != 'default'
AND "ID" IN (
SELECT
"CategoryID"
FROM
{{ schema }}.categories_categoryoptions
GROUP BY
"CategoryID"
HAVING
count(*) = 1
);
Categories without category options¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.category
WHERE
"ID" NOT IN(
SELECT
"CategoryID"
FROM
{{ schema }}.categories_categoryoptions
);
Categories without category combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.category
WHERE
"ID" NOT IN(
SELECT
"CategoryID"
FROM
{{ schema }}.categorycombos_categories
);
Category options without categories¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categoryoption
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionID"
FROM
{{ schema }}.categories_categoryoptions
);
Category options without category option combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categoryoption
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionID"
FROM
{{ schema }}.categoryoptioncombos_categoryoptions
);
Category option combos with identical category options¶
WITH
fingerprint AS (
SELECT
"CategoryOptionComboID",
arraySort(groupArray("CategoryOptionID")) AS "OptionFingerprint"
FROM
{{ schema }}.categoryoptioncombos_categoryoptions
GROUP BY
"CategoryOptionComboID"
)
SELECT
"OptionFingerprint",
count() AS "Occurrence",
groupArray("CategoryOptionComboID") AS "DuplicateCategoryOptionCombos"
FROM
fingerprint
GROUP BY
"OptionFingerprint"
HAVING
"Occurrence" > 1;
Category option combos without category options¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categoryoptioncombo
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionComboID"
FROM
{{ schema }}.categoryoptioncombos_categoryoptions
);
Category option combos without category combos¶
SELECT
"ID",
"Name"
FROM
{{ schema }}.categoryoptioncombo
WHERE
"ID" NOT IN (
SELECT
"CategoryOptionComboID"
FROM
{{ schema }}.categorycombos_optioncombos
);
Org unit count by level¶
SELECT
"Name" as "LevelName",
"Level" as "Level",
count(*) As "OrgUnitCount"
FROM
{{ schema }}.orgunitlevel AS oul
INNER JOIN {{ schema }}.orgunit as ou on oul."Level" = ou."HierarchyLevel"
GROUP BY
"LevelName",
"Level"
ORDER BY
"Level" ASC;
Analytics¶
Aggregation of data items by quarterly periods and region org unit level¶
SELECT
"PT Quarterly",
"OU Region",
"DataItem",
sum("Value") AS "Value"
FROM
{{ schema }}.analytics
WHERE
"DataItem" LIKE '%TX_CURR%'
AND "PT Yearly" IN ('2024Q1', '2024Q2', '2024Q3', '2024Q4')
GROUP BY
"PT Quarterly",
"OU Region",
"DataItem";
Aggregation of data items for last 5 years order by aggregate value¶
SELECT
"PT Quarterly",
"OU Region",
"DataItem",
sum("Value") AS "Value"
FROM
{{ schema }}.analytics
WHERE
"DataItem" LIKE '%TX_CURR%'
AND "PT Yearly" IN (
SELECT
"ID"
FROM
{{ schema }}."relative_yearly_period"
WHERE
"Index" <= 5
)
GROUP BY
"PT Quarterly",
"OU Region",
"DataItem"
ORDER BY
"Value" DESC
Aggregation of events into 5 year age groups for age in years integer column¶
SELECT
"PT Monthly",
"OU District",
intDiv("Age in Years" / 5) * 5 AS "Age Group 5y",
count("EventID") AS "Events"
FROM
{{ schema }}.{{ table }}
GROUP BY
"PT Monthly",
"OU District",
"Age Group 5y"
ORDER BY"
"PT Monthly",
"OU District";
Convert date of birth date column to age in years integer column¶
SELECT
dx."Date of Birth",
age('year', "Date of Birth", today()) AS "Age in Years"
FROM
{{ schema }}.{{ table }};
Aggregation of data with relative periods for last 12 months¶
SELECT
dx."PT Monthly",
dx."OU District",
dx."DataItem",
sum(dx."Value") AS "Value"
FROM
{{ schema }}.analytics AS dx
INNER JOIN
-- Join with monthly relative period table
{{ schema }}.relative_monthly_period AS pe ON dx."PT Monthly" = pe."ID"
WHERE
-- Filter for last 12 months
p."Index" <= 12
GROUP BY
dx."PT Monthly",
dx."OU District",
dx."DataItem",
ORDER BY
dx."PT Monthly",
dx."OU District",
dx."DataItem";
Indicator expression calculation¶
SELECT
-- Dimensions
"OU Federal",
"PT Yearly",
-- Indicator "Indicator A"
-- Numerator: "#{aBqcjyRWnjv.BDXPvhUlGvb}"
sum(CASE
WHEN "DataItemID" = 'aBqcjyRWnjv'
AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
ELSE 0
END) AS "Num Indicator A",
-- Denominator: "#{aBqcjyRWnjv.FpgtJFWJWK2}"
sum(CASE
WHEN "DataItemID" = 'aBqcjyRWnjv'
AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
ELSE 0
END) AS "Denom Indicator A",
-- Expression calculation with division by zero protection
round(IF(
"Denom Indicator A" != 0, (
"Num Indicator A" / "Denom Indicator A") * 100, 0), 2) AS "Indicator A",
-- Indicator "Indicator B"
-- Numerator: "#{IEpn79MpSo3.BDXPvhUlGvb}"
sum(CASE
WHEN "DataItemID" = 'IEpn79MpSo3'
AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
ELSE 0
END) AS "Num Indicator B",
-- Denominator: "#{IEpn79MpSo3.FpgtJFWJWK2}"
sum(CASE
WHEN "DataItemID" = 'IEpn79MpSo3' THEN "Value"
AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
ELSE 0
END) AS "Denom Indicator B",
-- Expression calculation with division by zero protection
round(IF(
"Denom Indicator B" != 0, (
"Num Indicator B" / "Denom Indicator B") * 100, 0), 2) AS "Indicator B",
FROM
{{ schema }}.analytics
-- Data element filter
WHERE
"DataItemID" IN ('aBqcjyRWnjv', 'IEpn79MpSo3')
-- Group by dimensions
GROUP BY
"OU Federal",
"PT Yearly";