Skip to content

Query library

Welcome to the SQL query library for ClickHouse.

Introduction

The schema schema and base table name base in queries must be updated to match the relevant environment. Note that variables are specified using Jinja syntax.

DHIS2

Metadata

Data elements with data set membership

SELECT
  de."Name" as "DataElementName",
  arrayStringConcat(groupArray(ds."Name"), ', ') AS "DataSetName"
FROM
  {{ schema }}.datasetelement dse
  INNER JOIN {{ schema }}.dataset ds on dse."DataSetID" = ds."ID"
  INNER JOIN {{ schema }}.dataelement de on dse."DataElementID" = de."ID"
GROUP BY
  de."Name"
ORDER BY
  de."Name";

Data value count by data element

SELECT
  de."Name" AS "DataElementName",
  ds."Name" AS "DataSetName",
  count(dv."Value") AS "DataValueCount"
FROM
  {{ schema }}.datavalue AS dv
  INNER JOIN {{ schema }}.dataelement de on dv."DataElementID" = de."ID"
  INNER JOIN {{ schema }}.datasetelement dse on de."ID" = dse."DataElementID"
  INNER JOIN {{ schema }}.dataset ds on dse."DataSetID" = ds."ID"
GROUP BY
  "DataElementName",
  "DataSetName"
ORDER BY
  "DataValueCount" DESC;

Data value count by data set

SELECT
  ds."Name" as "DataSetName",
  count(dv."Value") AS "DataValueCount"
FROM
  {{ schema }}.dataset AS ds
  INNER JOIN {{ schema }}.datasetelement AS dse on ds."ID" = dse."DataSetID"
  INNER JOIN {{ schema }}.datavalue AS dv ON dse."DataElementID" = dv."DataElementID"
GROUP BY
  "DataSetName"
ORDER BY
  "DataValueCount" DESC;

Programs, program stages and data elements

SELECT
  pr."ID" as "ProgramID",
  pr."Name" AS "ProgramName",
  ps."ID" as "ProgramStageID",
  ps."Name" AS "ProgramStageName",
  de."ID" as "DataElementID",
  de."Name" AS "DataElementName",
  de."ValueType" as "DataElementValueType",
  os."ID" as "OptionSetID",
  os."Name" as "OptionSetName"
FROM
  {{ schema }}.program AS pr
INNER JOIN {{ schema }}.programstage AS ps ON pr."ID" = ps."ProgramID"
INNER JOIN {{ schema }}.programstagedataelement AS psde ON ps."ID" = psde."ProgramStageID"
INNER JOIN {{ schema }}.dataelement AS de ON psde."DataElementID" = de."ID"
LEFT JOIN {{ schema }}.optionset AS os on de."OptionSetID" = os."ID";

Metadata integrity

Category combos with identical categories

WITH
  fingerprint AS (
    SELECT
      "CategoryComboID",
      arraySort (groupArray ("CategoryID")) AS "CategoryFingerprint"
    FROM
      {{ schema }}.categorycombos_categories
    group by
      "CategoryComboID"
  )
SELECT
  "CategoryFingerprint",
  count() AS "Occurrence",
  groupArray ("CategoryComboID") AS "DuplicateCategoryCombos"
FROM
  fingerprint
GROUP BY
  "CategoryFingerprint"
HAVING
  "Occurrence" > 1;

Category combos without categories

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.categorycombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryComboID"
    FROM
      {{ schema }}.categorycombos_categories
  );

Category combos without category option combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.categorycombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryComboID"
    FROM
      {{ schema }}.categorycombos_optioncombo
  );

Categories with identical category options

WITH
  fingerprint AS (
    SELECT
      "CategoryID",
      arraySort(groupArray("CategoryOptionID")) AS "CategoryOptionFingerprint"
    FROM
      {{ schema }}.categories_categoryoptions
    GROUP BY
      "CategoryID"
  )
SELECT
  "CategoryOptionFingerprint",
  count() AS "Occurrence",
  groupArray("CategoryID") AS "DuplicateCategories"
FROM
  fingerprint
GROUP BY
  "CategoryOptionFingerprint"
HAVING
  "Occurrence" > 1;

Categories with a single category option

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.data_category
WHERE
  "Name" != 'default'
  AND "ID" IN (
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.categories_categoryoptions
    GROUP BY
      "CategoryID"
    HAVING
      count(*) = 1
  );

Categories without category options

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.category
WHERE
  "ID" NOT IN(
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.categories_categoryoptions
  );

Categories without category combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.category
WHERE
  "ID" NOT IN(
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.categorycombos_categories
  );

Category options without categories

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.categoryoption
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionID"
    FROM
      {{ schema }}.categories_categoryoptions
  );

Category options without category option combos

SELECT
  "ID",
  "Name"
FROM 
  {{ schema }}.categoryoption
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionID"
    FROM 
      {{ schema }}.categoryoptioncombos_categoryoptions
  );

Category option combos with identical category options

WITH
  fingerprint AS (
    SELECT
      "CategoryOptionComboID",
      arraySort(groupArray("CategoryOptionID")) AS "OptionFingerprint"
    FROM
      {{ schema }}.categoryoptioncombos_categoryoptions
    GROUP BY
      "CategoryOptionComboID"
  )
SELECT
  "OptionFingerprint",
  count() AS "Occurrence",
  groupArray("CategoryOptionComboID") AS "DuplicateCategoryOptionCombos"
FROM
  fingerprint
GROUP BY
  "OptionFingerprint"
HAVING
  "Occurrence" > 1;

Category option combos without category options

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.categoryoptioncombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionComboID"
    FROM
      {{ schema }}.categoryoptioncombos_categoryoptions
  );

Category option combos without category combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.categoryoptioncombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionComboID"
    FROM
      {{ schema }}.categorycombos_optioncombos
  );

Org unit count by level

SELECT
  "Name" as "LevelName",
  "Level" as "Level",
  count(*) As "OrgUnitCount"
FROM
  {{ schema }}.orgunitlevel AS oul
  INNER JOIN {{ schema }}.orgunit as ou on oul."Level" = ou."HierarchyLevel"
GROUP BY
  "LevelName",
  "Level"
ORDER BY
  "Level" ASC;

Analytics

Aggregation of data items by quarterly periods and region org unit level

SELECT
  "PT Quarterly",
  "OU Region",
  "DataItem",
  sum("Value") AS "Value"
FROM
  {{ schema }}.analytics
WHERE
  "DataItem" LIKE '%TX_CURR%'
  AND "PT Yearly" IN ('2024Q1', '2024Q2', '2024Q3', '2024Q4')
GROUP BY
  "PT Quarterly",
  "OU Region",
  "DataItem";

Aggregation of data items for last 5 years order by aggregate value

SELECT
  "PT Quarterly",
  "OU Region",
  "DataItem",
  sum("Value") AS "Value"
FROM
  {{ schema }}.analytics
WHERE
  "DataItem" LIKE '%TX_CURR%'
  AND "PT Yearly" IN (
    SELECT
      "ID"
    FROM
      {{ schema }}."relative_yearly_period"
    WHERE
      "Index" <= 5
  )
GROUP BY
  "PT Quarterly",
  "OU Region",
  "DataItem"
ORDER BY
  "Value" DESC

Aggregation of events into 5 year age groups for age in years integer column

SELECT
  "PT Monthly",
  "OU District",
  intDiv("Age in Years" / 5) * 5 AS "Age Group 5y",
  count("EventID") AS "Events"
FROM 
  {{ schema }}.{{ table }}
GROUP BY
  "PT Monthly",
  "OU District",
  "Age Group 5y"
ORDER BY"
  "PT Monthly",
  "OU District";

Convert date of birth date column to age in years integer column

SELECT
  dx."Date of Birth",
  age('year', "Date of Birth", today()) AS "Age in Years"
FROM
  {{ schema }}.{{ table }};

Aggregation of data with relative periods for last 12 months

SELECT
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem",
  sum(dx."Value") AS "Value"
FROM 
  {{ schema }}.analytics AS dx
INNER JOIN 
  -- Join with monthly relative period table
  {{ schema }}.relative_monthly_period AS pe ON dx."PT Monthly" = pe."ID"
WHERE
  -- Filter for last 12 months
  p."Index" <= 12
GROUP BY
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem",
ORDER BY
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem";

Indicator expression calculation

SELECT
  -- Dimensions
  "OU Federal",
  "PT Yearly",

  -- Indicator "Indicator A"
  -- Numerator: "#{aBqcjyRWnjv.BDXPvhUlGvb}"
  sum(CASE
    WHEN "DataItemID" = 'aBqcjyRWnjv'
    AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
    ELSE 0
  END) AS "Num Indicator A",
  -- Denominator: "#{aBqcjyRWnjv.FpgtJFWJWK2}"
  sum(CASE
    WHEN "DataItemID" = 'aBqcjyRWnjv'
    AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
    ELSE 0
  END) AS "Denom Indicator A",
  -- Expression calculation with division by zero protection
  round(IF(
    "Denom Indicator A" != 0, (
    "Num Indicator A" / "Denom Indicator A") * 100, 0), 2) AS "Indicator A",

  -- Indicator "Indicator B"
  -- Numerator: "#{IEpn79MpSo3.BDXPvhUlGvb}"
  sum(CASE
    WHEN "DataItemID" = 'IEpn79MpSo3'
    AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
    ELSE 0
  END) AS "Num Indicator B",
  -- Denominator: "#{IEpn79MpSo3.FpgtJFWJWK2}"
  sum(CASE
    WHEN "DataItemID" = 'IEpn79MpSo3' THEN "Value"
    AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
    ELSE 0
  END) AS "Denom Indicator B",
  -- Expression calculation with division by zero protection
  round(IF(
    "Denom Indicator B" != 0, (
    "Num Indicator B" / "Denom Indicator B") * 100, 0), 2) AS "Indicator B",
FROM
  {{ schema }}.analytics
-- Data element filter
WHERE
  "DataItemID" IN ('aBqcjyRWnjv', 'IEpn79MpSo3')
-- Group by dimensions
GROUP BY
  "OU Federal",
  "PT Yearly";