Skip to content

Query library

Welcome to the SQL query library for ClickHouse.

Introduction

The schema schema and base table name base in queries must be updated to match the relevant environment. Note that variables are specified using Jinja syntax.

DHIS2

Metadata

Data elements with data set membership

SELECT
  de."Name" as "DataElementName",
  arrayStringConcat(groupArray(ds."Name"), ', ') AS "DataSetName"
FROM
  {{ schema }}.{{ base }}_datasetelement dse
  INNER JOIN {{ schema }}.{{ base }}_dataset ds on dse."DataSetID" = ds."ID"
  INNER JOIN {{ schema }}.{{ base }}_dataelement de on dse."DataElementID" = de."ID"
GROUP BY
  de."Name"
ORDER BY
  de."Name";

Programs, program stages and data elements

SELECT
  pr."ID" as "ProgramID",
  pr."Name" AS "ProgramName",
  ps."ID" as "ProgramStageID",
  ps."Name" AS "ProgramStageName",
  de."ID" as "DataElementID",
  de."Name" AS "DataElementName",
  de."ValueType" as "DataElementValueType",
  os."ID" as "OptionSetID",
  os."Name" as "OptionSetName"
FROM
  {{ schema }}.{{ base }}_program AS pr
INNER JOIN {{ schema }}.{{ base }}_programstage AS ps ON pr."ID" = ps."ProgramID"
INNER JOIN {{ schema }}.{{ base }}_programstagedataelement AS psde ON ps."ID" = psde."ProgramStageID"
INNER JOIN {{ schema }}.{{ base }}_dataelement AS de ON psde."DataElementID" = de."ID"
LEFT JOIN {{ schema }}.{{ base }}_optionset AS os on de."OptionSetID" = os."ID";

Data value count by data element

SELECT
  de."Name" AS "DataElementName",
  ds."Name" AS "DataSetName",
  count(dv.*) AS "DataValueCount"
FROM
  {{ schema }}.{{ base }}_datavalue AS dv
  INNER JOIN {{ schema }}.{{ base }}_dataelement de on dv."DataElementID" = de."ID"
  INNER JOIN {{ schema }}.{{ base }}_datasetelement dse on de."ID" = dse."DataElementID"
  INNER JOIN {{ schema }}.{{ base }}_dataset ds on dse."DataSetID" = ds."ID"
GROUP BY
  "DataElementName",
  "DataSetName"
ORDER BY
  "DataValueCount" DESC;

Data value count by data set

SELECT
  ds."Name" as "DataSetName",
  count(dv.*) AS "DataValueCount"
FROM
  {{ schema }}.{{ base }}_dataset AS ds
  INNER JOIN {{ schema }}.{{ base }}_datasetelement AS dse on ds."ID" = dse."DataSetID"
  INNER JOIN {{ schema }}.{{ base }}_datavalue AS dv ON dse."DataElementID" = dv."DataElementID"
GROUP BY
  "DataSetName"
ORDER BY
  "DataValueCount" DESC;

Metadata integrity

Category combos with identical categories

WITH
  fingerprint AS (
    SELECT
      "CategoryComboID",
      arraySort (groupArray ("CategoryID")) AS "CategoryFingerprint"
    FROM
      {{ schema }}.{{ base }}_categorycombos_categories
    group by
      "CategoryComboID"
  )
SELECT
  "CategoryFingerprint",
  count() AS "Occurrence",
  groupArray ("CategoryComboID") AS "DuplicateCategoryCombos"
FROM
  fingerprint
GROUP BY
  "CategoryFingerprint"
HAVING
  "Occurrence" > 1;

Category combos without categories

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_categorycombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryComboID"
    FROM
      {{ schema }}.{{ base }}_categorycombos_categories
  );

Category combos without category option combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_categorycombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryComboID"
    FROM
      {{ schema }}.{{ base }}_categorycombos_optioncombo
  );

Categories with identical category options

WITH
  fingerprint AS (
    SELECT
      "CategoryID",
      arraySort(groupArray("CategoryOptionID")) AS "CategoryOptionFingerprint"
    FROM
      {{ schema }}.{{ base }}_categories_categoryoptions
    GROUP BY
      "CategoryID"
  )
SELECT
  "CategoryOptionFingerprint",
  count() AS "Occurrence",
  groupArray("CategoryID") AS "DuplicateCategories"
FROM
  fingerprint
GROUP BY
  "CategoryOptionFingerprint"
HAVING
  "Occurrence" > 1;

Categories with a single category option

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_data_category
WHERE
  "Name" != 'default'
  AND "ID" IN (
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.{{ base }}_categories_categoryoptions
    GROUP BY
      "CategoryID"
    HAVING
      count(*) = 1
  );

Categories without category options

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_category
WHERE
  "ID" NOT IN(
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.{{ base }}_categories_categoryoptions
  );

Categories without category combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_category
WHERE
  "ID" NOT IN(
    SELECT
      "CategoryID"
    FROM
      {{ schema }}.{{ base }}_categorycombos_categories
  );

Category options without categories

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_categoryoption
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionID"
    FROM
      {{ schema }}.{{ base }}_categories_categoryoptions
  );

Category options without category option combos

SELECT
  "ID",
  "Name"
FROM 
  {{ schema }}.{{ base }}_categoryoption
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionID"
    FROM 
      {{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
  );

Category option combos with identical category options

WITH
  fingerprint AS (
    SELECT
      "CategoryOptionComboID",
      arraySort(groupArray("CategoryOptionID")) AS "OptionFingerprint"
    FROM
      {{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
    GROUP BY
      "CategoryOptionComboID"
  )
SELECT
  "OptionFingerprint",
  count() AS "Occurrence",
  groupArray("CategoryOptionComboID") AS "DuplicateCategoryOptionCombos"
FROM
  fingerprint
GROUP BY
  "OptionFingerprint"
HAVING
  "Occurrence" > 1;

Category option combos without category options

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_categoryoptioncombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionComboID"
    FROM
      {{ schema }}.{{ base }}_categoryoptioncombos_categoryoptions
  );

Category option combos without category combos

SELECT
  "ID",
  "Name"
FROM
  {{ schema }}.{{ base }}_categoryoptioncombo
WHERE
  "ID" NOT IN (
    SELECT
      "CategoryOptionComboID"
    FROM
      {{ schema }}.{{ base }}_categorycombos_optioncombos
  );

Org unit count by level

SELECT
  "Name" as "LevelName",
  "Level" as "Level",
  count(*) As "OrgUnitCount"
FROM
  {{ schema }}.{{ base }}_orgunitlevel AS oul
  INNER JOIN {{ schema }}.{{ base }}_orgunit as ou on oul."Level" = ou."HierarchyLevel"
GROUP BY
  "LevelName",
  "Level"
ORDER BY
  "Level" ASC;

Analytics

Aggregation of data items by quarterly periods and region org unit level

SELECT
  "PT Quarterly",
  "OU Region",
  "DataItem",
  sum("Value") AS "Value"
FROM
  {{ schema }}.{{ base }} ax
WHERE
  "DataItem" LIKE '%TX_CURR%'
  AND "PT Yearly" IN ('2024Q1', '2024Q2', '2024Q3', '2024Q4')
GROUP BY
  "PT Quarterly",
  "OU Region",
  "DataItem";

Aggregation of data items for last 5 years order by aggregate value

SELECT
  "PT Quarterly",
  "OU Region",
  "DataItem",
  sum("Value") AS "Value"
FROM
  {{ schema }}.{{ base }}
WHERE
  "DataItem" LIKE '%TX_CURR%'
  AND "PT Yearly" IN (
    SELECT
      "ID"
    FROM
      {{ schema }}."{{ base }}_relative_yearly_period"
    WHERE
      "Index" <= 5
  )
GROUP BY
  "PT Quarterly",
  "OU Region",
  "DataItem"
ORDER BY
  "Value" DESC

Aggregation of events into 5 year age groups for age in years integer column

SELECT
  "PT Monthly",
  "OU District",
  intDiv("Age in Years" / 5) * 5 AS "Age Group 5y",
  count("EventID") AS "Events"
FROM 
  {{ schema }}.{{ table }}
GROUP BY
  "PT Monthly",
  "OU District",
  "Age Group 5y"
ORDER BY"
  "PT Monthly",
  "OU District";

Convert date of birth date column to age in years integer column

SELECT
  dx."Date of Birth",
  age('year', "Date of Birth", today()) AS "Age in Years"
FROM
  {{ schema }}.{{ table }};

Aggregation of data with relative periods for last 12 months

SELECT
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem",
  sum(dx."Value") AS "Value"
FROM 
  {{ schema }}.{{ base }} AS dx
INNER JOIN 
  -- Join with monthly relative period table
  {{ schema }}.{{ base }}_relative_monthly_period AS pe ON dx."PT Monthly" = pe."ID"
WHERE
  -- Filter for last 12 months
  p."Index" <= 12
GROUP BY
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem",
ORDER BY
  dx."PT Monthly",
  dx."OU District",
  dx."DataItem";

Indicator expression calculation

SELECT
  -- Dimensions
  "OU Federal",
  "PT Yearly",

  -- Indicator "Indicator A"
  -- Numerator: "#{aBqcjyRWnjv.BDXPvhUlGvb}"
  sum(CASE
    WHEN "DataItemID" = 'aBqcjyRWnjv'
    AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
    ELSE 0
  END) AS "Num Indicator A",
  -- Denominator: "#{aBqcjyRWnjv.FpgtJFWJWK2}"
  sum(CASE
    WHEN "DataItemID" = 'aBqcjyRWnjv'
    AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
    ELSE 0
  END) AS "Denom Indicator A",
  -- Expression calculation with division by zero protection
  round(IF(
    "Denom Indicator A" != 0, (
    "Num Indicator A" / "Denom Indicator A") * 100, 0), 2) AS "Indicator A",

  -- Indicator "Indicator B"
  -- Numerator: "#{IEpn79MpSo3.BDXPvhUlGvb}"
  sum(CASE
    WHEN "DataItemID" = 'IEpn79MpSo3'
    AND "CatOptComboID" = 'BDXPvhUlGvb' THEN "Value"
    ELSE 0
  END) AS "Num Indicator B",
  -- Denominator: "#{IEpn79MpSo3.FpgtJFWJWK2}"
  sum(CASE
    WHEN "DataItemID" = 'IEpn79MpSo3' THEN "Value"
    AND "CatOptComboID" = 'FpgtJFWJWK2' THEN "Value"
    ELSE 0
  END) AS "Denom Indicator B",
  -- Expression calculation with division by zero protection
  round(IF(
    "Denom Indicator B" != 0, (
    "Num Indicator B" / "Denom Indicator B") * 100, 0), 2) AS "Indicator B",
FROM
  {{ schema }}.{{ base }}
-- Data element filter
WHERE
  "DataItemID" IN ('aBqcjyRWnjv', 'IEpn79MpSo3')
-- Group by dimensions
GROUP BY
  "OU Federal",
  "PT Yearly";