Skip to content

Emory SQL Query Library

A collection of common SQL queries for working with Emory's OMOP data lake on Redshift. For the broader OHDSI community query library, see the OHDSI Query Library.

Patient Counts

Total patients in the CDM

SELECT COUNT(*) AS total_persons
FROM cdm.person;

Patients by gender

SELECT
    c.concept_name AS gender,
    COUNT(*) AS person_count
FROM cdm.person p
JOIN cdm.concept c ON p.gender_concept_id = c.concept_id
GROUP BY c.concept_name
ORDER BY person_count DESC;

Patients by age group

SELECT
    CASE
        WHEN DATEDIFF(year, CAST(year_of_birth || '-01-01' AS DATE), CURRENT_DATE) < 18 THEN '0-17'
        WHEN DATEDIFF(year, CAST(year_of_birth || '-01-01' AS DATE), CURRENT_DATE) < 35 THEN '18-34'
        WHEN DATEDIFF(year, CAST(year_of_birth || '-01-01' AS DATE), CURRENT_DATE) < 50 THEN '35-49'
        WHEN DATEDIFF(year, CAST(year_of_birth || '-01-01' AS DATE), CURRENT_DATE) < 65 THEN '50-64'
        ELSE '65+'
    END AS age_group,
    COUNT(*) AS person_count
FROM cdm.person
GROUP BY 1
ORDER BY 1;

Condition Queries

Top 20 most common conditions

SELECT
    c.concept_name AS condition_name,
    COUNT(DISTINCT co.person_id) AS patient_count
FROM cdm.condition_occurrence co
JOIN cdm.concept c ON co.condition_concept_id = c.concept_id
WHERE c.standard_concept = 'S'
GROUP BY c.concept_name
ORDER BY patient_count DESC
LIMIT 20;

Find patients with a specific condition

SELECT DISTINCT co.person_id
FROM cdm.condition_occurrence co
JOIN cdm.concept_ancestor ca
    ON co.condition_concept_id = ca.descendant_concept_id
WHERE ca.ancestor_concept_id = 201826  -- Type 2 diabetes (includes all subtypes)
;

Drug Queries

Medications for a patient cohort

SELECT
    c.concept_name AS drug_name,
    COUNT(DISTINCT de.person_id) AS patient_count
FROM cdm.drug_exposure de
JOIN cdm.concept c ON de.drug_concept_id = c.concept_id
WHERE de.person_id IN (
    -- Your cohort here
    SELECT DISTINCT person_id
    FROM cdm.condition_occurrence
    WHERE condition_concept_id = 201826
)
AND c.standard_concept = 'S'
GROUP BY c.concept_name
ORDER BY patient_count DESC
LIMIT 20;

Measurement Queries

Most recent lab value for a patient

SELECT
    m.person_id,
    c.concept_name AS measurement_name,
    m.value_as_number,
    u.concept_name AS unit,
    m.measurement_date
FROM cdm.measurement m
JOIN cdm.concept c ON m.measurement_concept_id = c.concept_id
LEFT JOIN cdm.concept u ON m.unit_concept_id = u.concept_id
WHERE m.person_id = <person_id>
  AND c.concept_name ILIKE '%hemoglobin a1c%'
ORDER BY m.measurement_date DESC
LIMIT 1;

Visit Queries

Visit type distribution

SELECT
    c.concept_name AS visit_type,
    COUNT(*) AS visit_count,
    COUNT(DISTINCT vo.person_id) AS patient_count
FROM cdm.visit_occurrence vo
JOIN cdm.concept c ON vo.visit_concept_id = c.concept_id
GROUP BY c.concept_name
ORDER BY visit_count DESC;

Vocabulary Queries

Search for a concept by name

SELECT
    concept_id,
    concept_name,
    domain_id,
    vocabulary_id,
    concept_class_id,
    standard_concept
FROM cdm.concept
WHERE concept_name ILIKE '%search term%'
  AND standard_concept = 'S'
ORDER BY concept_name
LIMIT 50;

Find source-to-standard concept mappings

SELECT
    source.concept_id AS source_concept_id,
    source.concept_name AS source_concept_name,
    source.vocabulary_id AS source_vocabulary,
    target.concept_id AS standard_concept_id,
    target.concept_name AS standard_concept_name,
    target.vocabulary_id AS standard_vocabulary
FROM cdm.concept_relationship cr
JOIN cdm.concept source ON cr.concept_id_1 = source.concept_id
JOIN cdm.concept target ON cr.concept_id_2 = target.concept_id
WHERE cr.relationship_id = 'Maps to'
  AND source.concept_name ILIKE '%search term%'
  AND target.standard_concept = 'S';