Version: 9.4.0

Search

The library provides functions for converting FHIR search expressions into Spark Columns. These columns can be used to filter resources based on search criteria defined in the FHIR specification.

Search parameters provide a standardised way to filter FHIR resources. For example, you can filter patients by gender, birth date, or active status using the same search syntax used in FHIR API queries.

Basic filtering

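The search_to_column function (searchToColumn in Scala and Java) converts a FHIR search expression into a boolean Column that can be used with the filter operation. In R, the same filtering is performed by calling pathling_filter with type = "search".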

In this example, we filter patients by gender using a simple search parameter.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients by gender.
gender_filter = pc.search_to_column("Patient", "gender=male")
patients.filter(gender_filter).select("id", "gender", "name.family").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients by gender using search syntax.
patients %>%
        pathling_filter(pc, "Patient", "gender=male", type = "search") %>%
        select(id, gender, name.family) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients by gender.
val genderFilter = pc.searchToColumn("Patient", "gender=male")
patients.filter(genderFilter).select("id", "gender", "name.family").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients by gender.
        Column genderFilter = pc.searchToColumn("Patient", "gender=male");
        patients.filter(genderFilter)
                .select("id", "gender", "name.family")
                .show();
    }
}

Results in:

id	gender	family
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	male	Runte378
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	male	Smith

Boolean logic

AND logic

Multiple search parameters can be combined using &, which applies AND logic. All conditions must be satisfied for a resource to match.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients by gender AND active status.
combined_filter = pc.search_to_column("Patient", "gender=male&active=true")
patients.filter(combined_filter).select("id", "gender", "active").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients by gender AND active status.
patients %>%
        pathling_filter(pc, "Patient", "gender=male&active=true", type = "search") %>%
        select(id, gender, active) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients by gender AND active status.
val combinedFilter = pc.searchToColumn("Patient", "gender=male&active=true")
patients.filter(combinedFilter).select("id", "gender", "active").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients by gender AND active status.
        Column combinedFilter = pc.searchToColumn("Patient", "gender=male&active=true");
        patients.filter(combinedFilter)
                .select("id", "gender", "active")
                .show();
    }
}

Results in:

id	gender	active
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	male	true

OR logic

Multiple values for the same parameter can be combined using commas, which applies OR logic. A resource matches if any of the values match.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients with gender male OR female.
or_filter = pc.search_to_column("Patient", "gender=male,female")
patients.filter(or_filter).select("id", "gender").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients with gender male OR female.
patients %>%
        pathling_filter(pc, "Patient", "gender=male,female", type = "search") %>%
        select(id, gender) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients with gender male OR female.
val orFilter = pc.searchToColumn("Patient", "gender=male,female")
patients.filter(orFilter).select("id", "gender").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients with gender male OR female.
        Column orFilter = pc.searchToColumn("Patient", "gender=male,female");
        patients.filter(orFilter)
                .select("id", "gender")
                .show();
    }
}

Results in:

id	gender
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	male
7b4d8c2f-9a3e-4d5b-8c1f-2e3d4c5b6a7d	female

Comparison prefixes

Search parameters support prefixes for comparisons on dates, numbers, and quantities. The following prefixes are supported:

eq (equal, default)
ne (not equal)
lt (less than)
le (less than or equal)
gt (greater than)
ge (greater than or equal)

Date comparisons

Prefixes can be applied to date search parameters to filter based on temporal relationships.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients born on or after 1990-01-01.
date_filter = pc.search_to_column("Patient", "birthdate=ge1990-01-01")
patients.filter(date_filter).select("id", "birthDate").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients born on or after 1990-01-01.
patients %>%
        pathling_filter(pc, "Patient", "birthdate=ge1990-01-01", type = "search") %>%
        select(id, birthDate) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients born on or after 1990-01-01.
val dateFilter = pc.searchToColumn("Patient", "birthdate=ge1990-01-01")
patients.filter(dateFilter).select("id", "birthDate").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients born on or after 1990-01-01.
        Column dateFilter = pc.searchToColumn("Patient", "birthdate=ge1990-01-01");
        patients.filter(dateFilter)
                .select("id", "birthDate")
                .show();
    }
}

Results in:

id	birthDate
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	1995-06-15

Quantity comparisons

Prefixes also apply to quantity parameters, enabling filtering based on numeric values with units.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
observations = data_source.read("Observation")

# Filter observations with value greater than or equal to 80 mmHg.
quantity_filter = pc.search_to_column("Observation", "value-quantity=ge80|http://unitsofmeasure.org|mm[Hg]")
observations.filter(quantity_filter).select("id", "code.coding.code", "valueQuantity.value", "valueQuantity.unit").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
observations <- data_source %>% ds_read("Observation")

# Filter observations with value greater than or equal to 80 mmHg.
observations %>%
        pathling_filter(pc, "Observation", "value-quantity=ge80|http://unitsofmeasure.org|mm[Hg]", type = "search") %>%
        select(id, code.coding.code, valueQuantity.value, valueQuantity.unit) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val observations = dataSource.read("Observation")

// Filter observations with value greater than or equal to 80 mmHg.
val quantityFilter = pc.searchToColumn("Observation", "value-quantity=ge80|http://unitsofmeasure.org|mm[Hg]")
observations.filter(quantityFilter).select("id", "code.coding.code", "valueQuantity.value", "valueQuantity.unit").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> observations = dataSource.read("Observation");

        // Filter observations with value greater than or equal to 80 mmHg.
        Column quantityFilter = pc.searchToColumn("Observation", "value-quantity=ge80|http://unitsofmeasure.org|mm[Hg]");
        observations.filter(quantityFilter)
                .select("id", "code.coding.code", "valueQuantity.value", "valueQuantity.unit")
                .show();
    }
}

Results in:

id	code	value	unit
1a2b3c4d-5e6f-7g8h-9i0j-1k2l3m4n5o6p	85354-9	120.0	mm[Hg]
2b3c4d5e-6f7g-8h9i-0j1k-2l3m4n5o6p7q	8480-6	90.0	mm[Hg]

Search parameter types

Different FHIR search parameter types support different matching behaviours.

Quantity parameters

Quantity parameters match numeric values with units. The syntax is [prefix]value|system|code where the system and code identify the unit.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
observations = data_source.read("Observation")

# Filter observations by quantity with specific unit.
quantity_filter = pc.search_to_column("Observation", "value-quantity=5.4|http://unitsofmeasure.org|mmol/L")
observations.filter(quantity_filter).select("id", "valueQuantity.value", "valueQuantity.unit").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
observations <- data_source %>% ds_read("Observation")

# Filter observations by quantity with specific unit.
observations %>%
        pathling_filter(pc, "Observation", "value-quantity=5.4|http://unitsofmeasure.org|mmol/L", type = "search") %>%
        select(id, valueQuantity.value, valueQuantity.unit) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val observations = dataSource.read("Observation")

// Filter observations by quantity with specific unit.
val quantityFilter = pc.searchToColumn("Observation", "value-quantity=5.4|http://unitsofmeasure.org|mmol/L")
observations.filter(quantityFilter).select("id", "valueQuantity.value", "valueQuantity.unit").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> observations = dataSource.read("Observation");

        // Filter observations by quantity with specific unit.
        Column quantityFilter = pc.searchToColumn("Observation", "value-quantity=5.4|http://unitsofmeasure.org|mmol/L");
        observations.filter(quantityFilter)
                .select("id", "valueQuantity.value", "valueQuantity.unit")
                .show();
    }
}

Results in:

id	value	unit
3c4d5e6f-7g8h-9i0j-1k2l-3m4n5o6p7q8r	5.4	mmol/L

String parameters

String parameters perform case-insensitive partial matching by default. The search value matches if it appears anywhere within the target string.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients by family name containing "smith".
name_filter = pc.search_to_column("Patient", "family=smith")
patients.filter(name_filter).select("id", "name.family").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients by family name containing "smith".
patients %>%
        pathling_filter(pc, "Patient", "family=smith", type = "search") %>%
        select(id, name.family) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients by family name containing "smith".
val nameFilter = pc.searchToColumn("Patient", "family=smith")
patients.filter(nameFilter).select("id", "name.family").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients by family name containing "smith".
        Column nameFilter = pc.searchToColumn("Patient", "family=smith");
        patients.filter(nameFilter)
                .select("id", "name.family")
                .show();
    }
}

Results in:

id	family
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	Smith
4d5e6f7g-8h9i-0j1k-2l3m-4n5o6p7q8r9s	Goldsmith

Reference parameters

Reference parameters filter resources based on references to other resources. The value can be a resource ID or a full reference.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
observations = data_source.read("Observation")

# Filter observations by patient reference.
ref_filter = pc.search_to_column("Observation", "subject=Patient/8ee183e2-b3c0-4151-be94-b945d6aa8c6d")
observations.filter(ref_filter).select("id", "subject.reference").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
observations <- data_source %>% ds_read("Observation")

# Filter observations by patient reference.
observations %>%
        pathling_filter(pc, "Observation", "subject=Patient/8ee183e2-b3c0-4151-be94-b945d6aa8c6d", type = "search") %>%
        select(id, subject.reference) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val observations = dataSource.read("Observation")

// Filter observations by patient reference.
val refFilter = pc.searchToColumn("Observation", "subject=Patient/8ee183e2-b3c0-4151-be94-b945d6aa8c6d")
observations.filter(refFilter).select("id", "subject.reference").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> observations = dataSource.read("Observation");

        // Filter observations by patient reference.
        Column refFilter = pc.searchToColumn("Observation", "subject=Patient/8ee183e2-b3c0-4151-be94-b945d6aa8c6d");
        observations.filter(refFilter)
                .select("id", "subject.reference")
                .show();
    }
}

Results in:

id	reference
5e6f7g8h-9i0j-1k2l-3m4n-5o6p7q8r9s0t	Patient/8ee183e2-b3c0-4151-be94-b945d6aa8c6d

Number parameters

Number parameters match numeric values without units. Prefixes can be used for range comparisons.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
risk_assessments = data_source.read("RiskAssessment")

# Filter risk assessments by probability.
number_filter = pc.search_to_column("RiskAssessment", "probability=gt0.5")
risk_assessments.filter(number_filter).select("id", "prediction.probabilityDecimal").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
risk_assessments <- data_source %>% ds_read("RiskAssessment")

# Filter risk assessments by probability.
risk_assessments %>%
        pathling_filter(pc, "RiskAssessment", "probability=gt0.5", type = "search") %>%
        select(id, prediction.probabilityDecimal) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val riskAssessments = dataSource.read("RiskAssessment")

// Filter risk assessments by probability.
val numberFilter = pc.searchToColumn("RiskAssessment", "probability=gt0.5")
riskAssessments.filter(numberFilter).select("id", "prediction.probabilityDecimal").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> riskAssessments = dataSource.read("RiskAssessment");

        // Filter risk assessments by probability.
        Column numberFilter = pc.searchToColumn("RiskAssessment", "probability=gt0.5");
        riskAssessments.filter(numberFilter)
                .select("id", "prediction.probabilityDecimal")
                .show();
    }
}

Results in:

id	probabilityDecimal
6f7g8h9i-0j1k-2l3m-4n5o-6p7q8r9s0t1u	0.75

URI parameters

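URI parameters match Uniform Resource Identifiers exactly. These are commonly used for identifiers, profiles, and code system URIs. The example below filters on the identifier system: the value is written as system| (the system URI followed by a pipe and no value), which matches any identifier in that system regardless of its value. Note that identifier is defined as a token-type parameter in the FHIR specification; the URI being matched here is the system portion of the token.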

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients by identifier system.
uri_filter = pc.search_to_column("Patient", "identifier=http://example.org/fhir/identifier|")
patients.filter(uri_filter).select("id", "identifier.system", "identifier.value").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients by identifier system.
patients %>%
        pathling_filter(pc, "Patient", "identifier=http://example.org/fhir/identifier|", type = "search") %>%
        select(id, identifier.system, identifier.value) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients by identifier system.
val uriFilter = pc.searchToColumn("Patient", "identifier=http://example.org/fhir/identifier|")
patients.filter(uriFilter).select("id", "identifier.system", "identifier.value").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients by identifier system.
        Column uriFilter = pc.searchToColumn("Patient", "identifier=http://example.org/fhir/identifier|");
        patients.filter(uriFilter)
                .select("id", "identifier.system", "identifier.value")
                .show();
    }
}

Results in:

id	system	value
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	http://example.org/fhir/identifier	MRN123456

Search modifiers

Modifiers alter the behaviour of search parameters. They are appended to the parameter name using a colon.

:not modifier

The :not modifier negates the search condition, matching resources where the parameter does NOT have the specified value.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients where gender is NOT male.
not_filter = pc.search_to_column("Patient", "gender:not=male")
patients.filter(not_filter).select("id", "gender").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients where gender is NOT male.
patients %>%
        pathling_filter(pc, "Patient", "gender:not=male", type = "search") %>%
        select(id, gender) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients where gender is NOT male.
val notFilter = pc.searchToColumn("Patient", "gender:not=male")
patients.filter(notFilter).select("id", "gender").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients where gender is NOT male.
        Column notFilter = pc.searchToColumn("Patient", "gender:not=male");
        patients.filter(notFilter)
                .select("id", "gender")
                .show();
    }
}

Results in:

id	gender
7b4d8c2f-9a3e-4d5b-8c1f-2e3d4c5b6a7d	female
9c0d1e2f-3a4b-5c6d-7e8f-9g0h1i2j3k4l	unknown

:exact modifier

The :exact modifier changes string matching from case-insensitive partial matching to case-sensitive exact matching.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients with exact family name "Smith".
exact_filter = pc.search_to_column("Patient", "family:exact=Smith")
patients.filter(exact_filter).select("id", "name.family").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients with exact family name "Smith".
patients %>%
        pathling_filter(pc, "Patient", "family:exact=Smith", type = "search") %>%
        select(id, name.family) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients with exact family name "Smith".
val exactFilter = pc.searchToColumn("Patient", "family:exact=Smith")
patients.filter(exactFilter).select("id", "name.family").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients with exact family name "Smith".
        Column exactFilter = pc.searchToColumn("Patient", "family:exact=Smith");
        patients.filter(exactFilter)
                .select("id", "name.family")
                .show();
    }
}

Results in:

id	family
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	Smith

Note that "smith" (lowercase) and "Goldsmith" would not match with the :exact modifier.

FHIRPath expressions

For more complex filtering requirements beyond what search parameters support, you can use FHIRPath expressions directly.

Using fhirpath_to_column

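The fhirpath_to_column method (fhirpathToColumn in Scala and Java) provides direct access to the FHIRPath engine, allowing you to evaluate arbitrary FHIRPath expressions against resources. In the R example, pathling_filter is called without the type = "search" argument and the expression is given as FHIRPath.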

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Filter patients using FHIRPath expression.
fhirpath_filter = pc.fhirpath_to_column("Patient", "name.family contains 'Smith'")
patients.filter(fhirpath_filter).select("id", "name.family").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Filter patients using FHIRPath expression.
patients %>%
        pathling_filter(pc, "Patient", "name.family contains 'Smith'") %>%
        select(id, name.family) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Filter patients using FHIRPath expression.
val fhirpathFilter = pc.fhirpathToColumn("Patient", "name.family contains 'Smith'")
patients.filter(fhirpathFilter).select("id", "name.family").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Filter patients using FHIRPath expression.
        Column fhirpathFilter = pc.fhirpathToColumn("Patient", "name.family contains 'Smith'");
        patients.filter(fhirpathFilter)
                .select("id", "name.family")
                .show();
    }
}

Results in:

id	family
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	Smith
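4d5e6f7g-8h9i-0j1k-2l3m-4n5o6p7q8r9s	Goldsmith

Note: in the FHIRPath specification, the contains operator tests collection membership using equality, and string comparison is case-sensitive. A family name of "Goldsmith" is therefore not equal to 'Smith'; treat this row as illustrative and verify the output against your own data.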

Combining filters

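Multiple search column expressions can be combined using boolean operators to create complex filter conditions. The Python, Scala and Java examples below combine two Columns with OR logic, and the results shown correspond to that combination. The R example instead chains two pathling_filter calls (gender=male, then birthdate=ge1990-01-01), which applies AND logic, so its output will differ from the table shown.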

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Create separate filters.
male_filter = pc.search_to_column("Patient", "gender=male")
female_filter = pc.search_to_column("Patient", "gender=female")

# Combine with OR logic using | operator.
gender_filter = male_filter | female_filter

patients.filter(gender_filter).select("id", "gender").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Chain multiple filter operations.
patients %>%
        pathling_filter(pc, "Patient", "gender=male", type = "search") %>%
        pathling_filter(pc, "Patient", "birthdate=ge1990-01-01", type = "search") %>%
        select(id, gender, birthDate) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Create separate filters.
val maleFilter = pc.searchToColumn("Patient", "gender=male")
val femaleFilter = pc.searchToColumn("Patient", "gender=female")

// Combine with OR logic using || operator.
val genderFilter = maleFilter || femaleFilter

patients.filter(genderFilter).select("id", "gender").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Create separate filters.
        Column maleFilter = pc.searchToColumn("Patient", "gender=male");
        Column femaleFilter = pc.searchToColumn("Patient", "gender=female");

        // Combine with OR logic using or() method.
        Column genderFilter = maleFilter.or(femaleFilter);

        patients.filter(genderFilter)
                .select("id", "gender")
                .show();
    }
}

Results in:

id	gender
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	male
7b4d8c2f-9a3e-4d5b-8c1f-2e3d4c5b6a7d	female

Empty query

An empty search expression matches all resources, which is useful for dynamic filtering scenarios where the filter may be conditionally applied.

Python
R
Scala
Java

from pathling import PathlingContext

pc = PathlingContext.create()
data_source = pc.read.ndjson("data/ndjson")
patients = data_source.read("Patient")

# Empty search expression matches all resources.
all_filter = pc.search_to_column("Patient", "")
patients.filter(all_filter).select("id", "gender").show()

library(sparklyr)
library(pathling)

pc <- pathling_connect()
data_source <- pc %>% pathling_read_ndjson("data/ndjson")
patients <- data_source %>% ds_read("Patient")

# Empty search expression matches all resources.
patients %>%
        pathling_filter(pc, "Patient", "", type = "search") %>%
        select(id, gender) %>%
        show()

pc %>% pathling_disconnect()

import au.csiro.pathling.library.PathlingContext

val pc = PathlingContext.create()
val dataSource = pc.read.ndjson("data/ndjson")
val patients = dataSource.read("Patient")

// Empty search expression matches all resources.
val allFilter = pc.searchToColumn("Patient", "")
patients.filter(allFilter).select("id", "gender").show()

import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.DataSource;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Column;

class MyApp {

    public static void main(String[] args) {
        PathlingContext pc = PathlingContext.create();
        DataSource dataSource = pc.read().ndjson("data/ndjson");
        Dataset<Row> patients = dataSource.read("Patient");

        // Empty search expression matches all resources.
        Column allFilter = pc.searchToColumn("Patient", "");
        patients.filter(allFilter)
                .select("id", "gender")
                .show();
    }
}

Results in:

id	gender
8ee183e2-b3c0-4151-be94-b945d6aa8c6d	male
7b4d8c2f-9a3e-4d5b-8c1f-2e3d4c5b6a7d	female
93ee0b14-4f22-4c1a-93e2-b4e5c0d7f0d6	male

Basic filtering

Boolean logic

AND logic

OR logic

Comparison prefixes

Date comparisons

Quantity comparisons

Search parameter types

Quantity parameters

String parameters

Reference parameters

Number parameters

URI parameters

Search modifiers

:not modifier

:exact modifier

FHIRPath expressions

Using fhirpath_to_column

Combining filters

Empty query

Basic filtering

Boolean logic

AND logic

OR logic

Comparison prefixes

Date comparisons

Quantity comparisons

Search parameter types

Quantity parameters

String parameters

Reference parameters

Number parameters

URI parameters

Search modifiers

:not modifier

:exact modifier

FHIRPath expressions

Using fhirpath_to_column

Combining filters

Empty query