Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions crates/core/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,8 @@ expr_fn!(length, string);
expr_fn!(char_length, string);
expr_fn!(chr, arg, "Returns the character with the given code.");
expr_fn_vec!(coalesce);
// Multi-argument comparison functions: the Python wrappers forward any number
// of expressions to these (`f.greatest(*args)` / `f.least(*args)`), and both
// are registered with the Python module in `init_module`.
expr_fn_vec!(greatest);
expr_fn_vec!(least);
expr_fn!(cos, num);
expr_fn!(cosh, num);
expr_fn!(cot, num);
Expand Down Expand Up @@ -543,6 +545,11 @@ expr_fn!(
x y,
"Returns x if x is not NULL otherwise returns y."
);
// Three-argument companion to `nvl`; registered with the Python module in
// `init_module` and called from Python as `f.nvl2(x.expr, y.expr, z.expr)`.
expr_fn!(
nvl2,
x y z,
"Returns y if x is not NULL; otherwise returns z."
);
expr_fn!(nullif, arg_1 arg_2);
expr_fn!(
octet_length,
Expand Down Expand Up @@ -981,13 +988,15 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(floor))?;
m.add_wrapped(wrap_pyfunction!(from_unixtime))?;
m.add_wrapped(wrap_pyfunction!(gcd))?;
m.add_wrapped(wrap_pyfunction!(greatest))?;
// m.add_wrapped(wrap_pyfunction!(grouping))?;
m.add_wrapped(wrap_pyfunction!(in_list))?;
m.add_wrapped(wrap_pyfunction!(initcap))?;
m.add_wrapped(wrap_pyfunction!(isnan))?;
m.add_wrapped(wrap_pyfunction!(iszero))?;
m.add_wrapped(wrap_pyfunction!(levenshtein))?;
m.add_wrapped(wrap_pyfunction!(lcm))?;
m.add_wrapped(wrap_pyfunction!(least))?;
m.add_wrapped(wrap_pyfunction!(left))?;
m.add_wrapped(wrap_pyfunction!(length))?;
m.add_wrapped(wrap_pyfunction!(ln))?;
Expand All @@ -1005,6 +1014,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(named_struct))?;
m.add_wrapped(wrap_pyfunction!(nanvl))?;
m.add_wrapped(wrap_pyfunction!(nvl))?;
m.add_wrapped(wrap_pyfunction!(nvl2))?;
m.add_wrapped(wrap_pyfunction!(now))?;
m.add_wrapped(wrap_pyfunction!(nullif))?;
m.add_wrapped(wrap_pyfunction!(octet_length))?;
Expand Down
69 changes: 69 additions & 0 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@
"floor",
"from_unixtime",
"gcd",
"greatest",
"ifnull",
"in_list",
"initcap",
"isnan",
Expand All @@ -157,6 +159,7 @@
"last_value",
"lcm",
"lead",
"least",
"left",
"length",
"levenshtein",
Expand Down Expand Up @@ -212,6 +215,7 @@
"ntile",
"nullif",
"nvl",
"nvl2",
"octet_length",
"order_by",
"overlay",
Expand Down Expand Up @@ -1027,6 +1031,34 @@ def gcd(x: Expr, y: Expr) -> Expr:
return Expr(f.gcd(x.expr, y.expr))


def greatest(*args: Expr) -> Expr:
    """Pick the largest value among the given expressions, row by row.

    NULL inputs are skipped; the result is NULL only when every expression is NULL.

    Args:
        *args: Expressions to compare.

    Returns:
        An expression evaluating to the greatest of the non-NULL inputs.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 3], "b": [2, 1]})
        >>> result = df.select(
        ...     dfn.functions.greatest(dfn.col("a"), dfn.col("b")).alias("greatest"))
        >>> result.collect_column("greatest")[0].as_py()
        2
        >>> result.collect_column("greatest")[1].as_py()
        3
    """
    # Unwrap into a separate name so the ``*args`` parameter keeps its type.
    raw_exprs = [item.expr for item in args]
    return Expr(f.greatest(*raw_exprs))


def ifnull(x: Expr, y: Expr) -> Expr:
    """Returns ``x`` if ``x`` is not NULL. Otherwise returns ``y``.

    Args:
        x: Expression returned when it is not NULL.
        y: Fallback expression returned when ``x`` is NULL.

    Returns:
        The result of :py:func:`nvl` applied to ``x`` and ``y``.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [None, 1], "b": [10, 20]})
        >>> result = df.select(
        ...     dfn.functions.ifnull(dfn.col("a"), dfn.col("b")).alias("ifnull"))
        >>> result.collect_column("ifnull")[0].as_py()
        10
        >>> result.collect_column("ifnull")[1].as_py()
        1

    See Also:
        This is an alias for :py:func:`nvl`.
    """
    # Delegate rather than duplicate so the alias can never drift from ``nvl``.
    return nvl(x, y)


def initcap(string: Expr) -> Expr:
"""Set the initial letter of each word to capital.

Expand Down Expand Up @@ -1080,6 +1112,25 @@ def lcm(x: Expr, y: Expr) -> Expr:
return Expr(f.lcm(x.expr, y.expr))


def least(*args: Expr) -> Expr:
    """Pick the smallest value among the given expressions, row by row.

    NULL inputs are skipped; the result is NULL only when every expression is NULL.

    Args:
        *args: Expressions to compare.

    Returns:
        An expression evaluating to the least of the non-NULL inputs.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [1, 3], "b": [2, 1]})
        >>> result = df.select(
        ...     dfn.functions.least(dfn.col("a"), dfn.col("b")).alias("least"))
        >>> result.collect_column("least")[0].as_py()
        1
        >>> result.collect_column("least")[1].as_py()
        1
    """
    # Unwrap into a separate name so the ``*args`` parameter keeps its type.
    raw_exprs = [item.expr for item in args]
    return Expr(f.least(*raw_exprs))


def left(string: Expr, n: Expr) -> Expr:
"""Returns the first ``n`` characters in the ``string``.

Expand Down Expand Up @@ -1264,6 +1315,24 @@ def nvl(x: Expr, y: Expr) -> Expr:
return Expr(f.nvl(x.expr, y.expr))


def nvl2(x: Expr, y: Expr, z: Expr) -> Expr:
    """Choose between ``y`` and ``z`` depending on whether ``x`` is NULL.

    Args:
        x: Expression tested for NULL.
        y: Result when ``x`` is not NULL.
        z: Result when ``x`` is NULL.

    Returns:
        An expression evaluating to ``y`` or ``z`` as described above.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"a": [None, 1], "b": [10, 20], "c": [30, 40]})
        >>> result = df.select(
        ...     dfn.functions.nvl2(
        ...         dfn.col("a"), dfn.col("b"), dfn.col("c")).alias("nvl2")
        ... )
        >>> result.collect_column("nvl2")[0].as_py()
        30
        >>> result.collect_column("nvl2")[1].as_py()
        20
    """
    # Name the three operands by role before handing them to the native binding.
    tested, when_present, when_null = x.expr, y.expr, z.expr
    return Expr(f.nvl2(tested, when_present, when_null))


def octet_length(arg: Expr) -> Expr:
"""Returns the number of bytes of a string.

Expand Down
162 changes: 162 additions & 0 deletions python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,3 +1435,165 @@ def test_coalesce(df):
assert result.column(0) == pa.array(
["Hello", "fallback", "!"], type=pa.string_view()
)


def test_greatest(df):
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df fixture is accepted but never used in this test. Because pytest will still construct the fixture, this adds unnecessary setup cost and can slow the suite. Either remove the df parameter or refactor the test to reuse the provided fixture/context instead of creating a new SessionContext.

Suggested change
def test_greatest(df):
def test_greatest():

Copilot uses AI. Check for mistakes.
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays(
[
pa.array([1, 5, None]),
pa.array([3, 2, None]),
pa.array([2, 8, None]),
],
names=["a", "b", "c"],
)
df_test = ctx.create_dataframe([[batch]])

# Test greatest with two columns
result = df_test.select(
f.greatest(column("a"), column("b")).alias("greatest_ab")
).collect()[0]
assert result.column(0) == pa.array([3, 5, None], type=pa.int64())

# Test greatest with three columns
result = df_test.select(
f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
).collect()[0]
assert result.column(0) == pa.array([3, 8, None], type=pa.int64())

# Test greatest with nulls mixed in (partial nulls)
batch2 = pa.RecordBatch.from_arrays(
[
pa.array([None, 10]),
pa.array([5, None]),
],
names=["x", "y"],
)
df_test2 = ctx.create_dataframe([[batch2]])
result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
0
]
assert result.column(0) == pa.array([5, 10], type=pa.int64())

# Test greatest with string columns
batch3 = pa.RecordBatch.from_arrays(
[
pa.array(["apple", "cherry"]),
pa.array(["banana", "apricot"]),
],
names=["s1", "s2"],
)
df_test3 = ctx.create_dataframe([[batch3]])
result = df_test3.select(
f.greatest(column("s1"), column("s2")).alias("g")
).collect()[0]
assert result.column(0).to_pylist() == ["banana", "cherry"]


def test_least(df):
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df fixture is accepted but never used in this test. Because pytest will still construct the fixture, this adds unnecessary setup cost and can slow the suite. Either remove the df parameter or refactor the test to reuse the provided fixture/context instead of creating a new SessionContext.

Suggested change
def test_least(df):
def test_least():

Copilot uses AI. Check for mistakes.
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays(
[
pa.array([1, 5, None]),
pa.array([3, 2, None]),
pa.array([2, 8, None]),
],
names=["a", "b", "c"],
)
df_test = ctx.create_dataframe([[batch]])

# Test least with two columns
result = df_test.select(
f.least(column("a"), column("b")).alias("least_ab")
).collect()[0]
assert result.column(0) == pa.array([1, 2, None], type=pa.int64())

# Test least with three columns
result = df_test.select(
f.least(column("a"), column("b"), column("c")).alias("least_abc")
).collect()[0]
assert result.column(0) == pa.array([1, 2, None], type=pa.int64())

# Test least with partial nulls
batch2 = pa.RecordBatch.from_arrays(
[
pa.array([None, 10]),
pa.array([5, None]),
],
names=["x", "y"],
)
df_test2 = ctx.create_dataframe([[batch2]])
result = df_test2.select(f.least(column("x"), column("y")).alias("l")).collect()[0]
assert result.column(0) == pa.array([5, 10], type=pa.int64())

# Test least with string columns
batch3 = pa.RecordBatch.from_arrays(
[
pa.array(["apple", "cherry"]),
pa.array(["banana", "apricot"]),
],
names=["s1", "s2"],
)
df_test3 = ctx.create_dataframe([[batch3]])
result = df_test3.select(f.least(column("s1"), column("s2")).alias("l")).collect()[
0
]
assert result.column(0).to_pylist() == ["apple", "apricot"]


def test_nvl2(df):
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays(
[
Comment on lines +1544 to +1547
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df fixture is accepted but never used in this test. Because pytest will still construct the fixture, this adds unnecessary setup cost and can slow the suite. Either remove the df parameter or refactor the test to reuse the provided fixture/context instead of creating a new SessionContext.

Copilot uses AI. Check for mistakes.
pa.array([None, 1, None, 4]),
pa.array([10, 20, 30, 40]),
pa.array([100, 200, 300, 400]),
],
names=["a", "b", "c"],
)
df_test = ctx.create_dataframe([[batch]])

# nvl2 returns b when a is not null, c when a is null
result = df_test.select(
f.nvl2(column("a"), column("b"), column("c")).alias("result")
).collect()[0]
assert result.column(0) == pa.array([100, 20, 300, 40], type=pa.int64())

# Test with string columns
batch2 = pa.RecordBatch.from_arrays(
[
pa.array(["x", None]),
pa.array(["not_null", "not_null"]),
pa.array(["is_null", "is_null"]),
],
names=["a", "b", "c"],
)
df_test2 = ctx.create_dataframe([[batch2]])
result = df_test2.select(
f.nvl2(column("a"), column("b"), column("c")).alias("result")
).collect()[0]
assert result.column(0).to_pylist() == ["not_null", "is_null"]


def test_ifnull(df):
ctx = SessionContext()
batch = pa.RecordBatch.from_arrays(
Comment on lines +1578 to +1580
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df fixture is accepted but never used in this test. Because pytest will still construct the fixture, this adds unnecessary setup cost and can slow the suite. Either remove the df parameter or refactor the test to reuse the provided fixture/context instead of creating a new SessionContext.

Copilot uses AI. Check for mistakes.
[
pa.array([None, 1, None, 4]),
pa.array([10, 20, 30, 40]),
],
names=["a", "b"],
)
df_test = ctx.create_dataframe([[batch]])

# ifnull returns a when a is not null, b when a is null (same as nvl)
result = df_test.select(
f.ifnull(column("a"), column("b")).alias("result")
).collect()[0]
assert result.column(0) == pa.array([10, 1, 30, 4], type=pa.int64())

# Verify ifnull matches nvl behavior
result_nvl = df_test.select(
f.nvl(column("a"), column("b")).alias("nvl_result")
).collect()[0]
assert result.column(0) == result_nvl.column(0)
Loading