Skip to content

Commit b8daf93

Browse files
timsaucer and claude committed
Add unit tests for greatest, least, nvl2, and ifnull functions
Tests cover multiple data types (integers, strings), null handling (all-null, partial-null), multiple arguments, and ifnull/nvl equivalence. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4d749c7 commit b8daf93

1 file changed

Lines changed: 162 additions & 0 deletions

File tree

python/tests/test_functions.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,3 +1468,165 @@ def test_coalesce(df):
14681468
assert result.column(0) == pa.array(
14691469
["Hello", "fallback", "!"], type=pa.string_view()
14701470
)
1471+
1472+
1473+
def test_greatest(df):
    """Check ``f.greatest`` on integer and string columns, including nulls.

    The ``df`` argument (presumably a pytest fixture) is not used here; each
    case builds its own frame from a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Integer rows: (1, 3, 2), (5, 2, 8) and a final row that is null in
    # every column.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two arguments: the all-null row is expected to stay null.
    two_col_expr = f.greatest(column("a"), column("b")).alias("greatest_ab")
    two_col_batches = int_frame.select(two_col_expr).collect()
    expected_two = pa.array([3, 5, None], type=pa.int64())
    assert two_col_batches[0].column(0) == expected_two

    # Three arguments over the same rows.
    three_col_expr = f.greatest(column("a"), column("b"), column("c")).alias(
        "greatest_abc"
    )
    three_col_batches = int_frame.select(three_col_expr).collect()
    expected_three = pa.array([3, 8, None], type=pa.int64())
    assert three_col_batches[0].column(0) == expected_three

    # Partial nulls: each row has exactly one non-null value, and that value
    # is the expected result.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.greatest(column("x"), column("y")).alias("g")
    partial_batches = partial_frame.select(partial_expr).collect()
    assert partial_batches[0].column(0) == pa.array([5, 10], type=pa.int64())

    # String columns: compared through ``to_pylist`` so only the values, not
    # the Arrow string type of the result, are checked.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.greatest(column("s1"), column("s2")).alias("g")
    text_batches = text_frame.select(text_expr).collect()
    assert text_batches[0].column(0).to_pylist() == ["banana", "cherry"]
1524+
1525+
1526+
def test_least(df):
    """Check ``f.least`` on integer and string columns, including nulls.

    The ``df`` argument (presumably a pytest fixture) is not used here; each
    case builds its own frame from a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Integer rows: (1, 3, 2), (5, 2, 8) and a final row that is null in
    # every column.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two arguments: the all-null row is expected to stay null.
    two_col_expr = f.least(column("a"), column("b")).alias("least_ab")
    two_col_batches = int_frame.select(two_col_expr).collect()
    expected_two = pa.array([1, 2, None], type=pa.int64())
    assert two_col_batches[0].column(0) == expected_two

    # Three arguments over the same rows; column "c" never holds the
    # smallest value, so the expectation matches the two-argument case.
    three_col_expr = f.least(column("a"), column("b"), column("c")).alias(
        "least_abc"
    )
    three_col_batches = int_frame.select(three_col_expr).collect()
    expected_three = pa.array([1, 2, None], type=pa.int64())
    assert three_col_batches[0].column(0) == expected_three

    # Partial nulls: each row has exactly one non-null value, and that value
    # is the expected result.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.least(column("x"), column("y")).alias("l")
    partial_batches = partial_frame.select(partial_expr).collect()
    assert partial_batches[0].column(0) == pa.array([5, 10], type=pa.int64())

    # String columns: compared through ``to_pylist`` so only the values, not
    # the Arrow string type of the result, are checked.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.least(column("s1"), column("s2")).alias("l")
    text_batches = text_frame.select(text_expr).collect()
    assert text_batches[0].column(0).to_pylist() == ["apple", "apricot"]
1575+
1576+
1577+
def test_nvl2(df):
    """Check ``f.nvl2`` on integer and string columns.

    For each row the expected value comes from the second argument when the
    first argument is non-null, and from the third argument when it is null.

    The ``df`` argument (presumably a pytest fixture) is not used here; each
    case builds its own frame from a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Column "a" is the null probe; "b" and "c" are the two candidate outputs.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
            pa.array([100, 200, 300, 400]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Rows 0 and 2 have a null "a", so they take "c"; rows 1 and 3 take "b".
    int_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    int_batches = int_frame.select(int_expr).collect()
    expected_ints = pa.array([100, 20, 300, 40], type=pa.int64())
    assert int_batches[0].column(0) == expected_ints

    # Same selection rule with string columns; compared through ``to_pylist``
    # so only the values are checked.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["x", None]),
            pa.array(["not_null", "not_null"]),
            pa.array(["is_null", "is_null"]),
        ],
        names=["a", "b", "c"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    text_batches = text_frame.select(text_expr).collect()
    assert text_batches[0].column(0).to_pylist() == ["not_null", "is_null"]
1609+
1610+
1611+
def test_ifnull(df):
    """Check ``f.ifnull`` and that it yields the same column as ``f.nvl``.

    The ``df`` argument (presumably a pytest fixture) is not used here; the
    test builds its own frame from a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Column "a" has nulls in rows 0 and 2; "b" supplies the replacements.
    batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
        ],
        names=["a", "b"],
    )
    frame = session.create_dataframe([[batch]])

    # Expected: "a" where it is non-null, otherwise "b".
    ifnull_expr = f.ifnull(column("a"), column("b")).alias("result")
    ifnull_values = frame.select(ifnull_expr).collect()[0].column(0)
    assert ifnull_values == pa.array([10, 1, 30, 4], type=pa.int64())

    # The same inputs through nvl must produce an equal column.
    nvl_expr = f.nvl(column("a"), column("b")).alias("nvl_result")
    nvl_values = frame.select(nvl_expr).collect()[0].column(0)
    assert ifnull_values == nvl_values

0 commit comments

Comments
 (0)