Skip to content

Commit a22293f

Browse files
committed
actually find longest list of keys so we export all data consistently
Signed-off-by: John Seekins <john@robot-house.us>
1 parent 250fbc7 commit a22293f

1 file changed

Lines changed: 13 additions & 1 deletion

File tree

utils.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# For general helpers, regexes, or shared logic (e.g. phone/address parsing functions).
2+
import copy
23
import logging
34
import os
45
import polars
@@ -95,7 +96,18 @@ def _flatdict(d: dict, parent_key: str = "", sep: str = ".", list_sep: str = ","
9596
def convert_to_dataframe(d: dict) -> polars.DataFrame:
9697
"""internal dict to dataframe"""
9798
flatdata = [_flatdict(f) for f in d.values()]
98-
fieldnames = [k for k in flatdata[0].keys() if k not in flatdata_filtered_keys]
99+
"""
100+
Field names should find the _longest_ set of keys, not just the first one
101+
to avoid dropping data by accident from some rows (with things like additional inspections)
102+
"""
103+
longest: list = list(flatdata[0].keys())
104+
longest_len: int = len(longest)
105+
for dobj in flatdata:
106+
keys = list(dobj.keys())
107+
if len(keys) > longest_len:
108+
longest = copy.deepcopy(keys)
109+
longest_len = len(longest)
110+
fieldnames = [k for k in longest if k not in flatdata_filtered_keys]
99111
# https://docs.pola.rs/api/python/stable/reference/api/polars.from_dicts.html
100112
df = polars.from_dicts(flatdata, schema=fieldnames)
101113
# logger.debug("Dataframe: %s", df)

0 commit comments

Comments
 (0)