|
48 | 48 | } |
49 | 49 | } |
50 | 50 |
|
51 | | - fn emit_to_values(&mut self, emit_to: EmitTo) -> Vec<Vec<T::Native>> { |
52 | | - let num_emitted = match emit_to { |
53 | | - EmitTo::All => self.num_groups, |
54 | | - EmitTo::First(n) => n, |
55 | | - }; |
56 | | - |
57 | | - let mut group_values: Vec<Vec<T::Native>> = vec![Vec::new(); num_emitted]; |
58 | | - let mut remaining = HashSet::default(); |
59 | | - |
60 | | - for (group_idx, value) in self.seen.drain() { |
61 | | - if group_idx < num_emitted { |
62 | | - group_values[group_idx].push(value); |
63 | | - } else { |
64 | | - remaining.insert((group_idx - num_emitted, value)); |
65 | | - } |
66 | | - } |
67 | | - |
68 | | - self.seen = remaining; |
69 | | - match emit_to { |
70 | | - EmitTo::All => self.num_groups = 0, |
71 | | - EmitTo::First(n) => self.num_groups = self.num_groups.saturating_sub(n), |
72 | | - } |
73 | | - |
74 | | - group_values |
75 | | - } |
76 | 51 | } |
77 | 52 |
|
78 | 53 | impl<T: ArrowPrimitiveType> Default for PrimitiveDistinctCountGroupsAccumulator<T> |
@@ -106,13 +81,60 @@ where |
106 | 81 | } |
107 | 82 |
|
108 | 83 | fn evaluate(&mut self, emit_to: EmitTo) -> datafusion_common::Result<ArrayRef> { |
109 | | - let group_values = self.emit_to_values(emit_to); |
110 | | - let counts: Vec<i64> = group_values.iter().map(|v| v.len() as i64).collect(); |
| 84 | + let num_emitted = match emit_to { |
| 85 | + EmitTo::All => self.num_groups, |
| 86 | + EmitTo::First(n) => n, |
| 87 | + }; |
| 88 | + |
| 89 | + let mut counts = vec![0i64; num_emitted]; |
| 90 | + |
| 91 | + if matches!(emit_to, EmitTo::All) { |
| 92 | + for &(group_idx, _) in self.seen.iter() { |
| 93 | + counts[group_idx] += 1; |
| 94 | + } |
| 95 | + self.seen.clear(); |
| 96 | + self.num_groups = 0; |
| 97 | + } else { |
| 98 | + let mut remaining = HashSet::default(); |
| 99 | + for (group_idx, value) in self.seen.drain() { |
| 100 | + if group_idx < num_emitted { |
| 101 | + counts[group_idx] += 1; |
| 102 | + } else { |
| 103 | + remaining.insert((group_idx - num_emitted, value)); |
| 104 | + } |
| 105 | + } |
| 106 | + self.seen = remaining; |
| 107 | + self.num_groups = self.num_groups.saturating_sub(num_emitted); |
| 108 | + } |
| 109 | + |
111 | 110 | Ok(Arc::new(Int64Array::from(counts))) |
112 | 111 | } |
113 | 112 |
|
114 | 113 | fn state(&mut self, emit_to: EmitTo) -> datafusion_common::Result<Vec<ArrayRef>> { |
115 | | - let group_values = self.emit_to_values(emit_to); |
| 114 | + let num_emitted = match emit_to { |
| 115 | + EmitTo::All => self.num_groups, |
| 116 | + EmitTo::First(n) => n, |
| 117 | + }; |
| 118 | + |
| 119 | + let mut group_values: Vec<Vec<T::Native>> = vec![Vec::new(); num_emitted]; |
| 120 | + |
| 121 | + if matches!(emit_to, EmitTo::All) { |
| 122 | + for (group_idx, value) in self.seen.drain() { |
| 123 | + group_values[group_idx].push(value); |
| 124 | + } |
| 125 | + self.num_groups = 0; |
| 126 | + } else { |
| 127 | + let mut remaining = HashSet::default(); |
| 128 | + for (group_idx, value) in self.seen.drain() { |
| 129 | + if group_idx < num_emitted { |
| 130 | + group_values[group_idx].push(value); |
| 131 | + } else { |
| 132 | + remaining.insert((group_idx - num_emitted, value)); |
| 133 | + } |
| 134 | + } |
| 135 | + self.seen = remaining; |
| 136 | + self.num_groups = self.num_groups.saturating_sub(num_emitted); |
| 137 | + } |
116 | 138 |
|
117 | 139 | let mut offsets = vec![0i32]; |
118 | 140 | let mut all_values = Vec::new(); |
|
0 commit comments