@@ -66,6 +66,9 @@ use super::{
     LogStream, ARROW_FILE_EXTENSION,
 };
 
+// ~16K rows is the default in-memory limit for each RecordBatch
+const MAX_RECORD_BATCH_SIZE: usize = 16384;
+
 /// Regex pattern for parsing arrow file names.
 ///
 /// # Format
@@ -113,8 +116,8 @@ pub struct Stream {
     pub metadata: RwLock<LogStreamMetadata>,
     pub data_path: PathBuf,
     pub options: Arc<Options>,
-    /// Writer with a 16KB buffer size for optimal I/O performance.
-    pub writer: Mutex<Writer<16384>>,
+    /// Writer with a ~16K-row limit for optimal I/O performance.
+    pub writer: Mutex<Writer<MAX_RECORD_BATCH_SIZE>>,
     pub ingestor_id: Option<String>,
 }
 
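Note: the const generic on `Writer` was always a row count, not a byte size, so the old "16KB buffer" doc comment misread the `16384`. A minimal sketch of the pattern, assuming only what the diff shows (that `Writer` takes its limit as `const N: usize`; the real type carries actual writer state):

```rust
// Minimal sketch, not the real Writer: illustrates how a
// `const N: usize` parameter threads the row limit through the type.
struct Writer<const N: usize> {
    rows_buffered: usize,
}

impl<const N: usize> Writer<N> {
    // True if `rows` more rows still fit under the compile-time limit N.
    fn has_capacity_for(&self, rows: usize) -> bool {
        self.rows_buffered + rows <= N
    }
}

// Naming the limit makes the type self-documenting:
// Mutex<Writer<MAX_RECORD_BATCH_SIZE>> rather than Mutex<Writer<16384>>.
```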
@@ -147,6 +150,11 @@ impl Stream {
         custom_partition_values: &HashMap<String, String>,
         stream_type: StreamType,
     ) -> Result<(), StagingError> {
+        let row_count = record.num_rows();
+        if row_count > MAX_RECORD_BATCH_SIZE {
+            return Err(StagingError::RowLimit(row_count));
+        }
+
         let mut guard = self.writer.lock().unwrap();
         if self.options.mode != Mode::Query || stream_type == StreamType::Internal {
             match guard.disk.get_mut(schema_key) {
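The new guard assumes a `StagingError::RowLimit(usize)` variant, which is not shown in this diff. Because `push` now rejects oversized batches outright, any caller that can receive more than `MAX_RECORD_BATCH_SIZE` rows would have to split the batch first; a hypothetical helper (not part of this PR) using arrow's zero-copy `RecordBatch::slice`:

```rust
use arrow_array::RecordBatch;

// Hypothetical helper, not part of this PR: split a batch into slices
// of at most `max_rows` rows so each one passes the row-limit check.
// `slice` is zero-copy; each chunk only bumps an Arc per column.
fn chunk_batch(batch: &RecordBatch, max_rows: usize) -> Vec<RecordBatch> {
    assert!(max_rows > 0, "max_rows must be non-zero");
    (0..batch.num_rows())
        .step_by(max_rows)
        .map(|offset| {
            let len = max_rows.min(batch.num_rows() - offset);
            batch.slice(offset, len)
        })
        .collect()
}
```

Each chunk can then go through `Stream::push` on its own.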