@@ -32,7 +32,8 @@ impl<'n> Debug for NexusChannel<'n> {
32
32
fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
33
33
write ! (
34
34
f,
35
- "I/O chan '{nex}' core:{core}({cur}) [R:{r} W:{w} L:{l} C:{c}]" ,
35
+ "{io} chan '{nex}' core:{core}({cur}) [R:{r} W:{w} L:{l} C:{c}]" ,
36
+ io = if self . is_io_chan { "I/O" } else { "Aux" } ,
36
37
nex = self . nexus. nexus_name( ) ,
37
38
core = self . core,
38
39
cur = Cores :: current( ) ,
@@ -76,37 +77,23 @@ impl Display for DrEvent {
76
77
}
77
78
}
78
79
80
+ #[ inline( always) ]
81
+ fn is_channel_debug_enabled ( ) -> bool {
82
+ super :: ENABLE_NEXUS_CHANNEL_DEBUG . load ( Ordering :: SeqCst )
83
+ }
84
+
79
85
impl < ' n > NexusChannel < ' n > {
80
- /// TODO
86
+ /// Creates a new nexus I/O channel.
81
87
pub ( crate ) fn new ( nexus : Pin < & mut Nexus < ' n > > ) -> Self {
82
88
debug ! ( "{nexus:?}: new channel on core {c}" , c = Cores :: current( ) ) ;
83
89
84
90
let b_init_thrd_hdls =
85
91
super :: ENABLE_IO_ALL_THRD_NX_CHAN . load ( Ordering :: SeqCst ) ;
92
+
86
93
let is_io_chan =
87
94
Thread :: current ( ) . unwrap ( ) != Thread :: primary ( ) || b_init_thrd_hdls;
88
95
89
- let mut writers = Vec :: new ( ) ;
90
- let mut readers = Vec :: new ( ) ;
91
-
92
- if is_io_chan {
93
- nexus. children_iter ( ) . filter ( |c| c. is_healthy ( ) ) . for_each (
94
- |c| match ( c. get_io_handle ( ) , c. get_io_handle ( ) ) {
95
- ( Ok ( w) , Ok ( r) ) => {
96
- writers. push ( w) ;
97
- readers. push ( r) ;
98
- }
99
- _ => {
100
- c. set_faulted_state ( FaultReason :: CantOpen ) ;
101
- error ! (
102
- "Failed to get I/O handle for {c}, \
103
- skipping block device",
104
- c = c. uri( )
105
- )
106
- }
107
- } ,
108
- ) ;
109
- } else {
96
+ if !is_io_chan {
110
97
// If we are here, this means the nexus channel being created is not
111
98
// the one to be used for normal IOs. Such a channel is
112
99
// created in rebuild path today, and it's on the init
@@ -118,12 +105,16 @@ impl<'n> NexusChannel<'n> {
118
105
// And the rebuild IOs are dispatched by
119
106
// directly calling write API without going via writers abstraction.
120
107
// Refer GTM-1075 for the race condition details.
121
- debug ! ( "{nexus:?}: skip nexus channel setup({t:?}). is_io_channel: {is_io_chan}" , t = Thread :: current( ) . unwrap( ) ) ;
108
+ debug ! (
109
+ "{nexus:?}: skipping nexus channel setup on init thread \
110
+ ({t:?}): not I/O channel",
111
+ t = Thread :: current( ) . unwrap( )
112
+ ) ;
122
113
}
123
114
124
- Self {
125
- writers,
126
- readers,
115
+ let mut res = Self {
116
+ writers : Vec :: new ( ) ,
117
+ readers : Vec :: new ( ) ,
127
118
detached : Vec :: new ( ) ,
128
119
io_logs : nexus. io_log_channels ( ) ,
129
120
previous_reader : UnsafeCell :: new ( 0 ) ,
@@ -133,10 +124,19 @@ impl<'n> NexusChannel<'n> {
133
124
frozen_ios : Vec :: new ( ) ,
134
125
core : Cores :: current ( ) ,
135
126
is_io_chan,
127
+ } ;
128
+
129
+ res. connect_children ( ) ;
130
+
131
+ if is_channel_debug_enabled ( ) {
132
+ debug ! ( "{res:?}: after new channel creation:" ) ;
133
+ res. dump_dbg ( ) ;
136
134
}
135
+
136
+ res
137
137
}
138
138
139
- /// TODO
139
+ /// Destroys a nexus I/O channel.
140
140
pub ( crate ) fn destroy ( mut self ) {
141
141
debug ! (
142
142
"{nex:?}: destroying I/O channel on core {core}" ,
@@ -145,6 +145,7 @@ impl<'n> NexusChannel<'n> {
145
145
) ;
146
146
self . writers . clear ( ) ;
147
147
self . readers . clear ( ) ;
148
+ self . detached . clear ( ) ;
148
149
self . io_logs . clear ( ) ;
149
150
}
150
151
@@ -238,6 +239,11 @@ impl<'n> NexusChannel<'n> {
238
239
}
239
240
240
241
debug ! ( "{self:?}: device '{device_name}' detached" ) ;
242
+
243
+ if is_channel_debug_enabled ( ) {
244
+ debug ! ( "{self:?}: after detach:" ) ;
245
+ self . dump_dbg ( ) ;
246
+ }
241
247
}
242
248
243
249
/// Disconnects previously detached device handles by dropping them.
@@ -269,11 +275,34 @@ impl<'n> NexusChannel<'n> {
269
275
/// we simply put back all the channels, and reopen the bdevs that are in
270
276
/// the online state.
271
277
pub ( crate ) fn reconnect_all ( & mut self ) {
278
+ debug ! ( "{self:?}: child devices reconnecting..." ) ;
279
+
280
+ if is_channel_debug_enabled ( ) {
281
+ debug ! ( "{self:?}: before reconnection:" ) ;
282
+ self . dump_dbg ( ) ;
283
+ }
284
+
272
285
// clear the vector of channels and reset other internal values,
273
286
// clearing the values will drop any existing handles in the
274
287
// channel
275
288
self . previous_reader = UnsafeCell :: new ( 0 ) ;
276
289
290
+ if self . is_io_channel ( ) {
291
+ self . connect_children ( ) ;
292
+ }
293
+
294
+ self . reconnect_io_logs ( ) ;
295
+
296
+ if is_channel_debug_enabled ( ) {
297
+ debug ! ( "{self:?}: after reconnection:" ) ;
298
+ self . dump_dbg ( ) ;
299
+ }
300
+
301
+ debug ! ( "{self:?}: child devices reconnected" ) ;
302
+ }
303
+
304
+ /// (Re)connects readers and writers.
305
+ fn connect_children ( & mut self ) {
277
306
// nvmx will drop the I/O qpairs which is different from all other
278
307
// bdevs we might be dealing with. So instead of clearing and refreshing
279
308
// which had no side effects before, we create a new vector and
@@ -290,6 +319,8 @@ impl<'n> NexusChannel<'n> {
290
319
( Ok ( w) , Ok ( r) ) => {
291
320
writers. push ( w) ;
292
321
readers. push ( r) ;
322
+
323
+ debug ! ( "{self:?}: connecting child device : {c:?}" ) ;
293
324
}
294
325
_ => {
295
326
c. set_faulted_state ( FaultReason :: CantOpen ) ;
@@ -322,10 +353,6 @@ impl<'n> NexusChannel<'n> {
322
353
323
354
self . writers = writers;
324
355
self . readers = readers;
325
-
326
- self . reconnect_io_logs ( ) ;
327
-
328
- debug ! ( "{self:?}: child devices reconnected" ) ;
329
356
}
330
357
331
358
/// Reconnects all active I/O logs.
@@ -394,4 +421,47 @@ impl<'n> NexusChannel<'n> {
394
421
trace ! ( "{io:?}: freezing I/O" ) ;
395
422
self . frozen_ios . push ( io)
396
423
}
424
+
425
+ /// Prints elaborate debug info to the logs.
426
+ fn dump_dbg ( & self ) {
427
+ let me = format ! (
428
+ "{self:p} [{io} {c}]" ,
429
+ io = if self . is_io_chan { "I/O" } else { "aux" } ,
430
+ c = self . core,
431
+ ) ;
432
+
433
+ debug ! ( "{me}: debug info: {self:?}" ) ;
434
+
435
+ debug ! ( "{me}: {n} children:" , n = self . nexus( ) . child_count( ) ) ;
436
+ self . nexus ( ) . children_iter ( ) . for_each ( |c| {
437
+ debug ! (
438
+ "{me}: {dev}: {c:?}" ,
439
+ dev = c. get_device_name( ) . unwrap_or( "-" . to_string( ) ) ,
440
+ )
441
+ } ) ;
442
+
443
+ fn dbg_devs (
444
+ prefix : & str ,
445
+ name : & str ,
446
+ devs : & Vec < Box < dyn BlockDeviceHandle > > ,
447
+ ) {
448
+ if devs. is_empty ( ) {
449
+ debug ! ( "{prefix}: no {name}" ) ;
450
+ } else {
451
+ debug ! ( "{prefix}: {n} {name}:" , n = devs. len( ) ) ;
452
+ devs. iter ( ) . for_each ( |dev| {
453
+ debug ! (
454
+ "{prefix}: {d}" ,
455
+ d = dev. get_device( ) . device_name( )
456
+ ) ;
457
+ } ) ;
458
+ }
459
+ }
460
+
461
+ dbg_devs ( & me, "readers" , & self . readers ) ;
462
+ dbg_devs ( & me, "writers" , & self . writers ) ;
463
+ dbg_devs ( & me, "detached" , & self . detached ) ;
464
+
465
+ debug ! ( "{me}: (end)" ) ;
466
+ }
397
467
}
0 commit comments