@@ -148,7 +148,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 
 	blk_finish_plug(&plug);
 
-	BUG_ON(!list_empty(pages));
+	BUG_ON(pages && !list_empty(pages));
 	BUG_ON(readahead_count(rac));
 
 out:
@@ -431,11 +431,103 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+		pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+	int err;
+	struct folio *folio = filemap_alloc_folio(gfp, order);
+
+	if (!folio)
+		return -ENOMEM;
+	if (mark - index < (1UL << order))
+		folio_set_readahead(folio);
+	err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+	if (err)
+		folio_put(folio);
+	else
+		ractl->_nr_pages += 1UL << order;
+	return err;
+}
+
+static void page_cache_ra_order(struct readahead_control *ractl,
+		struct file_ra_state *ra, unsigned int new_order)
+{
+	struct address_space *mapping = ractl->mapping;
+	pgoff_t index = readahead_index(ractl);
+	pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+	pgoff_t mark = index + ra->size - ra->async_size;
+	int err = 0;
+	gfp_t gfp = readahead_gfp_mask(mapping);
+
+	if (!mapping_large_folio_support(mapping) || ra->size < 4)
+		goto fallback;
+
+	limit = min(limit, index + ra->size - 1);
+
+	if (new_order < MAX_PAGECACHE_ORDER) {
+		new_order += 2;
+		if (new_order > MAX_PAGECACHE_ORDER)
+			new_order = MAX_PAGECACHE_ORDER;
+		while ((1 << new_order) > ra->size)
+			new_order--;
+	}
+
+	while (index <= limit) {
+		unsigned int order = new_order;
+
+		/* Align with smaller pages if needed */
+		if (index & ((1UL << order) - 1)) {
+			order = __ffs(index);
+			if (order == 1)
+				order = 0;
+		}
+		/* Don't allocate pages past EOF */
+		while (index + (1UL << order) - 1 > limit) {
+			if (--order == 1)
+				order = 0;
+		}
+		err = ra_alloc_folio(ractl, index, mark, order, gfp);
+		if (err)
+			break;
+		index += 1UL << order;
+	}
+
+	if (index > limit) {
+		ra->size += index - limit - 1;
+		ra->async_size += index - limit - 1;
+	}
+
+	read_pages(ractl, NULL, false);
+
+	/*
+	 * If there were already pages in the page cache, then we may have
+	 * left some gaps.  Let the regular readahead code take care of this
+	 * situation.
+	 */
+	if (!err)
+		return;
+fallback:
+	do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static void ondemand_readahead(struct readahead_control *ractl,
-		bool hit_readahead_marker, unsigned long req_size)
+		struct folio *folio, unsigned long req_size)
 {
 	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
 	struct file_ra_state *ra = ractl->ra;
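
As a reading aid, here is a minimal standalone model (plain userspace C, not kernel code) of the per-folio order selection in the allocation loop added above: start from new_order, drop to the alignment of the current index, and shrink further so the folio does not extend past EOF. pick_order(), lowest_set_bit() and the example window are illustrative stand-ins; __builtin_ctzl() plays the role of the kernel's __ffs().

#include <stdio.h>

#define MAX_PAGECACHE_ORDER 8	/* the !THP value from the patch */

/* stand-in for the kernel's __ffs(): index of the lowest set bit */
static unsigned int lowest_set_bit(unsigned long x)
{
	return (unsigned int)__builtin_ctzl(x);
}

static unsigned int pick_order(unsigned long index, unsigned long limit,
			       unsigned int order)
{
	/* Align with smaller pages if index is not order-aligned */
	if (index & ((1UL << order) - 1)) {
		order = lowest_set_bit(index);
		if (order == 1)	/* order-1 folios are not used */
			order = 0;
	}
	/* Don't allocate pages past EOF (limit is the last valid index) */
	while (index + (1UL << order) - 1 > limit) {
		if (--order == 1)
			order = 0;
	}
	return order;
}

int main(void)
{
	unsigned long index = 20, limit = 83;	/* arbitrary example window */
	unsigned int new_order = 4;

	while (index <= limit) {
		unsigned int order = pick_order(index, limit, new_order);

		printf("folio at index %lu, order %u (%lu pages)\n",
		       index, order, 1UL << order);
		index += 1UL << order;
	}
	return 0;
}

Running it shows the window being filled with a couple of small folios until the index becomes order-aligned, then full-size order-4 folios, then one smaller folio clamped at the limit.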
@@ -470,12 +562,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
 	}
 
 	/*
-	 * Hit a marked page without valid readahead state.
+	 * Hit a marked folio without valid readahead state.
 	 * E.g. interleaved reads.
 	 * Query the pagecache for async_size, which normally equals to
 	 * readahead size. Ramp it up and use it as the new readahead size.
 	 */
-	if (hit_readahead_marker) {
+	if (folio) {
 		pgoff_t start;
 
 		rcu_read_lock();
@@ -548,7 +640,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
 	}
 
 	ractl->_index = ra->start;
-	do_page_cache_ra(ractl, ra->size, ra->async_size);
+	page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
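
The call above seeds page_cache_ra_order() with folio_order() of the folio that triggered async readahead (0 on the sync path), and page_cache_ra_order() bumps that by 2, capped at MAX_PAGECACHE_ORDER and at the window size. Below is a small standalone sketch (userspace C; ramp_order() is an illustrative helper, not a kernel function) of how the order ramps up over successive rounds, assuming the marked folio was allocated at the previous round's order.

#include <stdio.h>

#define MAX_PAGECACHE_ORDER 8	/* non-THP value from the patch */

static unsigned int ramp_order(unsigned int prev_order, unsigned long ra_size)
{
	unsigned int new_order = prev_order;

	if (new_order < MAX_PAGECACHE_ORDER) {
		new_order += 2;
		if (new_order > MAX_PAGECACHE_ORDER)
			new_order = MAX_PAGECACHE_ORDER;
		/* never allocate folios larger than the readahead window */
		while ((1UL << new_order) > ra_size)
			new_order--;
	}
	return new_order;
}

int main(void)
{
	unsigned long ra_size = 256;	/* e.g. a fully ramped 1MB window */
	unsigned int order = 0;		/* sync path passes 0 */
	int round;

	for (round = 0; round < 6; round++) {
		order = ramp_order(order, ra_size);
		printf("round %d: allocating order-%u folios\n", round, order);
	}
	return 0;
}

With a fully ramped window the printed orders go 2, 4, 6, 8 and then stay at 8, the non-THP cap defined in the patch.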
@@ -576,7 +668,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
 	}
 
 	/* do read-ahead */
-	ondemand_readahead(ractl, false, req_count);
+	ondemand_readahead(ractl, NULL, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
@@ -605,7 +697,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
 		return;
 
 	/* do read-ahead */
-	ondemand_readahead(ractl, true, req_count);
+	ondemand_readahead(ractl, folio, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
 
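Finally, a standalone sketch (userspace C) of the readahead-mark placement in ra_alloc_folio(): the folio whose index range covers mark = start + ra->size - ra->async_size gets folio_set_readahead(), and it is that folio which later reaches page_cache_async_ra() above, carrying its order into the next round. folio_gets_mark() and the example numbers are illustrative, not part of the patch.

#include <stdio.h>

static int folio_gets_mark(unsigned long mark, unsigned long index,
			   unsigned int order)
{
	/* unsigned compare: folios past the mark wrap around and never match */
	return mark - index < (1UL << order);
}

int main(void)
{
	unsigned long start = 128, size = 64, async_size = 32;
	unsigned long mark = start + size - async_size;	/* index 160 */
	unsigned long index = start;
	unsigned int order = 4;		/* assume order-4 folios throughout */

	while (index < start + size) {
		printf("folio [%lu..%lu] %s\n",
		       index, index + (1UL << order) - 1,
		       folio_gets_mark(mark, index, order) ? "<- PG_readahead" : "");
		index += 1UL << order;
	}
	return 0;
}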