Skip to content

Commit

Permalink
Merge pull request #422 from humanmade/fix-slow-scandir
Browse files Browse the repository at this point in the history
Fix slow call to scandir() when uploading files
  • Loading branch information
joehoyle authored Jul 8, 2020
2 parents f2a21ad + 923a5ec commit a025465
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 4 deletions.
18 changes: 17 additions & 1 deletion inc/class-plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ public function setup() {
add_action( 'wp_calculate_image_srcset', [ $this, 'add_s3_signed_params_to_attachment_image_srcset' ], 10, 5 );

add_filter( 'wp_generate_attachment_metadata', [ $this, 'set_attachment_private_on_generate_attachment_metadata' ], 10, 2 );

add_filter( 'pre_wp_unique_filename_file_list', [ $this, 'get_files_for_unique_filename_file_list' ], 10, 3 );
}

/**
Expand Down Expand Up @@ -491,7 +493,7 @@ public function set_attachment_files_acl( int $attachment_id, string $acl ) : ?W
* Get all the files stored for a given attachment.
*
* @param integer $attachment_id
* @return array Array of all full paths to the attachment's files.
* @return list<string> Array of all full paths to the attachment's files.
*/
public static function get_attachment_files( int $attachment_id ) : array {
$uploadpath = wp_get_upload_dir();
Expand Down Expand Up @@ -611,4 +613,18 @@ public function set_attachment_private_on_generate_attachment_metadata( array $m

return $metadata;
}

/**
* Override the files used for wp_unique_filename() comparisons
*
* @param array|null $files
* @param string $dir
* @return array
*/
public function get_files_for_unique_filename_file_list( ?array $files, string $dir, string $filename ) : array {
$name = pathinfo( $filename, PATHINFO_FILENAME );
// The s3:// streamwrapper support listing by partial prefixes with wildcards.
// For example, scandir( s3://bucket/2019/06/my-image* )
return scandir( trailingslashit( $dir ) . $name . '*' );
}
}
46 changes: 43 additions & 3 deletions inc/class-stream-wrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -583,11 +583,51 @@ public function dir_opendir( $path, $options ) {
}

if ( $params['Key'] ) {
$params['Key'] = rtrim( $params['Key'], $delimiter ) . $delimiter;
// Support paths ending in "*" to allow listing of arbitrary prefixes.
if ( substr( $params['Key'], -1, 1 ) === '*' ) {
$params['Key'] = rtrim( $params['Key'], '*' );
// Set the opened bucket prefix to be the directory. This is because $this->openedBucketPrefix
// will be removed from the resulting keys, and we want to return all files in the directory
// of the wildcard.
$this->openedBucketPrefix = substr( $params['Key'], 0, ( strrpos( $params['Key'], '/' ) ?: 0 ) + 1 );
} else {
$params['Key'] = rtrim( $params['Key'], $delimiter ) . $delimiter;
$this->openedBucketPrefix = $params['Key'];
}
$op['Prefix'] = $params['Key'];
}

$this->openedBucketPrefix = $params['Key'];
// WordPress attempts to scan whole directories via wp_unique_filename(), which can be very slow
// when there are thousands of files in a single uploads sub directory. This is due to behaviour
// introduced in https://core.trac.wordpress.org/changeset/46822/. Essentially when a file is uploaded,
// it's not enough to make sure no filename already exists (and append a `-1` to the end), because
// image sizes of that image could also conflict with already existing files too. Because image sizes
// (in the form of -800x600.jpg) can be arbitrary integers, it's not possible to iterate the filesystem
// for all possible matching / colliding file names. WordPress core uses a preg-match on all files that
// might conflict with the given filename.
//
// Fortunately, we can make use of S3 arbitrary prefixes to optimize this query. The WordPress regex
// done via _wp_check_existing_file_names() is essentially `^$filename-...`, so we can modify the prefix
// to include the filename, therefore only return a subset of files from S3 that are more likely to match
// the preg_match() call.
//
// Essentially, wp_unique_filename( my-file.jpg ) doing a `scandir( s3://bucket/2019/04/ )` will actually result in an s3
// listObject query for `s3://bucket/2019/04/my-file` which means even if there are millions of files in `2019/04/` we only
// return a much smaller subset.
//
// Anyone reading this far, brace yourselves for a mighty horrible hack.
$backtrace = debug_backtrace( 0, 3 ); // phpcs:ignore PHPCompatibility.FunctionUse.ArgumentFunctionsReportCurrentValue.NeedsInspection
if ( isset( $backtrace[1]['function'] )
&& $backtrace[1]['function'] === 'scandir'
&& isset( $backtrace[2]['function'] )
&& $backtrace[2]['function'] === 'wp_unique_filename' && isset( $backtrace[2]['args'][1] )
&& isset( $op['Prefix'] )
) {
/** @var string $filename */
$filename = $backtrace[2]['args'][1];
$name = pathinfo( $filename, PATHINFO_FILENAME );
$op['Prefix'] .= $name;
}

// Filter our "/" keys added by the console as directories, and ensure
// that if a filter function is provided that it passes the filter.
Expand Down Expand Up @@ -828,7 +868,7 @@ private function getOptions( $removeContextData = false ) {
*/
private function getOption( $name ) {
$options = $this->getOptions();
return $options[ $name ];
return $options[ $name ] ?? null;
}

/**
Expand Down
14 changes: 14 additions & 0 deletions tests/test-s3-uploads-stream-wrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,18 @@ public function test_wp_handle_upload() {
$this->assertTrue( file_exists( $result['file'] ) );
$this->assertEquals( $contents, file_get_contents( $result['file'] ) );
}

public function test_list_directory_with_wildcard() {
$upload_dir = wp_upload_dir();

file_put_contents( $upload_dir['path'] . '/my-file-scaled.jpg', '' );
file_put_contents( $upload_dir['path'] . '/some-file-scaled.jpg', '' );
$files = scandir( $upload_dir['path'] . '/my-file*' );
$this->assertEquals(
[
'my-file-scaled.jpg',
],
$files
);
}
}
12 changes: 12 additions & 0 deletions tests/test-s3-uploads.php
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,16 @@ function test_get_s3_bucket() {
$this->assertEquals( 'hmn-uploads', $uploads->get_s3_bucket() );
}

function test_wp_unique_filename() {
S3_Uploads\Plugin::get_instance()->setup();
$upload_dir = wp_upload_dir();

file_put_contents( $upload_dir['path'] . '/my-file-scaled.jpg', '' );
$filename = wp_unique_filename( $upload_dir['path'], 'my-file.jpg' );
$this->assertEquals( 'my-file-1.jpg', $filename );

$filename = wp_unique_filename( $upload_dir['path'], 'my-new-file.jpg' );
$this->assertEquals( 'my-new-file.jpg', $filename );
}

}

0 comments on commit a025465

Please sign in to comment.