@@ -418,7 +418,9 @@ protected function index_next_entities( $count = 10000 ) {
418418 }
419419 }
420420 // @TODO: Consider using sha1 hashes to prevent huge URLs from blowing up the memory.
421- $ this ->indexed_assets_urls [ $ data ['attachment_url ' ] ] = true ;
421+ if ( isset ( $ data ['attachment_url ' ] ) ) {
422+ $ this ->indexed_assets_urls [ $ data ['attachment_url ' ] ] = true ;
423+ }
422424 } elseif ( isset ( $ data ['post_content ' ] ) ) {
423425 $ post = $ data ;
424426 $ p = new WP_Block_Markup_Url_Processor ( $ post ['post_content ' ], $ this ->source_site_url );
@@ -522,7 +524,10 @@ protected function frontload_next_entity() {
522524 if ( null === $ this ->next_stage ) {
523525 $ this ->entity_iterator ->set_entities_iterator ( $ this ->create_entity_iterator () );
524526 }
525- $ this ->downloader = new WP_Attachment_Downloader ( $ this ->options ['uploads_path ' ] );
527+ $ this ->downloader = new WP_Attachment_Downloader (
528+ $ this ->options ['uploads_path ' ],
529+ $ this ->options ['attachment_downloader_options ' ] ?? array ()
530+ );
526531 }
527532
528533 // Clear the frontloading events from the previous pass.
@@ -585,7 +590,12 @@ protected function frontload_next_entity() {
585590 break ;
586591 case 'post ' :
587592 if ( isset ( $ data ['post_type ' ] ) && $ data ['post_type ' ] === 'attachment ' ) {
588- $ this ->enqueue_attachment_download ( $ data ['attachment_url ' ] );
593+ if ( isset ( $ data ['attachment_url ' ] ) ) {
594+ $ this ->enqueue_attachment_download ( $ data ['attachment_url ' ] );
595+ } else {
596+ // @TODO: Emit warning / error event
597+ _doing_it_wrong ( __METHOD__ , 'No attachment URL or file path found in the post entity. ' , '1.0 ' );
598+ }
589599 } elseif ( isset ( $ data ['post_content ' ] ) ) {
590600 $ post = $ data ;
591601 $ p = new WP_Block_Markup_Url_Processor ( $ post ['post_content ' ], $ this ->source_site_url );
@@ -596,7 +606,7 @@ protected function frontload_next_entity() {
596606 $ this ->enqueue_attachment_download (
597607 $ p ->get_raw_url (),
598608 array (
599- 'context_path ' => $ post ['source_path ' ] ?? $ post ['slug ' ] ?? null ,
609+ 'context_path ' => $ post ['local_file_path ' ] ?? $ post ['slug ' ] ?? null ,
600610 )
601611 );
602612 }
@@ -644,52 +654,66 @@ protected function import_next_entity() {
644654 switch ( $ entity ->get_type () ) {
645655 case 'post ' :
646656 $ data = $ entity ->get_data ();
647- foreach ( array ( 'guid ' , 'post_content ' , 'post_excerpt ' ) as $ key ) {
648- if ( ! isset ( $ data [ $ key ] ) ) {
649- continue ;
657+ if ( isset ( $ data ['post_type ' ] ) && $ data ['post_type ' ] === 'attachment ' ) {
658+ if ( ! isset ( $ data ['attachment_url ' ] ) ) {
659+ // @TODO: Emit warning / error event
660+ _doing_it_wrong ( __METHOD__ , 'No attachment URL or file path found in the post entity. ' , '1.0 ' );
661+ break ;
650662 }
651- $ p = new WP_Block_Markup_Url_Processor ( $ data [ $ key ], $ this ->source_site_url );
652- while ( $ p ->next_url () ) {
653- // Relative URLs are okay at this stage.
654- if ( ! $ p ->get_raw_url () ) {
663+ $ asset_filename = $ this ->new_asset_filename (
664+ $ data ['attachment_url ' ],
665+ $ data ['local_file_path ' ] ?? $ data ['slug ' ] ?? null
666+ );
667+ unset( $ data ['attachment_url ' ] );
668+ $ data ['local_file_path ' ] = $ this ->options ['uploads_path ' ] . '/ ' . $ asset_filename ;
669+ } else {
670+ foreach ( array ( 'guid ' , 'post_content ' , 'post_excerpt ' ) as $ key ) {
671+ if ( ! isset ( $ data [ $ key ] ) ) {
655672 continue ;
656673 }
674+ $ p = new WP_Block_Markup_Url_Processor ( $ data [ $ key ], $ this ->source_site_url );
675+ while ( $ p ->next_url () ) {
676+ // Relative URLs are okay at this stage.
677+ if ( ! $ p ->get_raw_url () ) {
678+ continue ;
679+ }
657680
658- /**
659- * Any URL that has a corresponding frontloaded file is an asset URL.
660- */
661- $ asset_filename = $ this ->new_asset_filename (
662- $ p ->get_raw_url (),
663- $ data ['source_path ' ] ?? $ data ['slug ' ] ?? null
664- );
665- if ( file_exists ( $ this ->options ['uploads_path ' ] . '/ ' . $ asset_filename ) ) {
666- $ p ->set_raw_url (
667- $ this ->options ['uploads_url ' ] . '/ ' . $ asset_filename
668- );
669681 /**
670- * @TODO: How would we know a specific image block refers to a specific
671- * attachment? We need to cross-correlate that to rewrite the URL.
672- * The image block could have query parameters, too, but presumably the
673- * path would be the same at least? What if the same file is referred
674- * to by two different URLs? e.g. assets.site.com and site.com/assets/ ?
675- * A few ideas: GUID, block attributes, fuzzy matching. Maybe a configurable
676- * strategy? And the API consumer would make the decision?
682+ * Any URL that has a corresponding frontloaded file is an asset URL.
677683 */
678- continue ;
679- }
684+ $ asset_filename = $ this ->new_asset_filename (
685+ $ p ->get_raw_url (),
686+ $ data ['local_file_path ' ] ?? $ data ['slug ' ] ?? null
687+ );
688+ if ( file_exists ( $ this ->options ['uploads_path ' ] . '/ ' . $ asset_filename ) ) {
689+ $ p ->set_raw_url (
690+ $ this ->options ['uploads_url ' ] . '/ ' . $ asset_filename
691+ );
692+ /**
693+ * @TODO: How would we know a specific image block refers to a specific
694+ * attachment? We need to cross-correlate that to rewrite the URL.
695+ * The image block could have query parameters, too, but presumably the
696+ * path would be the same at least? What if the same file is referred
697+ * to by two different URLs? e.g. assets.site.com and site.com/assets/ ?
698+ * A few ideas: GUID, block attributes, fuzzy matching. Maybe a configurable
699+ * strategy? And the API consumer would make the decision?
700+ */
701+ continue ;
702+ }
680703
681- // Absolute URLs are required at this stage.
682- if ( ! $ p ->get_parsed_url () ) {
683- continue ;
684- }
704+ // Absolute URLs are required at this stage.
705+ if ( ! $ p ->get_parsed_url () ) {
706+ continue ;
707+ }
685708
686- $ target_base_url = $ this ->get_url_mapping_target ( $ p ->get_parsed_url () );
687- if ( false !== $ target_base_url ) {
688- $ p ->replace_base_url ( $ target_base_url );
689- continue ;
709+ $ target_base_url = $ this ->get_url_mapping_target ( $ p ->get_parsed_url () );
710+ if ( false !== $ target_base_url ) {
711+ $ p ->replace_base_url ( $ target_base_url );
712+ continue ;
713+ }
690714 }
715+ $ data [ $ key ] = $ p ->get_updated_html ();
691716 }
692- $ data [ $ key ] = $ p ->get_updated_html ();
693717 }
694718 $ entity ->set_data ( $ data );
695719 break ;
0 commit comments