@@ -1083,35 +1083,31 @@ public function parseH(\DOMElement $e, $is_backcompat = false, $has_nested_mf =
10831083
10841084 }
10851085
1086- // Check for u- url
1087- if (! array_key_exists ( ' url ' , $ return ) && ! $ is_backcompat ) {
1088- $ url = null ;
1089- // Look for img @src
1090- if ($ e ->tagName == 'a ' or $ e ->tagName == 'area ' ) {
1091- $ url = $ e ->getAttribute ('href ' );
1092- }
1093-
1094- // Look for nested a @ href
1095- foreach ( $ this -> xpath -> query ( './a[count(preceding-sibling::a)+ count(following-sibling::a)=0] ' , $ e ) as $ em ) {
1096- $ emNames = mfNamesFromElement ( $ em , ' h- ' );
1097- if ( empty ( $ emNames )) {
1098- $ url = $ em -> getAttribute ( ' href ' );
1099- break ;
1100- }
1101- }
1102-
1103- // Look for nested area @src
1104- foreach ( $ this ->xpath ->query (' ./area[count(preceding-sibling::area)+count(following-sibling::area)=0] ' , $ e ) as $ em ) {
1105- $ emNames = mfNamesFromElement ( $ em , ' h- ' );
1106- if ( empty ( $ emNames )) {
1107- $ url = $ em -> getAttribute ( ' href ' ) ;
1108- break ;
1086+ // Do we need to imply a url property?
1087+ // if no explicit "url" property, and no other explicit u-* properties, and no nested microformats
1088+ if (! array_key_exists ( ' url ' , $ return ) && ! in_array ( ' u- ' , $ prefixes ) && ! $ has_nested_mf && ! $ is_backcompat ) {
1089+ // a.h-x[href] or area.h-x[href]
1090+ if (( $ e ->tagName === 'a ' || $ e ->tagName === 'area ' ) && $ e -> hasAttribute ( ' href ' ) ) {
1091+ $ return [ ' url ' ][] = $ this -> resolveUrl ( $ e ->getAttribute ('href ' ) );
1092+ } else {
1093+ $ xpaths = array (
1094+ // .h-x>a[href]:only-of-type:not[.h-*]
1095+ './a[not(contains(concat(" ", @class), " h-")) and count(../a) = 1 and @href] ' ,
1096+ // .h-x>area[href]:only-of-type:not[.h-*]
1097+ ' ./area[not(contains(concat(" ", @class), " h-")) and count(../area) = 1 and @href] ' ,
1098+ // .h-x>:only-child:not[.h-*]>a[href]:only-of-type:not[.h-*]
1099+ ' ./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(a) = 1]/a[not(contains(concat(" ", @class), " h-")) and @href] ' ,
1100+ // .h-x>:only-child:not[.h-*]>area[href]:only-of-type:not[.h-*]
1101+ ' ./*[not(contains(concat(" ", @class), " h-")) and count(../*) = 1 and count(area) = 1]/area[not(contains(concat(" ", @class), " h-")) and @href] '
1102+ );
1103+ foreach ( $ xpaths as $ xpath ) {
1104+ $ url = $ this ->xpath ->query ($ xpath , $ e );
1105+ if ( $ url !== false && $ url -> length === 1 ) {
1106+ $ return [ ' url ' ][] = $ this -> resolveUrl ( $ url -> item ( 0 )-> getAttribute ( ' href ' ));
1107+ break ;
1108+ }
11091109 }
11101110 }
1111-
1112- if (!is_null ($ url )) {
1113- $ return ['url ' ][] = $ this ->resolveUrl ($ url );
1114- }
11151111 }
11161112
11171113 // Make sure things are unique and in alphabetical order
0 commit comments