1515 */
1616package org .htmlunit .cyberneko ;
1717
18- import java .util .ArrayList ;
1918import java .util .HashMap ;
20- import java .util .List ;
2119import java .util .Locale ;
2220
2321import org .htmlunit .cyberneko .util .FastHashMap ;
3230 * @author Ahmed Ashour
3331 * @author Marc Guillemot
3432 * @author Ronald Brill
33+ * @author René Schwietzke
3534 */
3635public class HTMLElements implements HTMLElementsProvider {
3736
@@ -202,7 +201,7 @@ public class HTMLElements implements HTMLElementsProvider {
202201 private final HashMap <String , Element > elementsByNameForReference_ = new HashMap <>();
203202
204203 // this is an optimized version which will be queried later
205- FastHashMap <String , Element >[] elementsByNamePerLength_ ;
204+ FastHashMap <String , Element > elementsByName_ ;
206205
207206 public HTMLElements () {
208207 final Element [][] elementsArray = new Element [26 ][];
@@ -557,7 +556,7 @@ public HTMLElements() {
557556 for (final Element [] elements : elementsArray ) {
558557 if (elements != null ) {
559558 for (final Element element : elements ) {
560- this . elementsByNameForReference_ .put (element .name , element );
559+ elementsByNameForReference_ .put (element .name , element );
561560 }
562561 }
563562 }
@@ -566,28 +565,22 @@ public HTMLElements() {
566565 setupOptimizedVersions ();
567566 }
568567
568+ /**
569+ * Adds or replaces an element definition in the collection.
570+ * Rebuilds the internal lookup structures to reflect the change.
571+ *
572+ * @param element the element to add or replace
573+ */
569574 public void setElement (final Element element ) {
570- this . elementsByNameForReference_ .put (element .name , element );
575+ elementsByNameForReference_ .put (element .name , element );
571576
572577 // rebuild the information "trees"
573578 setupOptimizedVersions ();
574579 }
575580
576581 private void setupOptimizedVersions () {
577- int maxCode = -1 ;
578- ArrayList <List <Element >> elementsByLength = new ArrayList <>(10 );
579- for (final Element element : elementsByNameForReference_ .values ()) {
580- if (element .code > maxCode ) {
581- maxCode = element .code ;
582- }
583-
584- int length = element .lowercaseName .length ();
585- while (elementsByLength .size () < length ) {
586- elementsByLength .add (new ArrayList <>(30 ));
587- }
588- List <Element > elements = elementsByLength .get (length - 1 );
589- elements .add (element );
590- }
582+ // get us the max element code
583+ final int maxCode = elementsByNameForReference_ .values ().stream ().mapToInt (e -> e .code ).max ().orElse (0 );
591584
592585 // we got x amount of elements + 1 unknown
593586 // put that into an array instead of a map, that
@@ -598,22 +591,16 @@ private void setupOptimizedVersions() {
598591 elementsByNameForReference_ .values ().forEach (v -> elementsByCode_ [v .code ] = v );
599592 elementsByCode_ [NO_SUCH_ELEMENT .code ] = NO_SUCH_ELEMENT ;
600593
601- // get us a second version that is lowercase stringified to
602- // reduce lookup overhead
603- elementsByNamePerLength_ = new FastHashMap [elementsByLength .size ()];
604- int i = 0 ;
605- for (final List <Element > elements : elementsByLength ) {
606- if (elements .size () > 0 ) {
607- FastHashMap <String , Element > entry = new FastHashMap <>(elements .size (), 0.70f );
608- for (Element element : elements ) {
609- entry .put (element .lowercaseName , element );
610-
611- // initialize cross references to parent elements
612- defineParents (element );
613- }
614- elementsByNamePerLength_ [i ] = entry ;
615- }
616- i ++;
594+ // index every element by its lowercase name for fast lookups;
595+ // NOTE(review): only the lowercase name is stored in the loop below, so the
596+ // 2x sizing for "both versions" looks outdated - confirm
597+ elementsByName_ = new FastHashMap <>(2 * maxCode , 0.50f );
598+
599+ for (final Element element : elementsByNameForReference_ .values ()) {
600+ // initialize cross references to parent elements
601+ defineParents (element );
602+
603+ elementsByName_ .put (element .lowercaseName , element );
617604 }
618605
619606 // NO_SUCH_ELEMENT is not part of the elements processed in the loop above
@@ -661,24 +648,14 @@ public final Element getElement(final String ename) {
661648 */
662649 @ Override
663650 public final Element getElement (final String ename , final Element elementIfNotFound ) {
664- int length = ename .length ();
665- if (length > elementsByNamePerLength_ .length ) {
666- return elementIfNotFound ;
667- }
668-
669- FastHashMap <String , Element > entry = elementsByNamePerLength_ [length - 1 ];
670- if (entry == null ) {
671- return elementIfNotFound ;
672- }
673-
674651 // check the current form casing first, which is mostly lowercase only
675- Element r = entry .get (ename );
652+ Element r = elementsByName_ .get (ename );
676653 if (r == null ) {
677654 // we have not found it in its current form, might be uppercase
678655 // or mixed case, so try all lowercase for sanity, we speculated that
679656 // good HTML is mostly all lowercase in the first place so this is the
680657 // fallback for atypical HTML
681- r = entry .get (ename .toLowerCase (Locale .ROOT ));
658+ r = elementsByName_ .get (ename .toLowerCase (Locale .ROOT ));
682659 }
683660 if (r == null ) {
684661 return elementIfNotFound ;
@@ -692,24 +669,24 @@ public final Element getElement(final String ename, final Element elementIfNotFo
692669 */
693670 @ Override
694671 public final Element getElementLC (final String enameLC , final Element elementIfNotFound ) {
695- int length = enameLC .length ();
696- if (length > elementsByNamePerLength_ .length ) {
697- return elementIfNotFound ;
698- }
699-
700- FastHashMap <String , Element > entry = elementsByNamePerLength_ [length - 1 ];
701- if (entry == null ) {
702- return elementIfNotFound ;
703- }
704-
705- Element r = entry .get (enameLC );
706- if (r == null ) {
707- return elementIfNotFound ;
708- }
709-
710- return r ;
672+ final Element r = elementsByName_ .get (enameLC );
673+ return r == null ? elementIfNotFound : r ;
711674 }
712675
676+ /**
677+ * An implementation of {@link HTMLElementsProvider} that wraps an {@link HTMLElements}
678+ * instance and adds a simple cache for unknown element lookups.
679+ * <p>
680+ * This class is optimized for repeated element name lookups, especially for names
681+ * that are not present in the known elements set. It avoids repeated lowercasing
682+ * and unnecessary lookups for unknown elements by caching misses.
683+ * </p>
684+ *
685+ * <p>
686+ * This implementation is not thread-safe.
687+ * </p>
688+ *
689+ */
713690 public static class HTMLElementsWithCache implements HTMLElementsProvider {
714691
715692 private final HTMLElements htmlElements_ ;
@@ -719,16 +696,22 @@ public static class HTMLElementsWithCache implements HTMLElementsProvider {
719696
720697 public HTMLElementsWithCache (final HTMLElements htmlElements ) {
721698 htmlElements_ = htmlElements ;
722- unknownElements_ = new FastHashMap <>(11 , 0.70f );
699+ unknownElements_ = new FastHashMap <>(11 , 0.50f );
723700 }
724701
702+ /**
703+ * {@inheritDoc}
704+ */
725705 @ Override
726706 public Element getElement (short code ) {
727707 return htmlElements_ .getElement (code );
728708 }
729709
710+ /**
711+ * {@inheritDoc}
712+ */
730713 @ Override
731- public Element getElement (String ename ) {
714+ public Element getElement (final String ename ) {
732715 Element element = getElement (ename , htmlElements_ .NO_SUCH_ELEMENT );
733716 if (element == htmlElements_ .NO_SUCH_ELEMENT ) {
734717 element = new Element (UNKNOWN ,
@@ -742,39 +725,12 @@ public Element getElement(String ename) {
742725 return element ;
743726 }
744727
728+ /**
729+ * {@inheritDoc}
730+ */
745731 @ Override
746- public Element getElement (String ename , Element elementIfNotFound ) {
747- int length = ename .length ();
748- if (length > htmlElements_ .elementsByNamePerLength_ .length ) {
749- if (unknownElements_ .get (ename ) != null ) {
750- // we added it to the cache, so we know it has been
751- // queried once unsuccessfully before
752- return elementIfNotFound ;
753- }
754-
755- // remember that we had a miss
756- unknownElements_ .put (ename , Boolean .TRUE );
757-
758- return elementIfNotFound ;
759- }
760-
761- FastHashMap <String , Element > entry = htmlElements_ .elementsByNamePerLength_ [length - 1 ];
762- if (entry == null ) {
763- // check first if we know that we don't know and avoid the
764- // lowercasing later
765- if (unknownElements_ .get (ename ) != null ) {
766- // we added it to the cache, so we know it has been
767- // queried once unsuccessfully before
768- return elementIfNotFound ;
769- }
770-
771- // remember that we had a miss
772- unknownElements_ .put (ename , Boolean .TRUE );
773-
774- return elementIfNotFound ;
775- }
776-
777- Element r = entry .get (ename );
732+ public Element getElement (final String ename , final Element elementIfNotFound ) {
733+ Element r = htmlElements_ .elementsByName_ .get (ename );
778734 if (r == null ) {
779735 // check first if we know that we don't know and avoid the
780736 // lowercasing later
@@ -789,7 +745,7 @@ public Element getElement(String ename, Element elementIfNotFound) {
789745 // good HTML is mostly all lowercase in the first place so this is the
790746 // fallback for atypical HTML
791747 // we also have not seen that element missing yet
792- r = entry .get (ename .toLowerCase (Locale .ROOT ));
748+ r = htmlElements_ . elementsByName_ .get (ename .toLowerCase (Locale .ROOT ));
793749 if (r == null ) {
794750 // remember that we had a miss
795751 unknownElements_ .put (ename , Boolean .TRUE );
@@ -801,62 +757,13 @@ public Element getElement(String ename, Element elementIfNotFound) {
801757 }
802758
803759 @ Override
804- public Element getElementLC (String enameLC , Element elementIfNotFound ) {
805- int length = enameLC .length ();
806- if (length > htmlElements_ .elementsByNamePerLength_ .length ) {
807- if (unknownElements_ .get (enameLC ) != null ) {
808- // we added it to the cache, so we know it has been
809- // queried once unsuccessfully before
810- return elementIfNotFound ;
811- }
812-
813- // remember that we had a miss
814- unknownElements_ .put (enameLC , Boolean .TRUE );
815-
816- return elementIfNotFound ;
817- }
818-
819- FastHashMap <String , Element > entry = htmlElements_ .elementsByNamePerLength_ [length - 1 ];
820- if (entry == null ) {
821- // check first if we know that we don't know and avoid the
822- // lowercasing later
823- if (unknownElements_ .get (enameLC ) != null ) {
824- // we added it to the cache, so we know it has been
825- // queried once unsuccessfully before
826- return elementIfNotFound ;
827- }
828-
829- // remember that we had a miss
830- unknownElements_ .put (enameLC , Boolean .TRUE );
831-
832- return elementIfNotFound ;
833- }
834-
835- Element r = entry .get (enameLC );
836- if (r == null ) {
837- // check first if we know that we don't know and avoid the
838- // lowercasing later
839- if (unknownElements_ .get (enameLC ) != null ) {
840- // we added it to the cache, so we know it has been
841- // queried once unsuccessfully before
842- return elementIfNotFound ;
843- }
844-
845- // remember that we had a miss
846- unknownElements_ .put (enameLC , Boolean .TRUE );
847-
848- return elementIfNotFound ;
849- }
850-
851- return r ;
760+ public Element getElementLC (final String enameLC , final Element elementIfNotFound ) {
761+ return htmlElements_ .getElementLC (enameLC , elementIfNotFound );
852762 }
853-
854763 }
855764
856765 /**
857766 * Element information.
858- *
859- * @author Andy Clark
860767 */
861768 public static class Element {
862769
0 commit comments