intel
diff --git a/‎.github/workflows/pr-code-format.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/pr-code-format.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎clang-tools-extra/clang-tidy/tool/run-clang-tidy.py‎
Lines changed: 4 additions & 6 deletions b/‎clang-tools-extra/clang-tidy/tool/run-clang-tidy.py‎
Lines changed: 4 additions & 6 deletions
diff --git a/‎clang-tools-extra/docs/clang-tidy/Contributing.rst‎
Lines changed: 47 additions & 18 deletions b/‎clang-tools-extra/docs/clang-tidy/Contributing.rst‎
Lines changed: 47 additions & 18 deletions
diff --git a/‎clang/docs/HLSL/ExpectedDifferences.rst‎
Lines changed: 109 additions & 12 deletions b/‎clang/docs/HLSL/ExpectedDifferences.rst‎
Lines changed: 109 additions & 12 deletions
diff --git a/‎clang/docs/tools/generate_formatted_state.py‎
Lines changed: 2 additions & 4 deletions b/‎clang/docs/tools/generate_formatted_state.py‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎clang/include/clang/Basic/BuiltinsWebAssembly.def‎
Lines changed: 9 additions & 0 deletions b/‎clang/include/clang/Basic/BuiltinsWebAssembly.def‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎clang/lib/AST/ByteCode/Compiler.cpp‎
Lines changed: 2 additions & 2 deletions b/‎clang/lib/AST/ByteCode/Compiler.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/lib/Basic/SourceManager.cpp‎
Lines changed: 1 addition & 0 deletions b/‎clang/lib/Basic/SourceManager.cpp‎
Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,9 @@ jobs:
   code_formatter:
     runs-on: ubuntu-latest
     timeout-minutes: 30
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
     if: (github.repository == 'llvm/llvm-project' || github.repository == 'intel/llvm') && !contains(github.event.pull_request.labels.*.name, 'disable-lint')
     steps:
       - name: Fetch LLVM sources
 
@@ -511,12 +511,10 @@ async def main() -> None:
         )
         invocation.append("-list-checks")
         invocation.append("-")
-        if args.quiet:
-            # Even with -quiet we still want to check if we can call clang-tidy.
-            with open(os.devnull, "w") as dev_null:
-                subprocess.check_call(invocation, stdout=dev_null)
-        else:
-            subprocess.check_call(invocation)
+        # Even with -quiet we still want to check if we can call clang-tidy.
+        subprocess.check_call(
+            invocation, stdout=subprocess.DEVNULL if args.quiet else None
+        )
     except:
         print("Unable to run clang-tidy.", file=sys.stderr)
         sys.exit(1)
 
@@ -127,14 +127,15 @@ Writing a clang-tidy Check
 
 So you have an idea of a useful check for :program:`clang-tidy`.
 
-First, if you're not familiar with LLVM development, read through the `Getting
-Started with LLVM`_ document for instructions on setting up your workflow and
+First, if you're not familiar with LLVM development, read through the `Getting Started 
+with the LLVM System`_ document for instructions on setting up your workflow and
 the `LLVM Coding Standards`_ document to familiarize yourself with the coding
-style used in the project. For code reviews we mostly use `LLVM Phabricator`_.
+style used in the project. For code reviews we currently use `LLVM Github`_,
+though historically we used Phabricator.
 
-.. _Getting Started with LLVM: https://llvm.org/docs/GettingStarted.html
+.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html
 .. _LLVM Coding Standards: https://llvm.org/docs/CodingStandards.html
-.. _LLVM Phabricator: https://llvm.org/docs/Phabricator.html
+.. _LLVM Github: https://github.com/llvm/llvm-project
 
 Next, you need to decide which module the check belongs to. Modules
 are located in subdirectories of `clang-tidy/
@@ -336,13 +337,25 @@ a starting point for your test cases.  A rough outline of the process looks like
 The quickest way to prototype your matcher is to use :program:`clang-query` to
 interactively build up your matcher.  For complicated matchers, build up a matching
 expression incrementally and use :program:`clang-query`'s ``let`` command to save named
-matching expressions to simplify your matcher.  Just like breaking up a huge function
-into smaller chunks with intention-revealing names can help you understand a complex
-algorithm, breaking up a matcher into smaller matchers with intention-revealing names
-can help you understand a complicated matcher.  Once you have a working matcher, the
-C++ API will be virtually identical to your interactively constructed matcher.  You can
-use local variables to preserve your intention-revealing names that you applied to
-nested matchers.
+matching expressions to simplify your matcher.
+
+.. code-block:: console
+
+  clang-query> let c1 cxxRecordDecl()
+  clang-query> match c1
+
+Alternatively, pressing the tab key after a previous matcher's opening parenthesis also
+shows which matchers can be chained with the previous matcher, though some matchers that work
+may not be listed.
+
+Just like breaking up a huge function into smaller chunks with intention-revealing names 
+can help you understand a complex algorithm, breaking up a matcher into smaller matchers 
+with intention-revealing names can help you understand a complicated matcher.  
+
+Once you have a working clang-query matcher, the C++ API matchers will be the same or similar 
+to your interactively constructed matcher (there can be cases where they differ slightly). 
+You can use local variables to preserve your intention-revealing names that you applied 
+to nested matchers.
 
 Creating private matchers
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -646,10 +659,13 @@ directory.  The path to this directory is available in a lit test with the varia
 Out-of-tree check plugins
 -------------------------
 
+
 Developing an out-of-tree check as a plugin largely follows the steps
-outlined above. The plugin is a shared library whose code lives outside
+outlined above, including creating a new module and applying the workarounds
+needed to register the module. The plugin is a shared library whose code lives outside
 the clang-tidy build system. Build and link this shared library against
-LLVM as done for other kinds of Clang plugins.
+LLVM as done for other kinds of Clang plugins. If using CMake, use the keyword
+``MODULE`` while invoking ``add_library`` or ``llvm_add_library``.
 
 The plugin can be loaded by passing `-load` to `clang-tidy` in addition to the
 names of the checks to enable.
@@ -664,6 +680,19 @@ compiled against the version of clang-tidy that will be loading the plugin.
 The plugins can use threads, TLS, or any other facilities available to in-tree
 code which is accessible from the external headers.
 
+Note that testing out-of-tree checks might involve getting ``llvm-lit`` from an LLVM 
+installation compiled from source. See `Getting Started with the LLVM System`_ for ways 
+to do so.
+
+Alternatively, get `lit`_ following the `test-suite guide`_ and get the `FileCheck`_ binary, 
+and write a version of `check_clang_tidy.py`_ to suit your needs.
+
+.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html
+.. _test-suite guide: https://llvm.org/docs/TestSuiteGuide.html
+.. _lit: https://llvm.org/docs/CommandGuide/lit.html
+.. _FileCheck: https://llvm.org/docs/CommandGuide/FileCheck.html
+.. _check_clang_tidy.py: https://github.com/llvm/llvm-project/blob/main/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+
 Running clang-tidy on LLVM
 --------------------------
 
@@ -688,10 +717,10 @@ warnings and errors. The script provides multiple configuration flags.
 
 * To restrict the files examined you can provide one or more regex arguments
   that the file names are matched against.
-  ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy
+  ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze `clang-tidy`
   checks. It may also be necessary to restrict the header files that warnings
-  are displayed from using the ``-header-filter`` flag. It has the same behavior
-  as the corresponding :program:`clang-tidy` flag.
+  are displayed from by using the ``-header-filter`` and ``-exclude-header-filter`` flags. 
+  They have the same behavior as the corresponding :program:`clang-tidy` flags.
 
 * To apply suggested fixes ``-fix`` can be passed as an argument. This gathers
   all changes in a temporary directory and applies them. Passing ``-format``
@@ -758,4 +787,4 @@ There is only one argument that controls profile storage:
 
   * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify
     ``-store-check-profile=.``, then the profile will still be saved to
-    ``/foo/<ISO8601-like timestamp>-example.cpp.json``
+    ``/foo/<ISO8601-like timestamp>-example.cpp.json``
@@ -54,6 +54,19 @@ HLSL 202x based on proposal
 and
 `0008 <https://github.com/microsoft/hlsl-specs/blob/main/proposals/0008-non-member-operator-overloading.md>`_.
 
+The largest difference between Clang and DXC's overload resolution is the
+algorithm used for identifying best-match overloads. There are more details
+about the algorithmic differences in the :ref:`multi_argument_overloads` section
+below. There are three high-level differences that should be highlighted:
+
+* **There should be no cases** where DXC and Clang both successfully
+  resolve an overload where the resolved overload is different between the two.
+* There are cases where Clang will successfully resolve an overload that DXC
+  wouldn't because we've trimmed the overload set in Clang to remove ambiguity.
+* There are cases where DXC will successfully resolve an overload that Clang
+  will not for two reasons: (1) DXC only generates partial overload sets for
+  builtin functions and (2) DXC resolves cases that probably should be ambiguous.
+
 Clang's implementation extends standard overload resolution rules to HLSL
 library functionality. This causes subtle changes in overload resolution
 behavior between Clang and DXC. Some examples include:
@@ -71,18 +84,23 @@ behavior between Clang and DXC. Some examples include:
     uint U;
     int I;
     float X, Y, Z;
-    double3 A, B;
+    double3 R, G;
   }
 
-  void twoParams(int, int);
-  void twoParams(float, float);
+  void takesSingleDouble(double);
+  void takesSingleDouble(vector<double, 1>);
+
+  void scalarOrVector(double);
+  void scalarOrVector(vector<double, 2>);
 
   export void call() {
-    halfOrInt16(U); // DXC: Fails with call ambiguous between int16_t and uint16_t overloads
-                    // Clang: Resolves to halfOrInt16(uint16_t).
-    halfOrInt16(I); // All: Resolves to halfOrInt16(int16_t).
     half H;
+    halfOrInt16(I); // All: Resolves to halfOrInt16(int16_t).
+
   #ifndef IGNORE_ERRORS
+    halfOrInt16(U); // All: Fails with call ambiguous between int16_t and uint16_t
+                    // overloads
+
     // asfloat16 is a builtin with overloads for half, int16_t, and uint16_t.
     H = asfloat16(I); // DXC: Fails to resolve overload for int.
                       // Clang: Resolves to asfloat16(int16_t).
@@ -94,21 +112,28 @@ behavior between Clang and DXC. Some examples include:
 
     takesDoubles(X, Y, Z); // Works on all compilers
   #ifndef IGNORE_ERRORS
-    fma(X, Y, Z); // DXC: Fails to resolve no known conversion from float to double.
+    fma(X, Y, Z); // DXC: Fails to resolve no known conversion from float to
+                  //   double.
                   // Clang: Resolves to fma(double,double,double).
-  #endif
 
-    double D = dot(A, B); // DXC: Resolves to dot(double3, double3), fails DXIL Validation.
+    double D = dot(R, G); // DXC: Resolves to dot(double3, double3), fails DXIL Validation.
                           // FXC: Expands to compute double dot product with fmul/fadd
-                          // Clang: Resolves to dot(float3, float3), emits conversion warnings.
+                          // Clang: Fails to resolve as ambiguous against
+                          //   dot(half, half) or dot(float, float)
+  #endif
 
   #ifndef IGNORE_ERRORS
     tan(B); // DXC: resolves to tan(float).
             // Clang: Fails to resolve, ambiguous between integer types.
 
-    twoParams(I, X); // DXC: resolves twoParams(int, int).
-                     // Clang: Fails to resolve ambiguous conversions.
   #endif
+
+    double D;
+    takesSingleDouble(D); // All: Fails to resolve ambiguous conversions.
+    takesSingleDouble(R); // All: Fails to resolve ambiguous conversions.
+
+    scalarOrVector(D); // All: Resolves to scalarOrVector(double).
+    scalarOrVector(R); // All: Fails to resolve ambiguous conversions.
   }
 
 .. note::
@@ -119,3 +144,75 @@ behavior between Clang and DXC. Some examples include:
   diagnostic notifying the user of the conversion rather than silently altering
   precision relative to the other overloads (as FXC does) or generating code
   that will fail validation (as DXC does).
+
+.. _multi_argument_overloads:
+
+Multi-Argument Overloads
+------------------------
+
+In addition to the differences in single-element conversions, Clang and DXC
+differ dramatically in multi-argument overload resolution. C++ multi-argument
+overload resolution behavior (or something very similar) is required to
+implement
+`non-member operator overloading <https://github.com/microsoft/hlsl-specs/blob/main/proposals/0008-non-member-operator-overloading.md>`_.
+
+Clang adopts the C++-inspired language from the
+`draft HLSL specification <https://microsoft.github.io/hlsl-specs/specs/hlsl.pdf>`_,
+where an overload ``f1`` is a better candidate than ``f2`` if, for every argument, the
+conversion sequence in ``f1`` is not worse than the corresponding conversion sequence
+in ``f2``, and for at least one argument it is better.
+
+.. code-block:: c++
+
+  cbuffer CB {
+    int I;
+    float X;
+    float4 V;
+  }
+
+  void twoParams(int, int);
+  void twoParams(float, float);
+  void threeParams(float, float, float);
+  void threeParams(float4, float4, float4);
+
+  export void call() {
+    twoParams(I, X); // DXC: resolves twoParams(int, int).
+                     // Clang: Fails to resolve ambiguous conversions.
+
+    threeParams(X, V, V); // DXC: resolves threeParams(float4, float4, float4).
+                          // Clang: Fails to resolve ambiguous conversions.
+  }
+
+In the examples above, ``twoParams`` called with mixed parameters produces
+implicit conversion sequences that are { ExactMatch, FloatingIntegral } and {
+FloatingIntegral, ExactMatch }. In both cases an argument has a worse conversion
+in the other sequence, so the overload is ambiguous.
+
+In the ``threeParams`` example the sequences are { ExactMatch, VectorTruncation,
+VectorTruncation } and { VectorSplat, ExactMatch, ExactMatch }. Again, in both
+cases at least one parameter has a worse conversion in the other sequence, so
+the overload is ambiguous.
+
+.. note::
+
+  The behavior of DXC described below is not officially documented, so it is
+  gleaned from observation and a bit of reading the source.
+
+DXC's approach for determining the best overload produces an integer score value
+for each implicit conversion sequence for each argument expression. Scores for
+casts are based on a bitmask construction that is complicated to reverse
+engineer. It seems that:
+
+* Exact match is 0
+* Dimension increase is 1
+* Promotion is 2
+* Integral -> Float conversion is 4
+* Float -> Integral conversion is 8
+* Cast is 16
+
+The masks are bitwise OR'd together to produce a score for the cast.
+
+The scores of each conversion sequence are then summed to generate a score for
+the overload candidate. The overload candidate with the lowest score is the best
+candidate. If more than one overload matches the lowest score, the call
+is ambiguous.
@@ -78,8 +78,6 @@ def get_style(count, passed):
      - {style2}`{percent}%`
 """
 
-FNULL = open(os.devnull, "w")
-
 
 with open(DOC_FILE, "wb") as output:
     cleanfiles = open(CLEAN_FILE, "wb")
@@ -101,8 +99,8 @@ def get_style(count, passed):
                 # interested in it, just the return code.
                 git_check = subprocess.Popen(
                     ["git", "ls-files", "--error-unmatch", act_sub_dir],
-                    stdout=FNULL,
-                    stderr=FNULL,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
                 )
                 if git_check.wait() != 0:
                     print("Skipping directory: ", act_sub_dir)
 
@@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128")
 
+TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16")
 TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
 
@@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16")
 TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16")
 TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "fp16")
 
+TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16")
 TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128")
@@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
 
+TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16")
 TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")
 
+TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "simd128")
+
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
 
 
@@ -5324,11 +5324,11 @@ bool Compiler<Emitter>::VisitVectorUnaryOperator(const UnaryOperator *E) {
 
   auto UnaryOp = E->getOpcode();
   if (UnaryOp != UO_Plus && UnaryOp != UO_Minus && UnaryOp != UO_LNot &&
-      UnaryOp != UO_Not)
+      UnaryOp != UO_Not && UnaryOp != UO_AddrOf)
     return this->emitInvalid(E);
 
   // Nothing to do here.
-  if (UnaryOp == UO_Plus)
+  if (UnaryOp == UO_Plus || UnaryOp == UO_AddrOf)
     return this->delegate(SubExpr);
 
   if (!Initializing) {
 
@@ -350,6 +350,7 @@ void SourceManager::clearIDTables() {
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
 
+  IncludedLocMap.clear();
   if (LineTable)
     LineTable->clear();