diff --git a/.nuget/NuGet.Config b/.nuget/NuGet.Config new file mode 100644 index 0000000..67f8ea0 --- /dev/null +++ b/.nuget/NuGet.Config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.nuget/NuGet.exe b/.nuget/NuGet.exe new file mode 100644 index 0000000..c41a0d0 Binary files /dev/null and b/.nuget/NuGet.exe differ diff --git a/.nuget/NuGet.targets b/.nuget/NuGet.targets new file mode 100644 index 0000000..3f8c37b --- /dev/null +++ b/.nuget/NuGet.targets @@ -0,0 +1,144 @@ + + + + $(MSBuildProjectDirectory)\..\ + + + false + + + false + + + true + + + false + + + + + + + + + + + $([System.IO.Path]::Combine($(SolutionDir), ".nuget")) + + + + + $(SolutionDir).nuget + + + + $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName.Replace(' ', '_')).config + $(MSBuildProjectDirectory)\packages.$(MSBuildProjectName).config + + + + $(MSBuildProjectDirectory)\packages.config + $(PackagesProjectConfig) + + + + + $(NuGetToolsPath)\NuGet.exe + @(PackageSource) + + "$(NuGetExePath)" + mono --runtime=v4.0.30319 "$(NuGetExePath)" + + $(TargetDir.Trim('\\')) + + -RequireConsent + -NonInteractive + + "$(SolutionDir) " + "$(SolutionDir)" + + + $(NuGetCommand) install "$(PackagesConfig)" -source "$(PackageSources)" $(NonInteractiveSwitch) $(RequireConsentSwitch) -solutionDir $(PaddedSolutionDir) + $(NuGetCommand) pack "$(ProjectPath)" -Properties "Configuration=$(Configuration);Platform=$(Platform)" $(NonInteractiveSwitch) -OutputDirectory "$(PackageOutputDir)" -symbols + + + + RestorePackages; + $(BuildDependsOn); + + + + + $(BuildDependsOn); + BuildPackage; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/AzureExamples/App.config b/AzureExamples/App.config index 8e15646..5d79355 100644 --- a/AzureExamples/App.config +++ b/AzureExamples/App.config @@ -1,6 +1,30 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/AzureExamples/Azure/ConnectedComponents.cs 
b/AzureExamples/Azure/ConnectedComponents.cs index 2254bd7..8641a1b 100644 --- a/AzureExamples/Azure/ConnectedComponents.cs +++ b/AzureExamples/Azure/ConnectedComponents.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/AzureExamples/Azure/GraphGenerator.cs b/AzureExamples/Azure/GraphGenerator.cs index 2ad4cad..20ce3d0 100644 --- a/AzureExamples/Azure/GraphGenerator.cs +++ b/AzureExamples/Azure/GraphGenerator.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/AzureExamples/Azure/Repartition.cs b/AzureExamples/Azure/Repartition.cs index c3742ad..2b6224a 100644 --- a/AzureExamples/Azure/Repartition.cs +++ b/AzureExamples/Azure/Repartition.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/AzureExamples/AzureExamples.csproj b/AzureExamples/AzureExamples.csproj index 2e8b09c..b821c5c 100644 --- a/AzureExamples/AzureExamples.csproj +++ b/AzureExamples/AzureExamples.csproj @@ -1,5 +1,5 @@  - + Debug @@ -11,6 +11,8 @@ AzureExamples v4.5 512 + ..\ + true AnyCPU @@ -31,31 +33,55 @@ prompt 4 + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + true + - - ..\packages\Microsoft.Data.Edm.5.6.0\lib\net40\Microsoft.Data.Edm.dll - - - ..\packages\Microsoft.Data.OData.5.6.0\lib\net40\Microsoft.Data.OData.dll + + False + ..\packages\Microsoft.Data.Edm.5.6.2\lib\net40\Microsoft.Data.Edm.dll - - ..\packages\Microsoft.Data.Services.Client.5.6.0\lib\net40\Microsoft.Data.Services.Client.dll + + False + ..\packages\Microsoft.Data.OData.5.6.2\lib\net40\Microsoft.Data.OData.dll - + False - 
..\packages\Microsoft.WindowsAzure.ConfigurationManager.1.8.0.0\lib\net35-full\Microsoft.WindowsAzure.Configuration.dll + ..\packages\Microsoft.Data.Services.Client.5.6.2\lib\net40\Microsoft.Data.Services.Client.dll - + + ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + False - ..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll + ..\packages\WindowsAzure.Storage.4.3.0\lib\net40\Microsoft.WindowsAzure.Storage.dll - - ..\packages\Newtonsoft.Json.5.0.6\lib\net45\Newtonsoft.Json.dll + + False + ..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll - - ..\packages\System.Spatial.5.6.0\lib\net40\System.Spatial.dll + + False + ..\packages\System.Spatial.5.6.2\lib\net40\System.Spatial.dll @@ -64,6 +90,9 @@ + + Properties\SharedAssemblyInfo.cs + @@ -94,6 +123,13 @@ + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + - \ No newline at end of file diff --git a/ClusterSubmission/AzureSubmission/Microsoft.Research.Naiad.Cluster.Azure.nuspec b/ClusterSubmission/AzureSubmission/Microsoft.Research.Naiad.Cluster.Azure.nuspec deleted file mode 100644 index dfd18b4..0000000 --- a/ClusterSubmission/AzureSubmission/Microsoft.Research.Naiad.Cluster.Azure.nuspec +++ /dev/null @@ -1,42 +0,0 @@ - - - - Microsoft.Research.Naiad.Cluster.Azure - 0.4.1-beta - Naiad - Azure submission tool - naiadquestions@microsoft.com - naiadquestions@microsoft.com,Microsoft - http://www.apache.org/licenses/LICENSE-2.0.html - http://research.microsoft.com/naiad/ - true - - Launcher for submitting Naiad programs to a Windows Azure HDInsight cluster. 
- - Microsoft Corporation - en-US - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ClusterSubmission/AzureSubmission/Program.cs b/ClusterSubmission/AzureSubmission/Program.cs deleted file mode 100644 index ff0d3da..0000000 --- a/ClusterSubmission/AzureSubmission/Program.cs +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Naiad ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. 
- */ - -using Microsoft.Research.Naiad.Util; -using Microsoft.Research.Peloponnese.ClusterUtils; -using Microsoft.Research.Peloponnese.Storage; -using System; -using System.Collections.Generic; -using System.Configuration; -using System.IO; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System.Xml.Linq; - -namespace Microsoft.Research.Naiad.Cluster.Azure -{ - class Program - { - private static Flag ShowHelp = Flags.Define("-h,--help", typeof(bool)); - - private static Flag NumHosts = Flags.Define("-n,--numprocs", 2); - - private static Flag AzureSubscriptionId = Flags.Define("--subscriptionid", typeof(string)); - private static Flag AzureClusterName = Flags.Define("--clustername", typeof(string)); - private static Flag AzureCertificateThumbprint = Flags.Define("--certthumbprint", typeof(string)); - private static Flag AzureStorageAccountName = Flags.Define("--storageaccount", typeof(string)); - private static Flag AzureStorageAccountKey = Flags.Define("--storagekey", typeof(string)); - private static Flag AzureStorageContainerName = Flags.Define("--container", typeof(string)); - - private const string Usage = @"Usage: AzureSubmission [Azure options] NaiadExecutable.exe [Naiad options] - -Runs the given Naiad executable on an HDInsight cluster. - -(N.B. For convenience, each option can be set in the App.config for this program, - using the long form option name.) 
- -Options: - -n,--numhosts Number of Naiad processes (default = 2) - -Azure options: - --subscriptionid Azure subscription ID (default = taken from Powershell settings) - --clustername HDInsight cluster name (default = cluster if a single cluster is registered to all subscriptions) - --storageaccount Azure storage account name for staging resources (default = cluster default storage account) - --storagekey Azure storage account key for staging resources (default = cluster default storage acount key) - --container Azure storage blob container name for staging resources (default = ""staging"")"; - - static int Run(string[] args) - { - - Flags.Parse(ConfigurationManager.AppSettings); - - args = Flags.Parse(args); - - if (ShowHelp.BooleanValue || args.Length == 0) - { - Console.Error.WriteLine(Usage); - return 0; - } - - if (!File.Exists(args[0])) - { - Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]); - Console.Error.WriteLine(Usage); - return -1; - } - - AzureSubscriptions subscriptionManagement = new AzureSubscriptions(); - - if (AzureSubscriptionId.IsSet && AzureCertificateThumbprint.IsSet) - { - subscriptionManagement.AddSubscription(AzureSubscriptionId.StringValue, AzureCertificateThumbprint.StringValue); - } - - string clusterName = null; - if (AzureClusterName.IsSet) - { - clusterName = AzureClusterName.StringValue; - - if (AzureStorageAccountName.IsSet && AzureStorageAccountKey.IsSet) - { - subscriptionManagement.SetClusterAccountAsync(clusterName, AzureStorageAccountName.StringValue, AzureStorageAccountKey.StringValue).Wait(); - } - } - else - { - IEnumerable clusters = subscriptionManagement.GetClusters(); - if (clusters.Count() == 1) - { - clusterName = clusters.Single().Name; - } - else - { - Console.Error.WriteLine("Error: Cluster name must be specified unless there is a single configured cluster in default and supplied subscriptions"); - Console.Error.WriteLine(Usage); - return -1; - } - } - - AzureCluster cluster; - try - { 
- cluster = subscriptionManagement.GetClusterAsync(clusterName).Result; - } - catch (Exception) - { - Console.Error.WriteLine("Error: Failed to find cluster " + clusterName + " in default or supplied subscriptions"); - Console.Error.WriteLine(Usage); - return -1; - } - if (cluster == null) - { - Console.Error.WriteLine("Error: Failed to find cluster {0} in default or supplied subscriptions", clusterName); - Console.Error.WriteLine(Usage); - return -1; - } - - string containerName = "staging"; - if (AzureStorageContainerName.IsSet) - { - containerName = AzureStorageContainerName.StringValue; - } - - // The args are augmented with an additional setting containing the Azure connection string. - args = args.Concat(new string[] { "--addsetting", "Microsoft.Research.Naiad.Cluster.Azure.DefaultConnectionString", string.Format("\"DefaultEndpointsProtocol=https;AccountName={0};AccountKey={1}\"", cluster.StorageAccount.Split('.').First(), cluster.StorageKey) }).ToArray(); - - Console.Error.WriteLine("Submitting job with args: {0}", string.Join(" ", args)); - - AzureDfsClient azureDfs = new AzureDfsClient(cluster.StorageAccount, cluster.StorageKey, containerName); - AzureYarnClient azureYarn = new AzureYarnClient(subscriptionManagement, azureDfs, ConfigHelpers.GetPPMHome(null), clusterName); - AzureYarnSubmission submission = new AzureYarnSubmission(azureDfs, azureYarn, NumHosts, args); - - submission.Submit(); - return submission.Join(); - } - - static void Main(string[] args) - { - try - { - int exitCode = Run(args); - Console.WriteLine("Application return exit code " + exitCode); - } - catch (Exception e) - { - Console.WriteLine("Exception " + e.Message + "\n" + e.ToString()); - } - } - } - -} diff --git a/ClusterSubmission/AzureSubmission/Properties/AssemblyInfo.cs b/ClusterSubmission/AzureSubmission/Properties/AssemblyInfo.cs deleted file mode 100644 index 9927bcb..0000000 --- a/ClusterSubmission/AzureSubmission/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,56 +0,0 @@ 
-/* - * Naiad ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. - */ - -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("NaiadAzureSubmission")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("NaiadAzureSubmission")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("fc1e2dbe-bddf-4550-9834-71e9784b5f30")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.1")] -[assembly: AssemblyFileVersion("0.4.1")] diff --git a/ClusterSubmission/AzureSubmission/packages.config b/ClusterSubmission/AzureSubmission/packages.config deleted file mode 100644 index 8fe539e..0000000 --- a/ClusterSubmission/AzureSubmission/packages.config +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/ClusterSubmission.sln b/ClusterSubmission/ClusterSubmission.sln index 739cf94..f8aab64 100644 --- a/ClusterSubmission/ClusterSubmission.sln +++ b/ClusterSubmission/ClusterSubmission.sln @@ -1,84 +1,32 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.30110.0 +VisualStudioVersion = 12.0.30723.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NaiadPeloponneseSupport", "NaiadPeloponneseSupport\NaiadPeloponneseSupport.csproj", "{271F7100-7AA3-4379-9C58-23618B73A5DD}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AzureSubmission", "AzureSubmission\AzureSubmission.csproj", "{1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LocalSubmission", "LocalSubmission\LocalSubmission.csproj", "{BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "YarnSubmission", "YarnSubmission\YarnSubmission.csproj", 
"{3A289ADE-2782-47D3-B682-C03115332646}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Nuget common files", "Nuget common files", "{B867D391-330A-4D87-9C81-8FA4D9EE05F8}" ProjectSection(SolutionItems) = preProject ConfigFile.targets = ConfigFile.targets InstallConfigFile.ps1 = InstallConfigFile.ps1 EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DependencyLister", "DependencyLister\DependencyLister.csproj", "{4B1A2CC2-1798-472C-954B-9C808B2C0748}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RunNaiad", "RunNaiad\RunNaiad.csproj", "{47D22F4A-8B47-4829-A896-E5318ECA4CC2}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Debug|Mixed Platforms = Debug|Mixed Platforms Debug|x64 = Debug|x64 - Release|Any CPU = Release|Any CPU - Release|Mixed Platforms = Release|Mixed Platforms Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Debug|Any CPU.ActiveCfg = Debug|x64 - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Debug|Mixed Platforms.Build.0 = Debug|x64 {271F7100-7AA3-4379-9C58-23618B73A5DD}.Debug|x64.ActiveCfg = Debug|x64 {271F7100-7AA3-4379-9C58-23618B73A5DD}.Debug|x64.Build.0 = Debug|x64 - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Release|Any CPU.ActiveCfg = Release|x64 - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {271F7100-7AA3-4379-9C58-23618B73A5DD}.Release|Mixed Platforms.Build.0 = Release|x64 {271F7100-7AA3-4379-9C58-23618B73A5DD}.Release|x64.ActiveCfg = Release|x64 {271F7100-7AA3-4379-9C58-23618B73A5DD}.Release|x64.Build.0 = Release|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Debug|Any CPU.ActiveCfg = Debug|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - 
{1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Debug|Mixed Platforms.Build.0 = Debug|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Debug|x64.ActiveCfg = Debug|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Debug|x64.Build.0 = Debug|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Release|Any CPU.ActiveCfg = Release|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Release|Mixed Platforms.Build.0 = Release|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Release|x64.ActiveCfg = Release|x64 - {1A0E23B8-BF34-4DD2-A22D-BB2470B29D7D}.Release|x64.Build.0 = Release|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Debug|Any CPU.ActiveCfg = Debug|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Debug|Mixed Platforms.Build.0 = Debug|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Debug|x64.ActiveCfg = Debug|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Debug|x64.Build.0 = Debug|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Release|Any CPU.ActiveCfg = Release|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Release|Mixed Platforms.ActiveCfg = Release|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Release|Mixed Platforms.Build.0 = Release|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Release|x64.ActiveCfg = Release|x64 - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4}.Release|x64.Build.0 = Release|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Debug|Any CPU.ActiveCfg = Debug|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Debug|Mixed Platforms.Build.0 = Debug|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Debug|x64.ActiveCfg = Debug|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Debug|x64.Build.0 = Debug|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Release|Any CPU.ActiveCfg = Release|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Release|Mixed Platforms.ActiveCfg = Release|x64 - 
{3A289ADE-2782-47D3-B682-C03115332646}.Release|Mixed Platforms.Build.0 = Release|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Release|x64.ActiveCfg = Release|x64 - {3A289ADE-2782-47D3-B682-C03115332646}.Release|x64.Build.0 = Release|x64 - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Debug|Any CPU.Build.0 = Debug|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Debug|x64.ActiveCfg = Debug|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Release|Any CPU.ActiveCfg = Release|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Release|Any CPU.Build.0 = Release|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Release|Mixed Platforms.Build.0 = Release|Any CPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748}.Release|x64.ActiveCfg = Release|Any CPU + {47D22F4A-8B47-4829-A896-E5318ECA4CC2}.Debug|x64.ActiveCfg = Debug|x64 + {47D22F4A-8B47-4829-A896-E5318ECA4CC2}.Debug|x64.Build.0 = Debug|x64 + {47D22F4A-8B47-4829-A896-E5318ECA4CC2}.Release|x64.ActiveCfg = Release|x64 + {47D22F4A-8B47-4829-A896-E5318ECA4CC2}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/ClusterSubmission/DependencyLister/App.config b/ClusterSubmission/DependencyLister/App.config deleted file mode 100644 index 8e15646..0000000 --- a/ClusterSubmission/DependencyLister/App.config +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/DependencyLister/Program.cs b/ClusterSubmission/DependencyLister/Program.cs deleted file mode 100644 index a5a7a1b..0000000 --- a/ClusterSubmission/DependencyLister/Program.cs +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Naiad 
ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. - */ - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using System.Text; -using System.Threading.Tasks; - -namespace Microsoft.Research.Naiad.Cluster.DependencyLister -{ - public class Lister : MarshalByRefObject - { - private static string[] FrameworkAssemblyNames = { "System", "System.Core", "mscorlib", "System.Xml" }; - - /// - /// Returns the non-framework assemblies on which a given assembly depends. 
- /// - /// The initial assembly - /// A set of non-framework assemblies on which the given assembly depends - private static HashSet Dependencies(Assembly source) - { - HashSet visited = new HashSet(); - Queue assemblyQueue = new Queue(); - assemblyQueue.Enqueue(source); - visited.Add(source); - - while (assemblyQueue.Count > 0) - { - Assembly currentAssembly = assemblyQueue.Dequeue(); - - foreach (AssemblyName name in currentAssembly.GetReferencedAssemblies()) - { - Assembly referencedAssembly = Assembly.Load(name); - if (!visited.Contains(referencedAssembly) && !FrameworkAssemblyNames.Contains(name.Name) && !(name.Name.StartsWith("System"))) - { - visited.Add(referencedAssembly); - assemblyQueue.Enqueue(referencedAssembly); - } - } - } - return visited; - } - - /// - /// Returns the locations of non-framework assemblies on which the assembly with the given filename depends. - /// - /// The filename of the assembly - /// An array of filenames for non-framework assemblies on which the given assembly depends - public string[] ListDependencies(string assemblyFilename) - { - Assembly assembly = Assembly.LoadFrom(assemblyFilename); - return Lister.Dependencies(assembly).Select(x => x.Location).ToArray(); - } - } -} diff --git a/ClusterSubmission/DependencyLister/Properties/AssemblyInfo.cs b/ClusterSubmission/DependencyLister/Properties/AssemblyInfo.cs deleted file mode 100644 index eef22f8..0000000 --- a/ClusterSubmission/DependencyLister/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Naiad ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. - */ - -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("Microsoft.Research.Naiad.Cluster.DependencyLister")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("DependencyLister")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("d4a52515-7a5f-4653-80a5-ee390e482006")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.1")] -[assembly: AssemblyFileVersion("0.4.1")] diff --git a/ClusterSubmission/LocalSubmission/App.config b/ClusterSubmission/LocalSubmission/App.config deleted file mode 100644 index 794423a..0000000 --- a/ClusterSubmission/LocalSubmission/App.config +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/LocalSubmission/LocalSubmission.csproj b/ClusterSubmission/LocalSubmission/LocalSubmission.csproj deleted file mode 100644 index 03fd112..0000000 --- a/ClusterSubmission/LocalSubmission/LocalSubmission.csproj +++ /dev/null @@ -1,177 +0,0 @@ - - - - - - Debug - AnyCPU - {BFDBEC5D-4768-4F4A-BD50-16815B6BFEE4} - Exe - Properties - Microsoft.Research.Naiad.Cluster.Local - LocalSubmission - v4.5 - 512 - - - true - bin\x64\Debug\ - DEBUG;TRACE - full - x64 - prompt - MinimumRecommendedRules.ruleset - true - - - bin\x64\Release\ - TRACE - true - pdbonly - x64 - prompt - MinimumRecommendedRules.ruleset - true - - - - False - ..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll - - - False - ..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll - - - False - ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.Hadoop.Client.dll - - - False - 
..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - - - False - ..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll - - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.dll - - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll - - - ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.1.2.0\lib\net40\Microsoft.WindowsAzure.Management.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.Storage.1.1.1\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll - - - False - ..\packages\WindowsAzure.Storage.4.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll - - - False - ..\packages\Newtonsoft.Json.6.0.3\lib\net45\Newtonsoft.Json.dll - - - - - - - - - False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Extensions.dll - - - False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Primitives.dll - - - - False - ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll - - - - - - - - - - - - 
- - - - Designer - - - - - - - {271f7100-7aa3-4379-9c58-23618b73a5dd} - NaiadPeloponneseSupport - - - - - - - - - - - This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/LocalSubmission/Program.cs b/ClusterSubmission/LocalSubmission/Program.cs deleted file mode 100644 index 5eb3c80..0000000 --- a/ClusterSubmission/LocalSubmission/Program.cs +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Naiad ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. - * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. 
- */ - -using Microsoft.Research.Naiad.Util; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Microsoft.Research.Naiad.Cluster.Local -{ - class Program - { - private static Flag ShowHelp = Flags.Define("-h,--help", typeof(bool)); - - private static Flag NumHosts = Flags.Define("-n,--numhosts", 2); - private static Flag NumThreads = Flags.Define("-t,--threads", 8); - private static Flag LocalJobDirectory = Flags.Define("-l,--localdir", "LocalJobs"); - - private const string Usage = @"Usage: LocalSubmission [ptions] NaiadExecutable.exe [Naiad options] - -Runs the given Naiad executable in multiple processes on the local machine. - -(N.B. For convenience, each option can be set in the App.config for this program, - using the long form option name.) - -Options: - -n,--numhosts Number of Naiad processes (default = 2) - -t,--threads Number of worker threads per Naiad process (default = 8) - -l,--localdir Local job working directory (default = '%PWD%\LocalJobs')"; - - static int Run(string[] args) - { - Flags.Parse(System.Configuration.ConfigurationManager.AppSettings); - - args = Flags.Parse(args); - - if (ShowHelp.BooleanValue) - { - Console.Error.WriteLine(Usage); - return 0; - } - - LocalSubmission submission = new LocalSubmission(NumHosts, args, LocalJobDirectory); - submission.Submit(); - return submission.Join(); - } - - static void Main(string[] args) - { - try - { - int exitCode = Run(args); - Console.WriteLine("Application return exit code " + exitCode); - } - catch (Exception e) - { - Console.WriteLine("Exception " + e.Message + "\n" + e.ToString()); - } - } - } - -} diff --git a/ClusterSubmission/LocalSubmission/packages.config b/ClusterSubmission/LocalSubmission/packages.config deleted file mode 100644 index 8fe539e..0000000 --- a/ClusterSubmission/LocalSubmission/packages.config +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of 
file diff --git a/ClusterSubmission/NaiadPeloponneseSupport/App.config b/ClusterSubmission/NaiadPeloponneseSupport/App.config index c5e4df0..cd42d28 100644 --- a/ClusterSubmission/NaiadPeloponneseSupport/App.config +++ b/ClusterSubmission/NaiadPeloponneseSupport/App.config @@ -1,7 +1,7 @@ - + - + @@ -9,44 +9,100 @@ - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - \ No newline at end of file + diff --git a/ClusterSubmission/NaiadPeloponneseSupport/ClusterSubmission.cs b/ClusterSubmission/NaiadPeloponneseSupport/ClusterSubmission.cs index d5a00cc..76a614f 100644 --- a/ClusterSubmission/NaiadPeloponneseSupport/ClusterSubmission.cs +++ b/ClusterSubmission/NaiadPeloponneseSupport/ClusterSubmission.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -28,24 +28,20 @@ using System.Xml; using System.Xml.Linq; -using Microsoft.Research.Peloponnese.Storage; using Microsoft.Research.Peloponnese.ClusterUtils; namespace Microsoft.Research.Naiad.Cluster { public class ClusterSubmission : PPMSubmission { - private readonly IDfsClient dfsClient; private readonly ClusterClient clusterClient; - private readonly string exeDirectory; private readonly XDocument launcherConfig; private static string[] FrameworkAssemblyNames = { "System", "System.Core", "mscorlib", "System.Xml" }; private ClusterJob clusterJob; - protected ClusterSubmission(IDfsClient dfs, ClusterClient cluster, int numberOfProcesses, string[] args) + protected ClusterSubmission(ClusterClient cluster, Uri stagingRoot, string queueName, int amMemoryInMB, int numberOfProcesses, int workerMemoryInMB, string[] args) { - this.dfsClient = dfs; this.clusterClient = cluster; string commandLine; @@ -55,106 +51,69 @@ protected ClusterSubmission(IDfsClient dfs, ClusterClient cluster, int numberOfP string ppmHome = ConfigHelpers.GetPPMHome(null); string 
exePath = args[0]; - this.exeDirectory = Path.GetDirectoryName(exePath); - string jobStaging = dfs.Combine("staging", Environment.UserName, "naiadJob"); + this.clusterClient.DfsClient.EnsureDirectory(stagingRoot, true); - XElement ppmResources = ConfigHelpers.MakePeloponneseResourceGroup(this.dfsClient, ppmHome); - XElement frameworkResources; - XElement jobResources; - MakeJobResourceGroups(exePath, jobStaging, out frameworkResources, out jobResources); + Uri jobStaging = this.clusterClient.DfsClient.Combine(stagingRoot, Environment.UserName, "naiadJob"); - XElement[] workerResources = { ppmResources, frameworkResources, jobResources }; + XElement ppmWorkerResources = ConfigHelpers.MakePeloponneseWorkerResourceGroup(this.clusterClient.DfsClient, stagingRoot, ppmHome); + XElement[] workerResources = { ppmWorkerResources }; + workerResources = workerResources.Concat(MakeJobResourceGroups(exePath, stagingRoot, jobStaging)).ToArray(); - XDocument config = Helpers.MakePeloponneseConfig(numberOfProcesses, "yarn", commandLine, commandLineArgs, false, workerResources); + XElement ppmResources = ConfigHelpers.MakePeloponneseResourceGroup(this.clusterClient.DfsClient, stagingRoot, ppmHome); + XDocument config = Helpers.MakePeloponneseConfig(numberOfProcesses, workerMemoryInMB, "yarn", commandLine, commandLineArgs, false, workerResources); string configName = "config.xml"; - XElement configResources = ConfigHelpers.MakeConfigResourceGroup(this.dfsClient, jobStaging, config, configName); + XElement configResources = ConfigHelpers.MakeConfigResourceGroup( + this.clusterClient.DfsClient, jobStaging, config, configName); XElement[] launcherResources = { ppmResources, configResources }; - this.launcherConfig = ConfigHelpers.MakeLauncherConfig("Naiad: " + commandLine, configName, launcherResources, this.clusterClient.JobDirectoryTemplate.Replace("_BASELOCATION_", "naiad-jobs")); + this.launcherConfig = ConfigHelpers.MakeLauncherConfig( + "Naiad: " + commandLine, configName, 
queueName, amMemoryInMB, launcherResources, + this.clusterClient.JobDirectoryTemplate.AbsoluteUri.Replace("_BASELOCATION_", "naiad-jobs")); } - private Assembly DependencyResolveEventHandler(object sender, ResolveEventArgs args) + public void Dispose() { - string leafName = args.Name.Substring(0, args.Name.IndexOf(",")); - string assemblyPath = Path.Combine(this.exeDirectory, leafName); + this.clusterClient.Dispose(); + } - string dll = assemblyPath + ".dll"; - if (File.Exists(dll)) + private XElement[] MakeJobResourceGroups(string exeName, Uri stagingRoot, Uri jobStaging) + { + if (exeName.ToLower().StartsWith("hdfs://")) { - return Assembly.LoadFrom(dll); + Uri exeDirectory = new Uri(exeName.Substring(0, exeName.LastIndexOf('/'))); + return new XElement[] { ConfigHelpers.MakeRemoteResourceGroup(this.clusterClient.DfsClient, exeDirectory, false) }; } - - string exe = assemblyPath + ".exe"; - if (File.Exists(exe)) + else { - return Assembly.LoadFrom(exe); - } - - throw new ApplicationException("Can't find assembly " + args.ToString()); - } - - /// - /// Returns the locations of non-framework assemblies on which the assembly with the given filename depends. - /// - /// The filename of the assembly - /// An array of filenames for non-framework assemblies on which the given assembly depends - private string[] Dependencies(string assemblyFilename) - { - Assembly assembly = Assembly.LoadFrom(assemblyFilename); - AppDomain.CurrentDomain. 
AssemblyResolve += new ResolveEventHandler(DependencyResolveEventHandler); - return GetDependenciesInternal(assembly).ToArray(); - } + IEnumerable dependencies = Microsoft.Research.Peloponnese.Shared.DependencyLister.Lister.ListDependencies(exeName); - public static IEnumerable GetDependenciesInternal(Assembly source) - { - AppDomainSetup setup = new AppDomainSetup(); - setup.ApplicationBase = Path.GetDirectoryName(source.Location); + if (File.Exists(exeName + ".config")) + { + dependencies = dependencies.Concat(new[] { exeName + ".config" }).ToArray(); + } - AppDomain dependencyDomain = AppDomain.CreateDomain("DependencyLister", null, setup); + IEnumerable peloponneseDependencies = dependencies.Where(x => Path.GetFileName(x).StartsWith("Microsoft.Research.Peloponnese")); + XElement peloponneseGroup = ConfigHelpers.MakeResourceGroup(this.clusterClient.DfsClient, this.clusterClient.DfsClient.Combine(stagingRoot, "peloponnese"), true, peloponneseDependencies); - DependencyLister.Lister lister = (DependencyLister.Lister) dependencyDomain.CreateInstanceFromAndUnwrap(typeof(Microsoft.Research.Naiad.Cluster.DependencyLister.Lister).Assembly.Location, "Microsoft.Research.Naiad.Cluster.DependencyLister.Lister"); - - List ret = lister.ListDependencies(source.Location).ToList(); + IEnumerable naiadDependencies = dependencies.Where(x => Path.GetFileName(x).StartsWith("Microsoft.Research.Naiad")); + XElement naiadGroup = ConfigHelpers.MakeResourceGroup(this.clusterClient.DfsClient, this.clusterClient.DfsClient.Combine(stagingRoot, "naiad"), true, naiadDependencies); - AppDomain.Unload(dependencyDomain); - - return ret; - } + IEnumerable jobDependencies = dependencies.Where(x => !Path.GetFileName(x).StartsWith("Microsoft.Research.Naiad") && !Path.GetFileName(x).StartsWith("Microsoft.Research.Peloponnese")); + XElement jobGroup = ConfigHelpers.MakeResourceGroup(this.clusterClient.DfsClient, jobStaging, false, jobDependencies); - private void MakeJobResourceGroups(string exeName, 
string jobStaging, out XElement frameworkGroup, out XElement jobGroup) - { - string[] naiadComponentsArray = - { - "Naiad.dll", - "Naiad.pdb" - }; - HashSet naiadComponents = new HashSet(); - foreach (string c in naiadComponentsArray) - { - naiadComponents.Add(c); + return new XElement[] { peloponneseGroup, naiadGroup, jobGroup }; } - - string[] dependencies = Dependencies(exeName); - - if (File.Exists(exeName + ".config")) - { - dependencies = dependencies.Concat(new[] { exeName + ".config" }).ToArray(); - } - - IEnumerable frameworkDependencies = dependencies.Where(x => naiadComponents.Contains(Path.GetFileName(x))); - frameworkGroup = ConfigHelpers.MakeResourceGroup(dfsClient, dfsClient.Combine("staging", "naiad"), true, frameworkDependencies); - - IEnumerable jobDependencies = dependencies.Where(x => !naiadComponents.Contains(Path.GetFileName(x))); - jobGroup = ConfigHelpers.MakeResourceGroup(dfsClient, jobStaging, false, jobDependencies); } public void Submit() { - this.clusterJob = this.clusterClient.Submit(this.launcherConfig, this.clusterClient.JobDirectoryTemplate.Replace("_BASELOCATION_", "naiad-jobs")); + this.clusterJob = this.clusterClient.Submit( + this.launcherConfig, + new Uri(this.clusterClient.JobDirectoryTemplate.AbsoluteUri.Replace("_BASELOCATION_", "naiad-jobs"))); } public int Join() @@ -171,6 +130,7 @@ public int Join() return 1; } } - } + public ClusterJob ClusterJob { get { return this.clusterJob; } } + } } diff --git a/ClusterSubmission/NaiadPeloponneseSupport/Flags.cs b/ClusterSubmission/NaiadPeloponneseSupport/Flags.cs index 2e0f79d..dd4161e 100644 --- a/ClusterSubmission/NaiadPeloponneseSupport/Flags.cs +++ b/ClusterSubmission/NaiadPeloponneseSupport/Flags.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -318,6 +318,7 @@ public static String[] Parse(String[] args) { value = args[idx++]; } + //Console.WriteLine(name); flag.Parse(value); } } diff --git a/ClusterSubmission/NaiadPeloponneseSupport/Microsoft.Research.Naiad.Cluster.nuspec b/ClusterSubmission/NaiadPeloponneseSupport/Microsoft.Research.Naiad.Cluster.nuspec deleted file mode 100644 index b249c94..0000000 --- a/ClusterSubmission/NaiadPeloponneseSupport/Microsoft.Research.Naiad.Cluster.nuspec +++ /dev/null @@ -1,39 +0,0 @@ - - - - Microsoft.Research.Naiad.Cluster - 0.4.1-beta - Naiad - Cluster Support - naiadquestions@microsoft.com - naiadquestions@microsoft.com,Microsoft - http://www.apache.org/licenses/LICENSE-2.0.html - http://research.microsoft.com/naiad/ - true - - Support for running Naiad programs using Peloponnese. - - Microsoft Corporation - en-US - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ClusterSubmission/NaiadPeloponneseSupport/NaiadPeloponneseSupport.csproj b/ClusterSubmission/NaiadPeloponneseSupport/NaiadPeloponneseSupport.csproj index 5a4b8c8..892e1c4 100644 --- a/ClusterSubmission/NaiadPeloponneseSupport/NaiadPeloponneseSupport.csproj +++ b/ClusterSubmission/NaiadPeloponneseSupport/NaiadPeloponneseSupport.csproj @@ -1,6 +1,11 @@  - + + + + + + Debug @@ -12,6 +17,7 @@ Microsoft.Research.Naiad.Cluster v4.5 512 + f8cc83fc true @@ -37,29 +43,23 @@ - + False - ..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll + ..\packages\Microsoft.Data.Edm.5.6.2\lib\net40\Microsoft.Data.Edm.dll - + False - ..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll + ..\packages\Microsoft.Data.OData.5.6.2\lib\net40\Microsoft.Data.OData.dll - + False - ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll + ..\packages\Microsoft.Data.Services.Client.5.6.2\lib\net40\Microsoft.Data.Services.Client.dll - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.Hadoop.Client.dll + + 
..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.Hadoop.Client.dll - - False - ..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - - - False - ..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.HDInsight.Net.Http.Formatting.dll False @@ -73,61 +73,51 @@ False ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.dll + + ..\packages\Microsoft.WindowsAzure.Common.1.4.0\lib\net45\Microsoft.WindowsAzure.Common.dll - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll + + ..\packages\Microsoft.WindowsAzure.Common.1.4.0\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - - False - ..\packages\Microsoft.WindowsAzure.Management.1.2.0\lib\net40\Microsoft.WindowsAzure.Management.dll + + ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll - - False - ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll - - - False - 
..\packages\Microsoft.WindowsAzure.Management.Storage.1.1.1\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + + ..\packages\Microsoft.WindowsAzure.Management.Storage.3.1.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll - + False - ..\packages\WindowsAzure.Storage.4.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll + ..\packages\WindowsAzure.Storage.4.3.0\lib\net40\Microsoft.WindowsAzure.Storage.dll False - ..\packages\Newtonsoft.Json.6.0.3\lib\net45\Newtonsoft.Json.dll + ..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll - + False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Extensions.dll + ..\packages\Microsoft.Net.Http.2.2.28\lib\net45\System.Net.Http.Extensions.dll - + False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Primitives.dll + ..\packages\Microsoft.Net.Http.2.2.28\lib\net45\System.Net.Http.Primitives.dll - + False - ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll + ..\packages\System.Spatial.5.6.2\lib\net40\System.Spatial.dll @@ -145,31 +135,34 @@ Designer - - Designer - - - - {4b1a2cc2-1798-472c-954b-9c808b2c0748} - DependencyLister - - - - - - - This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - + + + + + + + + + + + + + - + + + + + + + + + + + + + - + \ No newline at end of file diff --git a/ClusterSubmission/RunNaiad/Program.cs b/ClusterSubmission/RunNaiad/Program.cs new file mode 100644 index 0000000..c73ce89 --- /dev/null +++ b/ClusterSubmission/RunNaiad/Program.cs @@ -0,0 +1,497 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Configuration; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +using Microsoft.Research.Peloponnese.Shared; +using Microsoft.Research.Peloponnese.Hdfs; +using Microsoft.Research.Peloponnese.WebHdfs; +using Microsoft.Research.Peloponnese.Azure; +using Microsoft.Research.Peloponnese.ClusterUtils; +using Microsoft.Research.Naiad.Util; + +namespace Microsoft.Research.Naiad.Cluster.Submission +{ + public class Program + { + private enum ExecutionType + { + Local, + Yarn, + Azure + }; + + private static Flag ShowHelp = Flags.Define("-h,--help", typeof(bool)); + + private static Flag ForceLocal = Flags.Define("--local", typeof(bool)); + private static Flag ForceAzure = Flags.Define("--azure", typeof(bool)); + private static Flag ForceYarn = Flags.Define("--yarn", typeof(bool)); + + private static Flag NumHosts = Flags.Define("-n,--numhosts", 2); + private static Flag PeloponneseHome = Flags.Define("-p,--peloponnesehome", typeof(string)); + + private static Flag RMHostAndPort = Flags.Define("-r,--rmhost", typeof(string)); + private static Flag NameNodeAndPort = Flags.Define("-nn,--namenode", typeof(string)); + private static Flag YarnJobQueue = Flags.Define("-yq,--yarnqueue", typeof(string)); + private static Flag YarnAMMemory = Flags.Define("-amm,--ammemorymb", typeof(int)); + private static Flag YarnWorkerMemory = 
Flags.Define("-wm,--workermemorymb", typeof(int)); + private static Flag WebHdfsPort = Flags.Define("-w,--webhdfsport", typeof(int)); + private static Flag LauncherHostAndPort = Flags.Define("-l,--launcher", typeof(string)); + private static Flag LogsDumpFile = Flags.Define("-f,--fetch", typeof(string)); + + private static Flag AzureSubscriptionId = Flags.Define("--subscriptionid", typeof(string)); + private static Flag AzureClusterName = Flags.Define("-c,--clustername", typeof(string)); + private static Flag AzureCertificateThumbprint = Flags.Define("--certthumbprint", typeof(string)); + private static Flag AzureStorageAccountName = Flags.Define("--storageaccount", typeof(string)); + private static Flag AzureStorageAccountKey = Flags.Define("--storagekey", typeof(string)); + private static Flag AzureStorageContainerName = Flags.Define("--container", typeof(string)); + + private static Flag LocalJobDirectory = Flags.Define("-ld,--localdir", typeof(string)); + + private const string Usage = @"Usage: RunNaiad [Shared options] [[Azure options]|[Yarn options]|[Local options]] NaiadExecutable.exe [Naiad options] + +Runs the given Naiad executable on an Azure HDInsight or YARN cluster, or a set of local processes. If no Azure or Yarn +options are specified, local execution is assumed. + +(N.B. For convenience, each option can be set in the App.config for this program, + using the long form option name.) + +Shared options: + -n,--numhosts Number of Naiad processes (default = 2) + -p,--peloponnesehome Location of Peloponnese binaries (defaults to directory of the running binary) + +Yarn options: + -r,--rmhost YARN cluster RM node hostname and optional port. Hostname is required, port defaults to 8088 + -nn,--namenode YARN cluster namenode and optional port, defaults to rm hostname + -yq,--yarnqueue YARN cluster job queue, defaults to cluster's default queue + -amm,--ammemorymb YARN container memory requested for AM (coordinator). 
Default is cluster's maximum container size + -wm,--workermemorymb YARN container memory requested for workers (Naiad processes). Default is cluster's maximum container size + -w,--webhdfsport Optional YARN namenode webhdfs port, defaults to 50070. If provided, RunNaiad will use + WebHdfs to upload resources. Otherwise, Java and YARN must be installed on the client computer. + -l,--launcher yarnlauncher hostname and optional port. If provided, RunNaiad will launch the job via the launcher + process. Otherwise, Java and YARN must be installed on the client computer. + -f,--fetch filename. fetch the job logs after the job finishes. yarn.cmd must be in the path for this to work. + +Azure options: + --c,clustername HDInsight cluster name (required) + --subscriptionid Azure subscription ID (default = taken from Powershell settings) + --certthumbprint Azure certificate thumbprint (required if and only if subscription ID is provided) + --storageaccount Azure storage account name for staging resources (default = cluster default storage account) + --storagekey Azure storage account key for staging resources (default = cluster default storage account key) + --container Azure storage blob container name for staging resources (default = ""staging"") + +Local options: + -ld,--localdir Local job working directory (default = '%PWD%\LocalJobs')"; + + private static void GetHostAndPort(string input, string defaultHost, int defaultPort, out string host, out int port) + { + if (input == null) + { + host = defaultHost; + port = defaultPort; + } + else + { + string[] parts = input.Split(':'); + host = parts[0].Trim(); + if (parts.Length == 2) + { + if (Int32.TryParse(parts[1], out port)) + { + } + else + { + throw new ApplicationException("Bad port specifier: " + input); + } + } + else if (parts.Length > 2) + { + throw new ApplicationException("Bad host:port specifier: " + input); + } + else + { + port = defaultPort; + } + } + } + + private static void FetchLogs(string dumpFile, string 
applicationId) + { + ProcessStartInfo startInfo = new ProcessStartInfo("cmd.exe"); + startInfo.Arguments = "/c yarn.cmd logs -applicationId " + applicationId + " -appOwner " + Environment.UserName; + startInfo.RedirectStandardOutput = true; + startInfo.UseShellExecute = false; + + Console.WriteLine("Fetch logs to '" + dumpFile + "' with command 'cmd.exe " + startInfo.Arguments + "'"); + + try + { + using (Stream dumpStream = new FileStream(dumpFile, FileMode.Create, FileAccess.Write, FileShare.ReadWrite)) + { + Process process = new Process(); + process.StartInfo = startInfo; + bool started = process.Start(); + if (!started) + { + Console.Error.WriteLine("Failed to start fetch command"); + return; + } + + using (StreamReader reader = process.StandardOutput) + { + Task finishCopy = reader.BaseStream.CopyToAsync(dumpStream); + + process.WaitForExit(); + + finishCopy.Wait(); + } + } + } + catch (Exception e) + { + Console.Error.WriteLine("Fetching logs got exception: " + e.ToString()); + } + } + + private static int RunNativeYarn(string[] args) + { + if (!RMHostAndPort.IsSet) + { + Console.Error.WriteLine("Error: Yarn cluster rm node hostname not set."); + Console.Error.WriteLine(Usage); + return 1; + } + + string rmHost; + int wsPort; + GetHostAndPort(RMHostAndPort.StringValue, null, 8088, out rmHost, out wsPort); + + string nameNode; + int hdfsPort; + GetHostAndPort(NameNodeAndPort.IsSet ? NameNodeAndPort.StringValue : null, rmHost, -1, out nameNode, out hdfsPort); + + string queueName = null; + if (YarnJobQueue.IsSet) + { + queueName = YarnJobQueue.StringValue; + } + + int amMemoryMB = -1; + if (YarnAMMemory.IsSet) + { + amMemoryMB = YarnAMMemory.IntValue; + } + + int workerMemoryMB = -1; + if (YarnWorkerMemory.IsSet) + { + workerMemoryMB = YarnWorkerMemory.IntValue; + } + + string launcherNode; + int launcherPort; + GetHostAndPort( + LauncherHostAndPort.IsSet ? 
LauncherHostAndPort.StringValue : null, null, -1, + out launcherNode, out launcherPort); + + DfsClient dfsClient; + if (WebHdfsPort.IsSet) + { + dfsClient = new WebHdfsClient(Environment.UserName, WebHdfsPort.IntValue); + } + else + { + dfsClient = new HdfsClient(); + } + + if (args[0].ToLower().StartsWith("hdfs://")) + { + if (!dfsClient.IsFileExists(new Uri(args[0]))) + { + Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]); + Console.Error.WriteLine(Usage); + return 1; + } + } + else + { + if (!File.Exists(args[0])) + { + Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]); + Console.Error.WriteLine(Usage); + return 1; + } + } + + UriBuilder builder = new UriBuilder(); + builder.Scheme = "hdfs"; + builder.Host = nameNode; + builder.Port = hdfsPort; + Uri jobRoot = dfsClient.Combine(builder.Uri, "user", Environment.UserName); + Uri stagingRoot = dfsClient.Combine(builder.Uri, "tmp", "staging"); + + NativeYarnSubmission submission; + + if (launcherNode == null) + { + submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot, PeloponneseHome, amMemoryMB, NumHosts, workerMemoryMB, args); + } + else + { + submission = new NativeYarnSubmission(rmHost, wsPort, dfsClient, queueName, stagingRoot, jobRoot, launcherNode, launcherPort, amMemoryMB, NumHosts, workerMemoryMB, args); + } + + submission.Submit(); + + Console.WriteLine("Waiting for application to complete"); + + int ret = submission.Join(); + + if (LogsDumpFile.IsSet) + { + FetchLogs(LogsDumpFile.StringValue, submission.ClusterJob.Id); + } + + submission.Dispose(); + + return ret; + } + + private static int RunHDInsight(string[] args) + { + if (!File.Exists(args[0])) + { + Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]); + Console.Error.WriteLine(Usage); + return 1; + } + + AzureSubscriptions subscriptionManagement = new AzureSubscriptions(); + + if (AzureSubscriptionId.IsSet && 
AzureCertificateThumbprint.IsSet) + { + subscriptionManagement.AddSubscription(AzureSubscriptionId.StringValue, AzureCertificateThumbprint.StringValue); + } + + string clusterName = null; + if (AzureClusterName.IsSet) + { + clusterName = AzureClusterName.StringValue; + + if (AzureStorageAccountName.IsSet && AzureStorageAccountKey.IsSet) + { + subscriptionManagement.SetClusterAccountAsync(clusterName, AzureStorageAccountName.StringValue, AzureStorageAccountKey.StringValue).Wait(); + } + } + else + { + IEnumerable clusters = subscriptionManagement.GetClusters(); + if (clusters.Count() == 1) + { + clusterName = clusters.Single().Name; + } + else + { + Console.Error.WriteLine("Error: Cluster name must be specified unless there is a single configured cluster in default and supplied subscriptions"); + Console.Error.WriteLine(Usage); + return 1; + } + } + + AzureCluster cluster; + try + { + cluster = subscriptionManagement.GetClusterAsync(clusterName).Result; + } + catch (Exception) + { + Console.Error.WriteLine("Error: Failed to find cluster " + clusterName + " in default or supplied subscriptions"); + Console.Error.WriteLine(Usage); + return 1; + } + if (cluster == null) + { + Console.Error.WriteLine("Error: Failed to find cluster {0} in default or supplied subscriptions", clusterName); + Console.Error.WriteLine(Usage); + return 1; + } + + string containerName = "staging"; + if (AzureStorageContainerName.IsSet) + { + containerName = AzureStorageContainerName.StringValue; + } + + // The args are augmented with an additional setting containing the Azure connection string. 
+ args = args.Concat(new string[] { "--addsetting", "Microsoft.Research.Naiad.Cluster.Azure.DefaultConnectionString", string.Format("\"DefaultEndpointsProtocol=https;AccountName={0};AccountKey={1}\"", cluster.StorageAccount.Split('.').First(), cluster.StorageKey) }).ToArray(); + + Console.Error.WriteLine("Submitting job with args: {0}", string.Join(" ", args)); + + AzureDfsClient azureDfs = new AzureDfsClient(cluster.StorageAccount, cluster.StorageKey, containerName); + Uri baseUri = Utils.ToAzureUri(cluster.StorageAccount, containerName, "", null, cluster.StorageKey); + AzureYarnClient azureYarn = new AzureYarnClient(subscriptionManagement, azureDfs, baseUri, ConfigHelpers.GetPPMHome(null), clusterName); + AzureYarnSubmission submission = new AzureYarnSubmission(azureYarn, baseUri, NumHosts, args); + + submission.Submit(); + return submission.Join(); + } + + private static int RunLocal(string[] args) + { + if (!File.Exists(args[0])) + { + Console.Error.WriteLine("Error: Naiad program {0} does not exist.", args[0]); + Console.Error.WriteLine(Usage); + return 1; + } + + if (!LocalJobDirectory.IsSet) + { + LocalJobDirectory.Parse("LocalJobs"); + } + LocalSubmission submission = new LocalSubmission(NumHosts, args, LocalJobDirectory); + submission.Submit(); + return submission.Join(); + } + + public static int Run(string[] args) + { + if (Environment.GetEnvironmentVariable("PELOPONNESE_HOME") != null) + { + PeloponneseHome.Parse(Environment.GetEnvironmentVariable("PELOPONNESE_HOME")); + } + else + { + string exeName = System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName; + PeloponneseHome.Parse(Path.GetDirectoryName(exeName)); + } + + Flags.Parse(ConfigurationManager.AppSettings); + + args = Flags.Parse(args); + + if (ShowHelp.BooleanValue) + { + Console.Error.WriteLine(Usage); + return 0; + } + + if (args.Length < 1) + { + Console.Error.WriteLine("Error: No Naiad program specified."); + Console.Error.WriteLine(Usage); + return 1; + } + + bool isLocal = 
ForceLocal.IsSet; + bool isNativeYarn = ForceYarn.IsSet; + bool isAzureHDInsight = ForceAzure.IsSet; + + // first find out if we forced an execution type with an explicit argument + if (isLocal) + { + if (isNativeYarn) + { + Console.Error.WriteLine("Can't force both Yarn and Local execution."); + Console.Error.WriteLine(Usage); + return 1; + } + if (isAzureHDInsight) + { + Console.Error.WriteLine("Can't force both Azure and Local execution."); + Console.Error.WriteLine(Usage); + return 1; + } + } + else if (isNativeYarn) + { + if (isAzureHDInsight) + { + Console.Error.WriteLine("Can't force both Azure and Yarn execution."); + Console.Error.WriteLine(Usage); + return 1; + } + } + else if (!isAzureHDInsight) + { + // there's no explicit argument to force execution type, so guess based on which arguments are set + isLocal = + (LocalJobDirectory.IsSet); + isNativeYarn = + (RMHostAndPort.IsSet || NameNodeAndPort.IsSet || YarnJobQueue.IsSet || YarnAMMemory.IsSet || YarnWorkerMemory.IsSet || + WebHdfsPort.IsSet || LauncherHostAndPort.IsSet || LogsDumpFile.IsSet); + isAzureHDInsight = + (AzureSubscriptionId.IsSet || AzureClusterName.IsSet || AzureCertificateThumbprint.IsSet || + AzureStorageAccountName.IsSet || AzureStorageAccountKey.IsSet || AzureStorageContainerName.IsSet); + } + + if (isNativeYarn) + { + if (isAzureHDInsight) + { + Console.Error.WriteLine("Can't specify Yarn and Azure options."); + Console.Error.WriteLine(Usage); + return 1; + } + if (isLocal) + { + Console.Error.WriteLine("Can't specify Yarn and local options."); + Console.Error.WriteLine(Usage); + return 1; + } + return RunNativeYarn(args); + } + else if (isAzureHDInsight) + { + if (isLocal) + { + Console.Error.WriteLine("Can't specify Azure and local options."); + Console.Error.WriteLine(Usage); + return 1; + } + return RunHDInsight(args); + } + else + { + return RunLocal(args); + } + } + + public static void Main(string[] args) + { + try + { + int exitCode = Run(args); + 
Console.WriteLine("Application return exit code " + exitCode); + } + catch (Exception e) + { + Console.WriteLine("Exception " + e.Message + "\n" + e.ToString()); + } + } + } +} diff --git a/ClusterSubmission/YarnSubmission/Properties/AssemblyInfo.cs b/ClusterSubmission/RunNaiad/Properties/AssemblyInfo.cs similarity index 84% rename from ClusterSubmission/YarnSubmission/Properties/AssemblyInfo.cs rename to ClusterSubmission/RunNaiad/Properties/AssemblyInfo.cs index 19dfbb5..d0b3e80 100644 --- a/ClusterSubmission/YarnSubmission/Properties/AssemblyInfo.cs +++ b/ClusterSubmission/RunNaiad/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -17,7 +17,6 @@ * See the Apache Version 2.0 License for specific language governing * permissions and limitations under the License. */ - using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -25,12 +24,12 @@ // General Information about an assembly is controlled through the following // set of attributes. Change these attribute values to modify the information // associated with an assembly. -[assembly: AssemblyTitle("YarnSubmission")] +[assembly: AssemblyTitle("RunNaiad")] [assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] [assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("YarnSubmission")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. 
All rights reserved.")] +[assembly: AssemblyProduct("RunNaiad")] +[assembly: AssemblyCopyright("Copyright © 2014")] [assembly: AssemblyTrademark("")] [assembly: AssemblyCulture("")] @@ -40,7 +39,7 @@ [assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("1f0eb873-2b1d-43c0-b001-256149e0c68c")] +[assembly: Guid("9bc369a3-3588-49fc-9ef1-f325702c8175")] // Version information for an assembly consists of the following four values: // @@ -52,5 +51,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.1")] -[assembly: AssemblyFileVersion("0.4.1")] +[assembly: AssemblyVersion("0.5.0.0")] +[assembly: AssemblyFileVersion("0.5.0.0")] diff --git a/ClusterSubmission/RunNaiad/RunNaiad.csproj b/ClusterSubmission/RunNaiad/RunNaiad.csproj new file mode 100644 index 0000000..05890b8 --- /dev/null +++ b/ClusterSubmission/RunNaiad/RunNaiad.csproj @@ -0,0 +1,178 @@ + + + + + + + + + + + Debug + AnyCPU + {47D22F4A-8B47-4829-A896-E5318ECA4CC2} + Exe + Properties + RunNaiad + RunNaiad + v4.5 + 512 + 55665545 + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + + False + ..\packages\Microsoft.Data.Edm.5.6.2\lib\net40\Microsoft.Data.Edm.dll + + + False + ..\packages\Microsoft.Data.OData.5.6.2\lib\net40\Microsoft.Data.OData.dll + + + False + ..\packages\Microsoft.Data.Services.Client.5.6.2\lib\net40\Microsoft.Data.Services.Client.dll + + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.Hadoop.Client.dll + + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.HDInsight.Net.Http.Formatting.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.dll 
+ + + False + ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.dll + + + False + ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.4.0\lib\net45\Microsoft.WindowsAzure.Common.dll + + + ..\packages\Microsoft.WindowsAzure.Common.1.4.0\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll + + + ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll + + + ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll + + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll + + + ..\packages\Microsoft.Hadoop.Client.1.3.2.1\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll + + + ..\packages\Microsoft.WindowsAzure.Management.Storage.3.1.0\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll + + + False + ..\packages\WindowsAzure.Storage.4.3.0\lib\net40\Microsoft.WindowsAzure.Storage.dll + + + False + ..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll + + + + + + + + False + ..\packages\Microsoft.Net.Http.2.2.28\lib\net45\System.Net.Http.Extensions.dll + + + False + ..\packages\Microsoft.Net.Http.2.2.28\lib\net45\System.Net.Http.Primitives.dll + + + + False + ..\packages\System.Spatial.5.6.2\lib\net40\System.Spatial.dll + + + + + + + + + + + + + + + + + + + + + + + {271f7100-7aa3-4379-9c58-23618b73a5dd} + NaiadPeloponneseSupport + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
+ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ClusterSubmission/RunNaiad/YarnSubmission.cs b/ClusterSubmission/RunNaiad/YarnSubmission.cs new file mode 100644 index 0000000..0ba1cfa --- /dev/null +++ b/ClusterSubmission/RunNaiad/YarnSubmission.cs @@ -0,0 +1,68 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. 
+ */ + +using Microsoft.Research.Peloponnese.Yarn; +using Microsoft.Research.Peloponnese.Shared; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Microsoft.Research.Naiad.Cluster.Submission +{ + class NativeYarnSubmission : ClusterSubmission + { + public NativeYarnSubmission( + string rmNode, int wsPort, DfsClient dfsClient, string queueName, Uri stagingUri, Uri jobUri, string launcherNode, int launcherPort, + int amMemoryInMB, int numberOfProcesses, int workerMemoryInMB, string[] args) + : base(new NativeYarnClient(rmNode, wsPort, dfsClient, jobUri, launcherNode, launcherPort), + stagingUri, queueName, amMemoryInMB, numberOfProcesses, workerMemoryInMB, args) + { + } + + public NativeYarnSubmission( + string rmNode, int wsPort, DfsClient dfsClient, string queueName, Uri stagingUri, Uri jobUri, string peloponneseDirectory, + int amMemoryInMB, int numberOfProcesses, int workerMemoryInMB, string[] args) + : base( + new NativeYarnClient(rmNode, wsPort, dfsClient, jobUri, LauncherJarFile(peloponneseDirectory), YarnDirectory()), + stagingUri, queueName, amMemoryInMB, numberOfProcesses, workerMemoryInMB, args) + { + } + + private static string LauncherJarFile(string peloponneseDirectory) + { + return Path.Combine(peloponneseDirectory, "Microsoft.Research.Peloponnese.YarnLauncher.jar"); + } + + private static string YarnDirectory() + { + string yarnDirectory = Environment.GetEnvironmentVariable("HADOOP_COMMON_HOME"); + + if (yarnDirectory == null) + { + throw new ApplicationException("No HADOOP_COMMON_HOME defined"); + } + + return yarnDirectory; + } + } +} diff --git a/ClusterSubmission/RunNaiad/packages.config b/ClusterSubmission/RunNaiad/packages.config new file mode 100644 index 0000000..47150e6 --- /dev/null +++ b/ClusterSubmission/RunNaiad/packages.config @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/ClusterSubmission/YarnSubmission/App.config b/ClusterSubmission/YarnSubmission/App.config deleted file mode 100644 index 0b6d0f2..0000000 --- a/ClusterSubmission/YarnSubmission/App.config +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/YarnSubmission/Microsoft.Research.Naiad.Cluster.Yarn.nuspec b/ClusterSubmission/YarnSubmission/Microsoft.Research.Naiad.Cluster.Yarn.nuspec deleted file mode 100644 index 8b30c9f..0000000 --- a/ClusterSubmission/YarnSubmission/Microsoft.Research.Naiad.Cluster.Yarn.nuspec +++ /dev/null @@ -1,40 +0,0 @@ - - - - Microsoft.Research.Naiad.Cluster.Yarn - 0.4.1-beta - Naiad - YARN cluster submission tool - naiadquestions@microsoft.com - naiadquestions@microsoft.com,Microsoft - http://www.apache.org/licenses/LICENSE-2.0.html - http://research.microsoft.com/naiad/ - true - - Launcher for submitting Naiad programs to a Hadoop 2.0 (YARN) cluster. - - Microsoft Corporation - en-US - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ClusterSubmission/YarnSubmission/Program.cs b/ClusterSubmission/YarnSubmission/Program.cs deleted file mode 100644 index e311c48..0000000 --- a/ClusterSubmission/YarnSubmission/Program.cs +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Naiad ver. 0.4 - * Copyright (c) Microsoft Corporation - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT - * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR - * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. 
- * - * See the Apache Version 2.0 License for specific language governing - * permissions and limitations under the License. - */ - -using Microsoft.Research.Naiad.Util; -using System; -using System.Collections.Generic; -using System.Configuration; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Microsoft.Research.Naiad.Cluster.NativeYarn -{ - public class Program - { - private static Flag ShowHelp = Flags.Define("-h,--help", typeof(bool)); - private static Flag NumHosts = Flags.Define("-n,--numhosts", 2); - private static Flag NumThreads = Flags.Define("-t,--threads", 8); - private static Flag HeadNodeHostname = Flags.Define("-y,--yarncluster", typeof(string)); - private static Flag PeloponneseHome = Flags.Define("-p,--peloponnesehome", typeof(string)); - - private const string Usage = @"Usage: NativeYarnSubmission [Azure options] NaiadExecutable.exe [Naiad options] - -Runs the given Naiad executable on an YARN cluster. - -(N.B. For convenience, each option can be set in the App.config for this program, - using the long form option name.) 
- -Options: - -n,--numhosts Number of Naiad processes (default = 2) - -t,--threads Number of worker threads per Naiad process (default = 8) - -p,--peloponnesehome Location of Peloponnese binaries (default = %PELOPONNESE_HOME%) - -Azure options: - -y,--yarncluster YARN cluster head node hostname"; - - - public static int Run(string[] args) - { - if (Environment.GetEnvironmentVariable("PELOPONNESE_HOME") != null) - PeloponneseHome.Parse(Environment.GetEnvironmentVariable("PELOPONNESE_HOME")); - - Flags.Parse(ConfigurationManager.AppSettings); - - args = Flags.Parse(args); - - if (ShowHelp.BooleanValue) - { - Console.Error.WriteLine(Usage); - return 0; - } - if (!PeloponneseHome.IsSet) - { - Console.Error.WriteLine("Error: Peloponnese home directory not set."); - Console.Error.WriteLine(Usage); - return -1; - } - if (!HeadNodeHostname.IsSet) - { - Console.Error.WriteLine("Error: Yarn cluster head node hostname not set."); - Console.Error.WriteLine(Usage); - return -1; - } - - NativeYarnSubmission submission = new NativeYarnSubmission(HeadNodeHostname.StringValue, 9000, 50070, NumHosts, args); - - submission.Submit(); - return submission.Join(); - } - - public static void Main(string[] args) - { - try - { - int exitCode = Run(args); - Console.WriteLine("Application return exit code " + exitCode); - } - catch (Exception e) - { - Console.WriteLine("Exception " + e.Message + "\n" + e.ToString()); - } - } - } -} diff --git a/ClusterSubmission/YarnSubmission/YarnSubmission.csproj b/ClusterSubmission/YarnSubmission/YarnSubmission.csproj deleted file mode 100644 index 25395d7..0000000 --- a/ClusterSubmission/YarnSubmission/YarnSubmission.csproj +++ /dev/null @@ -1,178 +0,0 @@ - - - - - - Debug - AnyCPU - {3A289ADE-2782-47D3-B682-C03115332646} - Exe - Properties - Microsoft.Research.Naiad.Cluster.Yarn - YarnSubmission - v4.5 - 512 - - - true - bin\x64\Debug\ - DEBUG;TRACE - full - x64 - prompt - MinimumRecommendedRules.ruleset - true - - - bin\x64\Release\ - TRACE - true - 
pdbonly - x64 - prompt - MinimumRecommendedRules.ruleset - true - - - - False - ..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll - - - False - ..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll - - - False - ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.Hadoop.Client.dll - - - False - ..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.HadoopBridge.dll - - - False - ..\packages\Microsoft.Research.Peloponnese.0.7.5-beta\lib\net45\Microsoft.Research.Peloponnese.Utils.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.dll - - - False - ..\packages\Microsoft.Bcl.Async.1.0.168\lib\net40\Microsoft.Threading.Tasks.Extensions.Desktop.dll - - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.dll - - - False - ..\packages\Microsoft.WindowsAzure.Common.1.1.1\lib\net45\Microsoft.WindowsAzure.Common.NetFramework.dll - - - ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.1.2.0\lib\net40\Microsoft.WindowsAzure.Management.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.HDInsight.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.dll - - - False - ..\packages\Microsoft.Hadoop.Client.1.1.1.8\lib\net40\Microsoft.WindowsAzure.Management.HDInsight.Framework.Core.dll - - - False - ..\packages\Microsoft.WindowsAzure.Management.Storage.1.1.1\lib\net40\Microsoft.WindowsAzure.Management.Storage.dll - - - False - 
..\packages\WindowsAzure.Storage.4.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll - - - False - ..\packages\Newtonsoft.Json.6.0.3\lib\net45\Newtonsoft.Json.dll - - - - - - - - False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Extensions.dll - - - False - ..\packages\Microsoft.Net.Http.2.2.22\lib\net45\System.Net.Http.Primitives.dll - - - - False - ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll - - - - - - - - - - - - - - - - Designer - - - Designer - - - - - - {271f7100-7aa3-4379-9c58-23618b73a5dd} - NaiadPeloponneseSupport - - - - - - - - - - - This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - - - - - - - \ No newline at end of file diff --git a/ClusterSubmission/YarnSubmission/packages.config b/ClusterSubmission/YarnSubmission/packages.config deleted file mode 100644 index 8fe539e..0000000 --- a/ClusterSubmission/YarnSubmission/packages.config +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/VersionHistory.aml b/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/VersionHistory.aml index e094d45..0cc630b 100644 --- a/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/VersionHistory.aml +++ b/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/VersionHistory.aml @@ -11,6 +11,9 @@ Select a version below to see a description of its changes. 
+ + + diff --git a/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/v0.5.aml b/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/v0.5.aml new file mode 100644 index 0000000..cd011b6 --- /dev/null +++ b/Documentation/NaiadDocumentation/NaiadDocumentation/Content/VersionHistory/v0.5.aml @@ -0,0 +1,36 @@ + + + + + + + An update release of Naiad, version 0.5 includes support for reading and writing Hdfs files using either the Java-based + protocol or REST-based WebHdfs, as well as bug fixes. + + + + + + Version 0.5 was released on October 17th, 2014 + + + +
+ Changes in This Release + + + + + Frameworks for reading and writing data in Hdfs. + + + + +
+ + + + + +
+
diff --git a/Documentation/NaiadDocumentation/NaiadDocumentation/ContentLayout.content b/Documentation/NaiadDocumentation/NaiadDocumentation/ContentLayout.content index 74303c3..69c91c1 100644 --- a/Documentation/NaiadDocumentation/NaiadDocumentation/ContentLayout.content +++ b/Documentation/NaiadDocumentation/NaiadDocumentation/ContentLayout.content @@ -7,11 +7,16 @@ - + + + + + + diff --git a/Documentation/NaiadDocumentation/NaiadDocumentation/NaiadDocumentation.shfbproj b/Documentation/NaiadDocumentation/NaiadDocumentation/NaiadDocumentation.shfbproj index 9820ec3..60ffacd 100644 --- a/Documentation/NaiadDocumentation/NaiadDocumentation/NaiadDocumentation.shfbproj +++ b/Documentation/NaiadDocumentation/NaiadDocumentation/NaiadDocumentation.shfbproj @@ -37,6 +37,14 @@ + + + + + + + + OnlyWarningsAndErrors Website @@ -88,6 +96,7 @@ + diff --git a/Examples/App.config b/Examples/App.config index 8225cae..6443066 100644 --- a/Examples/App.config +++ b/Examples/App.config @@ -1,24 +1,24 @@ - + - + - - + + - - + + - - + + - - + + - + diff --git a/Examples/DifferentialDataflow/ConnectedComponents.cs b/Examples/DifferentialDataflow/ConnectedComponents.cs index a8d72ed..3785c45 100644 --- a/Examples/DifferentialDataflow/ConnectedComponents.cs +++ b/Examples/DifferentialDataflow/ConnectedComponents.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/DifferentialDataflow/GraphColoring.cs b/Examples/DifferentialDataflow/GraphColoring.cs index 97f84a1..f7a0870 100644 --- a/Examples/DifferentialDataflow/GraphColoring.cs +++ b/Examples/DifferentialDataflow/GraphColoring.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Examples/DifferentialDataflow/SearchIndex.cs b/Examples/DifferentialDataflow/SearchIndex.cs index ed5967d..ea58bf4 100644 --- a/Examples/DifferentialDataflow/SearchIndex.cs +++ b/Examples/DifferentialDataflow/SearchIndex.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/DifferentialDataflow/StronglyConnectedComponents.cs b/Examples/DifferentialDataflow/StronglyConnectedComponents.cs index 0eb6ce5..beeee03 100644 --- a/Examples/DifferentialDataflow/StronglyConnectedComponents.cs +++ b/Examples/DifferentialDataflow/StronglyConnectedComponents.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/DifferentialDataflow/WordCount.cs b/Examples/DifferentialDataflow/WordCount.cs index 02659e7..18d7918 100644 --- a/Examples/DifferentialDataflow/WordCount.cs +++ b/Examples/DifferentialDataflow/WordCount.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/Examples.csproj b/Examples/Examples.csproj index f68c92d..0386abc 100644 --- a/Examples/Examples.csproj +++ b/Examples/Examples.csproj @@ -9,13 +9,14 @@ Properties Examples Examples - v4.0 + v4.5 512 SAK SAK SAK SAK - Client + + AnyCPU @@ -26,6 +27,7 @@ DEBUG;TRACE prompt 4 + false AnyCPU @@ -36,6 +38,25 @@ prompt 4 false + true + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset @@ -48,6 +69,9 @@ + + Properties\SharedAssemblyInfo.cs + diff --git a/Examples/GraphLINQ/PageRank.cs b/Examples/GraphLINQ/PageRank.cs index f255288..3627ade 100644 --- a/Examples/GraphLINQ/PageRank.cs +++ b/Examples/GraphLINQ/PageRank.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 
0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/GraphLINQ/Reachability.cs b/Examples/GraphLINQ/Reachability.cs index 6227c0c..78fca39 100644 --- a/Examples/GraphLINQ/Reachability.cs +++ b/Examples/GraphLINQ/Reachability.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/Naiad/ConnectedComponents.cs b/Examples/Naiad/ConnectedComponents.cs index 53f1fe6..7c54c6c 100644 --- a/Examples/Naiad/ConnectedComponents.cs +++ b/Examples/Naiad/ConnectedComponents.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -307,9 +307,19 @@ public void Execute(string[] args) var random = new Random(0); var processes = computation.Configuration.Processes; - var graphFragment = new Pair[edgeCount / processes]; - for (int i = 0; i < graphFragment.Length; i++) - graphFragment[i] = new Pair(random.Next(nodeCount), random.Next(nodeCount)); + var thisProcess = computation.Configuration.ProcessID; + var graphFragmentList = new List>(); + for (int i = 0; i < edgeCount; i++) + { + // ensure we generate the same graph no matter how many processes there are + var edge = new Pair(random.Next(nodeCount), random.Next(nodeCount)); + if ((i % processes) == thisProcess) + { + graphFragmentList.Add(edge); + } + } + + var graphFragment = graphFragmentList.ToArray(); #endregion @@ -319,7 +329,7 @@ public void Execute(string[] args) // convert array of edges to single-epoch stream. var edges = graphFragment.AsNaiadStream(computation) - .Synchronize(); + .Synchronize(x => true); // symmetrize the graph by adding in transposed edges. edges = edges.Select(x => new Pair(x.Second, x.First)) @@ -341,4 +351,4 @@ public string Help get { return "Demonstrates an iterative dataflow computation, using streaming aggregation within the loop and a blocking aggregation outside the loop. 
Demonstrates how optional coordination can give good performance when not used and determinism when used."; } } } -} \ No newline at end of file +} diff --git a/Examples/Naiad/KeyValueLookup.cs b/Examples/Naiad/KeyValueLookup.cs index 6dfeada..eeddfd2 100644 --- a/Examples/Naiad/KeyValueLookup.cs +++ b/Examples/Naiad/KeyValueLookup.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/Naiad/Latency.cs b/Examples/Naiad/Latency.cs index e4f59d0..451d265 100644 --- a/Examples/Naiad/Latency.cs +++ b/Examples/Naiad/Latency.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -49,8 +49,8 @@ public override void OnNotify(IterationIn time) public static Stream> MakeStage(Stream> ingress, Stream> feedbackOutput, int iterations) { var stage = new Stage>(ingress.Context, (i, s) => new Barrier(i, s, iterations), "Barrier"); - var initialInput = stage.NewInput(ingress, (message, vertex) => { }, null); - var feedbackInput = stage.NewInput(feedbackOutput, (message, vertex) => { }, null); + stage.NewInput(ingress, (message, vertex) => { }, null); + stage.NewInput(feedbackOutput, (message, vertex) => { }, null); return stage.NewOutput(vertex => vertex.Output); } diff --git a/Examples/Naiad/Throughput.cs b/Examples/Naiad/Throughput.cs index bbc946e..d1e91e7 100644 --- a/Examples/Naiad/Throughput.cs +++ b/Examples/Naiad/Throughput.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -35,7 +35,7 @@ namespace Microsoft.Research.Naiad.Examples.Throughput { public class ProducerVertex : Vertex { - private readonly VertexOutputBuffer output; + private readonly VertexOutputBuffer, Epoch> output; private readonly int numberToSend; @@ -43,24 +43,29 @@ public override void OnNotify(Epoch time) { var output = this.output.GetBufferForTime(new Epoch(0)); for (int i = 0; i < this.numberToSend; ++i) - output.Send(this.VertexId); + output.Send(this.VertexId.PairWith(i)); } private ProducerVertex(int id, Stage stage, int numberToSend) : base(id, stage) { this.numberToSend = numberToSend; - this.output = new VertexOutputBuffer(this); + this.output = new VertexOutputBuffer, Epoch>(this); this.NotifyAt(new Epoch(0)); } - public static Stream MakeStage(int numberToSend, int numberOfPartitions, Stream input) + public static Stream, Epoch> MakeStage(int numberToSend, int startProcess, int endProcess, int numberOfWorkers, Stream, Epoch> input) { - Placement placement = new Placement.Explicit(Enumerable.Range(0, numberOfPartitions).Select(x => new VertexLocation(x, 0, x))); + var locations = new List(); + for (int i = 0; i < endProcess - startProcess; i++) + for (int j = 0; j < numberOfWorkers; j++) + locations.Add(new VertexLocation(locations.Count, i + startProcess, j)); + + Placement placement = new Placement.Explicit(locations); Stage stage = Foundry.NewStage(placement, input.Context, (i, s) => new ProducerVertex(i, s, numberToSend), "Producer"); stage.NewInput(input, (v, m) => { }, null); - Stream stream = stage.NewOutput(v => v.output); + Stream, Epoch> stream = stage.NewOutput(v => v.output); return stream; } } @@ -71,12 +76,12 @@ public class ConsumerVertex : Vertex private readonly int numberToConsume; private Stopwatch stopwatch = new Stopwatch(); - private void OnRecv(Message message) + private void OnRecv(Message, Epoch> message) { //Console.WriteLine("In OnRecv"); if (!stopwatch.IsRunning) stopwatch.Start(); - + numReceived += message.length; } @@ 
-92,12 +97,23 @@ private ConsumerVertex(int id, Stage stage, int numberToConsume) this.NotifyAt(new Epoch(0)); } - public static Stage MakeStage(int numberToConsume, int numberOfPartitions, Stream stream) + public static Stage MakeStage(int numberToConsume, int startProcess, int endProcess, int numberOfWorkers, bool exchange, Stream, Epoch> stream) { - Placement placement = new Placement.Explicit(Enumerable.Range(0, numberOfPartitions).Select(x => new VertexLocation(x, 1, x))); + var locations = new List(); + for (int i = 0; i < endProcess - startProcess; i++) + for (int j = 0; j < numberOfWorkers; j++) + locations.Add(new VertexLocation(locations.Count, i + startProcess, j)); + + Placement placement = new Placement.Explicit(locations); Stage stage = Foundry.NewStage(placement, stream.Context, (i, s) => new ConsumerVertex(i, s, numberToConsume), "Consumer"); - stage.NewInput(stream, (m, v) => v.OnRecv(m), x => x); + + + if (exchange) + stage.NewInput(stream, (m, v) => v.OnRecv(m), x => x.Second); + else + stage.NewInput(stream, (m, v) => v.OnRecv(m), x => x.First); + return stage; } } @@ -106,7 +122,7 @@ class Throughput : Example { public string Usage { - get { return "[records]"; } + get { return "records producers consumers [exchange]"; } } public void Execute(string[] args) @@ -114,11 +130,15 @@ public void Execute(string[] args) using (OneOffComputation computation = NewComputation.FromArgs(ref args)) { int numToExchange = args.Length > 1 ? 
int.Parse(args[1]) : 1000000; + int producers = Int32.Parse(args[2]); + int consumers = Int32.Parse(args[3]); + + var exchange = args.Length > 4 && args[4] == "exchange"; - Stream input = computation.NewInput(new ConstantDataSource(5)); + var input = new Pair[] { }.AsNaiadStream(computation); - Stream stream = ProducerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, input); - Stage consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.WorkerCount, stream); + Stream, Epoch> stream = ProducerVertex.MakeStage(numToExchange, 0, producers, computation.Configuration.WorkerCount, input); + Stage consumer = ConsumerVertex.MakeStage(numToExchange, computation.Configuration.Processes - consumers, computation.Configuration.Processes, computation.Configuration.WorkerCount, exchange, stream); computation.Activate(); computation.Join(); diff --git a/Examples/Naiad/WordCount.cs b/Examples/Naiad/WordCount.cs index 50f27f4..31b3284 100644 --- a/Examples/Naiad/WordCount.cs +++ b/Examples/Naiad/WordCount.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -131,4 +131,4 @@ public string Help get { return "Demonstrates interactive counting of words in lines of text.\nPerhaps the simplest example of a self-contained Naiad program."; } } } -} \ No newline at end of file +} diff --git a/Examples/Program.cs b/Examples/Program.cs index 4d70264..8373925 100644 --- a/Examples/Program.cs +++ b/Examples/Program.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Examples/Properties/AssemblyInfo.cs b/Examples/Properties/AssemblyInfo.cs index 72b870b..5452515 100644 --- a/Examples/Properties/AssemblyInfo.cs +++ b/Examples/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -26,31 +26,7 @@ // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad.Examples")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad.Examples")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("43490246-380e-41db-a6fd-90112b8b6189")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Frameworks/AzureSupport/AzureSupport.csproj b/Frameworks/AzureSupport/AzureSupport.csproj index dccb4e7..bbf3361 100644 --- a/Frameworks/AzureSupport/AzureSupport.csproj +++ b/Frameworks/AzureSupport/AzureSupport.csproj @@ -1,5 +1,5 @@  - + Debug @@ -9,9 +9,12 @@ Properties Microsoft.Research.Naiad.AzureSupport Microsoft.Research.Naiad.AzureSupport - v4.0 + v4.5 512 - Client + + + ..\..\ + true true @@ -21,6 +24,7 @@ DEBUG;TRACE prompt 4 + false pdbonly @@ -30,46 +34,65 @@ prompt 4 bin\Release\Microsoft.Research.Naiad.AzureSupport.XML + false + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + 
TRACE + bin\Release\Microsoft.Research.Naiad.AzureSupport.XML + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset - + False - ..\..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll + ..\..\packages\Microsoft.Data.Edm.5.6.2\lib\net40\Microsoft.Data.Edm.dll - + False - ..\..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll + ..\..\packages\Microsoft.Data.OData.5.6.2\lib\net40\Microsoft.Data.OData.dll - + False - ..\..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll + ..\..\packages\Microsoft.Data.Services.Client.5.6.2\lib\net40\Microsoft.Data.Services.Client.dll - - False - ..\..\packages\Microsoft.WindowsAzure.ConfigurationManager.1.8.0.0\lib\net35-full\Microsoft.WindowsAzure.Configuration.dll + + ..\..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - + False - ..\..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll + ..\..\packages\WindowsAzure.Storage.4.3.0\lib\net40\Microsoft.WindowsAzure.Storage.dll False - ..\..\packages\Newtonsoft.Json.6.0.2\lib\net40\Newtonsoft.Json.dll + ..\..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll - + False - ..\..\packages\Rx-Core.2.2.2\lib\net40\System.Reactive.Core.dll + ..\..\packages\Rx-Core.2.2.5\lib\net40\System.Reactive.Core.dll - + False - ..\..\packages\Rx-Interfaces.2.2.2\lib\net40\System.Reactive.Interfaces.dll + ..\..\packages\Rx-Interfaces.2.2.5\lib\net40\System.Reactive.Interfaces.dll - + False - ..\..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll + ..\..\packages\System.Spatial.5.6.2\lib\net40\System.Spatial.dll @@ -78,6 +101,9 @@ + + Properties\SharedAssemblyInfo.cs + @@ -91,6 +117,10 @@ {bdc6546c-7ba0-472b-b260-0d596b6152e4} Lindi + + {0dca9543-ff9d-48d6-9748-a966dc39c35d} + Storage + @@ -98,6 +128,13 @@ + + + + This project references NuGet package(s) that are missing on this computer. 
Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + - + \ No newline at end of file diff --git a/Frameworks/AzureSupport/Properties/AssemblyInfo.cs b/Frameworks/AzureSupport/Properties/AssemblyInfo.cs index 86a5e33..8458a73 100644 --- a/Frameworks/AzureSupport/Properties/AssemblyInfo.cs +++ b/Frameworks/AzureSupport/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -26,31 +26,7 @@ // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad.AzureSupport")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad.AzureSupport")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("3d489523-573a-4d15-a7ef-ce273702c39d")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Frameworks/AzureSupport/Storage.cs b/Frameworks/AzureSupport/Storage.cs index 3758f3c..bf442f9 100644 --- a/Frameworks/AzureSupport/Storage.cs +++ b/Frameworks/AzureSupport/Storage.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -37,6 +37,8 @@ using Microsoft.Research.Naiad.Serialization; using Microsoft.Research.Naiad.Input; +using Microsoft.Research.Naiad.Frameworks.Storage; + namespace Microsoft.Research.Naiad.Frameworks.Azure { /// @@ -143,7 +145,7 @@ public static Stream ReadFromAzureBlobs(this Computatio /// Naiad stream containing the records extracted from files in the Azure directory public static Stream ReadBinaryFromAzureBlobs(this Computation manager, CloudBlobContainer container, string prefix) { - return manager.ReadFromAzureBlobs(container, prefix, stream => GetNaiadReaderEnumerable(stream, manager.Controller.SerializationFormat)); + return manager.ReadFromAzureBlobs(container, prefix, stream => Utils.GetNaiadReaderEnumerable(stream)); } /// @@ -188,37 +190,6 @@ public static Stream ReadFromAzureTable(this Computatio return tables.AsNaiadStream(manager).SelectMany(x => x.ExecuteQuery(query)); } - /// - /// Enumerates lines of text from a stream - /// - /// source stream - /// Each line of text in the source stream - internal static IEnumerable ReadLines(this System.IO.Stream stream) - { - using 
(var reader = new System.IO.StreamReader(stream)) - { - while (!reader.EndOfStream) - yield return reader.ReadLine(); - } - } - - /// - /// Enumerates records from a stream in the Naiad serialization format. - /// - /// Type of record in the stream - /// A stream containing records serialized in the Naiad messaging format - /// code generator - /// An enumeration of records in the stream - internal static IEnumerable GetNaiadReaderEnumerable(System.IO.Stream stream, SerializationFormat codeGenerator) - { - NaiadReader reader = new NaiadReader(stream, codeGenerator); - NaiadSerialization deserializer = codeGenerator.GetSerializer(); - TRecord nextElement; - while (reader.TryRead(deserializer, out nextElement)) - yield return nextElement; - } - - #endregion #region Azure file-writing extension methods @@ -243,7 +214,7 @@ public static Subscription WriteToAzureBlobs(this Stream(this StreamSubscription corresponding to the Azure writer public static Subscription WriteBinaryToAzureBlobs(this Stream source, CloudBlobContainer container, string format) { - return source.WriteToAzureBlobs(container, format, stream => GetNaiadWriterObserver(stream, source.ForStage.Computation.Controller.SerializationFormat)); - } - - /// - /// Returns an record observer that writes records to the given stream in the Naiad message format. - /// - /// Type of records to be written - /// Target I/O stream - /// code generator - /// A record observer that writes records to the given stream. 
- internal static IObserver GetNaiadWriterObserver(System.IO.Stream stream, SerializationFormat codeGenerator) - { - NaiadWriter writer = new NaiadWriter(stream, codeGenerator); - return Observer.Create(r => - { - writer.Write(r); - }, - () => writer.Dispose()); + return source.WriteToAzureBlobs(container, format, stream => Utils.GetNaiadWriterObserver(stream, source.ForStage.Computation.Controller.SerializationFormat)); } /// diff --git a/Frameworks/AzureSupport/app.config b/Frameworks/AzureSupport/app.config index 131adf3..fc87146 100644 --- a/Frameworks/AzureSupport/app.config +++ b/Frameworks/AzureSupport/app.config @@ -4,20 +4,24 @@ - + - + - + + + + + - \ No newline at end of file + diff --git a/Frameworks/AzureSupport/packages.config b/Frameworks/AzureSupport/packages.config index 509d1e0..b1295d7 100644 --- a/Frameworks/AzureSupport/packages.config +++ b/Frameworks/AzureSupport/packages.config @@ -1,12 +1,12 @@  - - - - - - - - - + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/DifferentialDataflow/Collection.cs b/Frameworks/DifferentialDataflow/Collection.cs index cc0b650..f522c3c 100644 --- a/Frameworks/DifferentialDataflow/Collection.cs +++ b/Frameworks/DifferentialDataflow/Collection.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionInterfaces.cs b/Frameworks/DifferentialDataflow/CollectionInterfaces.cs index ca4f922..a9f331c 100644 --- a/Frameworks/DifferentialDataflow/CollectionInterfaces.cs +++ b/Frameworks/DifferentialDataflow/CollectionInterfaces.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -40,7 +40,7 @@ namespace Microsoft.Research.Naiad.Frameworks.DifferentialDataflow /// The Differential Dataflow operators are defined in terms of objects, each of which wraps /// a Naiad stream and allows it to be interpreted with multiset semantics. /// - /// The class is the Differential Dataflow–specific wrapper for the Naiad + /// The class is the Differential Dataflow–specific wrapper for the Naiad /// class. /// /// diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTrace.cs b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTrace.cs index 06295ce..fb09990 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTrace.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTrace.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithAggregation.cs b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithAggregation.cs index 04b58e7..e036c13 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithAggregation.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithAggregation.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithHeap.cs b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithHeap.cs index 9ce155b..3769645 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithHeap.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithHeap.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithoutHeap.cs b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithoutHeap.cs index 7dffd16..a843a24 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithoutHeap.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/CollectionTraceWithoutHeap.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/Heaps.cs b/Frameworks/DifferentialDataflow/CollectionTrace/Heaps.cs index 30e206e..a770bc9 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/Heaps.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/Heaps.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/Increments.cs b/Frameworks/DifferentialDataflow/CollectionTrace/Increments.cs index f8de2be..56daeb2 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/Increments.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/Increments.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CollectionTrace/OffsetLength.cs b/Frameworks/DifferentialDataflow/CollectionTrace/OffsetLength.cs index ff7efba..c660f87 100644 --- a/Frameworks/DifferentialDataflow/CollectionTrace/OffsetLength.cs +++ b/Frameworks/DifferentialDataflow/CollectionTrace/OffsetLength.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/CommonNaiadables.cs b/Frameworks/DifferentialDataflow/CommonNaiadables.cs index 6362b6e..cf81caf 100644 --- a/Frameworks/DifferentialDataflow/CommonNaiadables.cs +++ b/Frameworks/DifferentialDataflow/CommonNaiadables.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/CoreGenerics.cs b/Frameworks/DifferentialDataflow/CoreGenerics.cs index e1b4073..76e0f14 100644 --- a/Frameworks/DifferentialDataflow/CoreGenerics.cs +++ b/Frameworks/DifferentialDataflow/CoreGenerics.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/DifferentialDataflow.csproj b/Frameworks/DifferentialDataflow/DifferentialDataflow.csproj index d9232d7..71d74f5 100644 --- a/Frameworks/DifferentialDataflow/DifferentialDataflow.csproj +++ b/Frameworks/DifferentialDataflow/DifferentialDataflow.csproj @@ -9,9 +9,10 @@ Properties Microsoft.Research.Naiad.Frameworks.DifferentialDataflow Microsoft.Research.Naiad.DifferentialDataflow - v4.0 + v4.5 512 - Client + + SAK SAK SAK @@ -49,6 +50,27 @@ + + true + bin\x64\Debug\ + DEBUG;TRACE + true + bin\Release\Microsoft.Research.Naiad.DifferentialDataflow.xml + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + true + bin\Release\Microsoft.Research.Naiad.DifferentialDataflow.xml + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + @@ -59,6 +81,9 @@ + + Properties\SharedAssemblyInfo.cs + diff --git a/Frameworks/DifferentialDataflow/ExtensionMethods.cs b/Frameworks/DifferentialDataflow/ExtensionMethods.cs index db48cc3..3b6f530 100644 --- a/Frameworks/DifferentialDataflow/ExtensionMethods.cs +++ b/Frameworks/DifferentialDataflow/ExtensionMethods.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 
0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/InputCollection.cs b/Frameworks/DifferentialDataflow/InputCollection.cs index 09b6a33..ef37a06 100644 --- a/Frameworks/DifferentialDataflow/InputCollection.cs +++ b/Frameworks/DifferentialDataflow/InputCollection.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/KeyIndices.cs b/Frameworks/DifferentialDataflow/KeyIndices.cs index ce7114f..ad0934c 100644 --- a/Frameworks/DifferentialDataflow/KeyIndices.cs +++ b/Frameworks/DifferentialDataflow/KeyIndices.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/LatticeInternTable.cs b/Frameworks/DifferentialDataflow/LatticeInternTable.cs index 1d62586..632941e 100644 --- a/Frameworks/DifferentialDataflow/LatticeInternTable.cs +++ b/Frameworks/DifferentialDataflow/LatticeInternTable.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/Microsoft.Research.Naiad.DifferentialDataflow.nuspec b/Frameworks/DifferentialDataflow/Microsoft.Research.Naiad.DifferentialDataflow.nuspec index b480a22..02a2742 100644 --- a/Frameworks/DifferentialDataflow/Microsoft.Research.Naiad.DifferentialDataflow.nuspec +++ b/Frameworks/DifferentialDataflow/Microsoft.Research.Naiad.DifferentialDataflow.nuspec @@ -1,9 +1,9 @@ - - + + Microsoft.Research.Naiad.DifferentialDataflow Naiad - Differential Dataflow framework - 0.4.2-beta + 0.5.0-beta naiadquestions@microsoft.com naiadquestions@microsoft.com,Microsoft http://www.apache.org/licenses/LICENSE-2.0.html @@ -19,7 +19,8 @@ - + + @@ -27,8 +28,11 @@ - - - + + + + + + - + \ No newline at end of file diff --git a/Frameworks/DifferentialDataflow/NaiadList.cs b/Frameworks/DifferentialDataflow/NaiadList.cs index 8442d2d..7348738 100644 --- a/Frameworks/DifferentialDataflow/NaiadList.cs +++ b/Frameworks/DifferentialDataflow/NaiadList.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStateful.cs b/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStateful.cs index 4d223f6..c26e2d0 100644 --- a/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStateful.cs +++ b/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStateful.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStatefulIntKeyed.cs b/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStatefulIntKeyed.cs index 9184c0d..7aeb76f 100644 --- a/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStatefulIntKeyed.cs +++ b/Frameworks/DifferentialDataflow/OperatorImplementations/BinaryStatefulIntKeyed.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 
0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStateful.cs b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStateful.cs index 39cf684..07b8044 100644 --- a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStateful.cs +++ b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStateful.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulIntKeyed.cs b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulIntKeyed.cs index 6d1b506..d408911 100644 --- a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulIntKeyed.cs +++ b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulIntKeyed.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulWithAggregation.cs b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulWithAggregation.cs index eff9947..0874509 100644 --- a/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulWithAggregation.cs +++ b/Frameworks/DifferentialDataflow/OperatorImplementations/UnaryStatefulWithAggregation.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Abs.cs b/Frameworks/DifferentialDataflow/Operators/Abs.cs index aaa50b9..79f86f3 100644 --- a/Frameworks/DifferentialDataflow/Operators/Abs.cs +++ b/Frameworks/DifferentialDataflow/Operators/Abs.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/Operators/AdjustLattice.cs b/Frameworks/DifferentialDataflow/Operators/AdjustLattice.cs index 7dcc8ba..b5b547b 100644 --- a/Frameworks/DifferentialDataflow/Operators/AdjustLattice.cs +++ b/Frameworks/DifferentialDataflow/Operators/AdjustLattice.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Aggregate.cs b/Frameworks/DifferentialDataflow/Operators/Aggregate.cs index 5739e60..dc3c8b0 100644 --- a/Frameworks/DifferentialDataflow/Operators/Aggregate.cs +++ b/Frameworks/DifferentialDataflow/Operators/Aggregate.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/CoGroupBy.cs b/Frameworks/DifferentialDataflow/Operators/CoGroupBy.cs index b379d5c..a3cc78e 100644 --- a/Frameworks/DifferentialDataflow/Operators/CoGroupBy.cs +++ b/Frameworks/DifferentialDataflow/Operators/CoGroupBy.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Concat.cs b/Frameworks/DifferentialDataflow/Operators/Concat.cs index 6c63e32..dc25054 100644 --- a/Frameworks/DifferentialDataflow/Operators/Concat.cs +++ b/Frameworks/DifferentialDataflow/Operators/Concat.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Consolidate.cs b/Frameworks/DifferentialDataflow/Operators/Consolidate.cs index f290643..78ffeb8 100644 --- a/Frameworks/DifferentialDataflow/Operators/Consolidate.cs +++ b/Frameworks/DifferentialDataflow/Operators/Consolidate.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/Operators/Count.cs b/Frameworks/DifferentialDataflow/Operators/Count.cs index 44f5a13..5bd1638 100644 --- a/Frameworks/DifferentialDataflow/Operators/Count.cs +++ b/Frameworks/DifferentialDataflow/Operators/Count.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Distinct.cs b/Frameworks/DifferentialDataflow/Operators/Distinct.cs index 2c78fe8..15c39ae 100644 --- a/Frameworks/DifferentialDataflow/Operators/Distinct.cs +++ b/Frameworks/DifferentialDataflow/Operators/Distinct.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Except.cs b/Frameworks/DifferentialDataflow/Operators/Except.cs index b6c3ab9..5b6b147 100644 --- a/Frameworks/DifferentialDataflow/Operators/Except.cs +++ b/Frameworks/DifferentialDataflow/Operators/Except.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/FixedPoint.cs b/Frameworks/DifferentialDataflow/Operators/FixedPoint.cs index eac6ec9..6b030b4 100644 --- a/Frameworks/DifferentialDataflow/Operators/FixedPoint.cs +++ b/Frameworks/DifferentialDataflow/Operators/FixedPoint.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/GroupBy.cs b/Frameworks/DifferentialDataflow/Operators/GroupBy.cs index f2ed407..5ab8b3c 100644 --- a/Frameworks/DifferentialDataflow/Operators/GroupBy.cs +++ b/Frameworks/DifferentialDataflow/Operators/GroupBy.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/Operators/Intersect.cs b/Frameworks/DifferentialDataflow/Operators/Intersect.cs index 5597614..e62a7ea 100644 --- a/Frameworks/DifferentialDataflow/Operators/Intersect.cs +++ b/Frameworks/DifferentialDataflow/Operators/Intersect.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Join.cs b/Frameworks/DifferentialDataflow/Operators/Join.cs index f95bd82..b3aac7e 100644 --- a/Frameworks/DifferentialDataflow/Operators/Join.cs +++ b/Frameworks/DifferentialDataflow/Operators/Join.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Max.cs b/Frameworks/DifferentialDataflow/Operators/Max.cs index a14123f..f608a7a 100644 --- a/Frameworks/DifferentialDataflow/Operators/Max.cs +++ b/Frameworks/DifferentialDataflow/Operators/Max.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Min.cs b/Frameworks/DifferentialDataflow/Operators/Min.cs index 8ffcc1b..d16d031 100644 --- a/Frameworks/DifferentialDataflow/Operators/Min.cs +++ b/Frameworks/DifferentialDataflow/Operators/Min.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Monitor.cs b/Frameworks/DifferentialDataflow/Operators/Monitor.cs index 4af217f..47ee6c0 100644 --- a/Frameworks/DifferentialDataflow/Operators/Monitor.cs +++ b/Frameworks/DifferentialDataflow/Operators/Monitor.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -82,4 +82,4 @@ public Monitor(int index, Stage collection, bool immutableInput, Action,T>>(); } } -} \ No newline at end of file +} diff --git a/Frameworks/DifferentialDataflow/Operators/Prioritize.cs b/Frameworks/DifferentialDataflow/Operators/Prioritize.cs index 78d7faa..9bf4f44 100644 --- a/Frameworks/DifferentialDataflow/Operators/Prioritize.cs +++ b/Frameworks/DifferentialDataflow/Operators/Prioritize.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Select.cs b/Frameworks/DifferentialDataflow/Operators/Select.cs index a93fca3..4f9a177 100644 --- a/Frameworks/DifferentialDataflow/Operators/Select.cs +++ b/Frameworks/DifferentialDataflow/Operators/Select.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/SelectMany.cs b/Frameworks/DifferentialDataflow/Operators/SelectMany.cs index 238c3f8..dd39db8 100644 --- a/Frameworks/DifferentialDataflow/Operators/SelectMany.cs +++ b/Frameworks/DifferentialDataflow/Operators/SelectMany.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Sum.cs b/Frameworks/DifferentialDataflow/Operators/Sum.cs index 4116da1..afbef4b 100644 --- a/Frameworks/DifferentialDataflow/Operators/Sum.cs +++ b/Frameworks/DifferentialDataflow/Operators/Sum.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Frameworks/DifferentialDataflow/Operators/SymmetricDifference.cs b/Frameworks/DifferentialDataflow/Operators/SymmetricDifference.cs index ba7a168..3cf9fa7 100644 --- a/Frameworks/DifferentialDataflow/Operators/SymmetricDifference.cs +++ b/Frameworks/DifferentialDataflow/Operators/SymmetricDifference.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -38,4 +38,4 @@ protected override Int64 WeightFunction(Int64 weight1, Int64 weight2) public SymmetricDifference(int index, Stage collection, bool input1Immutable, bool input2Immutable) : base(index, collection, input1Immutable, input2Immutable) { } } -} \ No newline at end of file +} diff --git a/Frameworks/DifferentialDataflow/Operators/Union.cs b/Frameworks/DifferentialDataflow/Operators/Union.cs index b4428ce..86b102a 100644 --- a/Frameworks/DifferentialDataflow/Operators/Union.cs +++ b/Frameworks/DifferentialDataflow/Operators/Union.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Operators/Where.cs b/Frameworks/DifferentialDataflow/Operators/Where.cs index 2d7691f..99c402b 100644 --- a/Frameworks/DifferentialDataflow/Operators/Where.cs +++ b/Frameworks/DifferentialDataflow/Operators/Where.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/DifferentialDataflow/Properties/AssemblyInfo.cs b/Frameworks/DifferentialDataflow/Properties/AssemblyInfo.cs index 7cae207..b750492 100644 --- a/Frameworks/DifferentialDataflow/Properties/AssemblyInfo.cs +++ b/Frameworks/DifferentialDataflow/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -26,31 +26,7 @@ // set of attributes. 
Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad.DifferentialDataflow")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad.DifferentialDataflow")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("da38e2f3-2364-4d1b-9ee6-8ee21485aebe")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Frameworks/DifferentialDataflow/Remoting.cs b/Frameworks/DifferentialDataflow/Remoting.cs index 31c597a..a45f889 100644 --- a/Frameworks/DifferentialDataflow/Remoting.cs +++ b/Frameworks/DifferentialDataflow/Remoting.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Frameworks/GraphLINQ/GraphLINQ.cs b/Frameworks/GraphLINQ/GraphLINQ.cs index b79880a..56c3a7c 100644 --- a/Frameworks/GraphLINQ/GraphLINQ.cs +++ b/Frameworks/GraphLINQ/GraphLINQ.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -390,7 +390,7 @@ public static Stream, TTime> TransmitAlong( if (edges == null) throw new ArgumentNullException("edges"); var compacted = edges.Compact(); // could be cached and retrieved as needed - return compacted.NewBinaryStage(nodes, (i, v) => new GraphJoin(i, v, (x, y) => x.value), null, x => x.node.index, null, "TransmitAlong"); + return compacted.NewBinaryStage(nodes, (i, s) => new GraphJoinVertex(i, s), null, x => x.node.index, null, "TransmitAlong"); } @@ -417,7 +417,7 @@ public static Stream, TTime> TransmitAlong new GraphJoin(i, v, valueSelector), null, x => x.node.index, null, "TransmitAlong"); + return compacted.NewBinaryStage(nodes, (i, v) => new GraphJoinVertex(i, v, valueSelector), null, x => x.node.index, null, "TransmitAlong"); } /// @@ -485,7 +485,19 @@ public static Stream, TTime> NodeAggregate( { if (nodes == null) throw new ArgumentNullException("nodes"); if (combiner == null) throw new ArgumentNullException("combiner"); + +#if true + var stage = Foundry.NewStage(nodes.Context, (i,s) => new NodeAggregatorVertex(i, s, combiner, nodes.ForStage.Placement.Count), "Aggregator"); + + Action[], int[], int> action = (data, dsts, len) => { for (int i = 0; i < len; i++) dsts[i] = data[i].node.index; }; + + var input1 = stage.NewInput(nodes, (message, vertex) => vertex.OnReceive(message), x => x.node.index, action); + var output = stage.NewOutput(vertex => vertex.Output, x => x.node.index); + + return output; +#else return nodes.NewUnaryStage((i, v) => new NodeAggregatorVertex(i, v, combiner, nodes.ForStage.Placement.Count), x => x.node.index, x => x.node.index, "Aggregator"); +#endif } /// @@ -533,7 +545,43 @@ public static Stream, TTime> StateMachine new NodeUnaryStateMachine(i, v, transitionSelector, defaultState), x => x.node.index, x => x.node.index, "NodeStateMachine"); + + var stage = Foundry.NewStage(nodes.Context, (i, s) => new NodeUnaryStateMachine(i, s, transitionSelector, defaultState), "StateMachine"); + + Action[], int[], int> action 
= (data, dsts, len) => { for (int i = 0; i < len; i++) dsts[i] = data[i].node.index; }; + + var input1 = stage.NewInput(nodes, (message, vertex) => vertex.OnReceive(message), x => x.node.index, action); + var output = stage.NewOutput(vertex => vertex.Output, x => x.node.index); + + return output; + } + + /// + /// Given a stream of values associated with nodes, maintains a state machine for each node, + /// and produces a stream of new states on each transition, based on the given . + /// + /// The type of value associated with each node. + /// The type of state associated with each node. + /// The type of timestamp on each record. + /// The stream of nodes with values. + /// A function from current value and state, to new state. + /// The default state associated with a node. + /// The stream of changed states at each node. + public static Stream, TTime> StateMachine(this Stream, TTime> nodes, Func transitionSelector, TState defaultState) + where TTime : Time + where TState : IEquatable + { + if (nodes == null) throw new ArgumentNullException("nodes"); + if (transitionSelector == null) throw new ArgumentNullException("transitionSelector"); + + var stage = Foundry.NewStage(nodes.Context, (i, s) => new NodeUnaryStateMachine(i, s, transitionSelector, defaultState), "StateMachine"); + + Action[], int[], int> action = (data, dsts, len) => { for (int i = 0; i < len; i++) dsts[i] = data[i].node.index; }; + + var input1 = stage.NewInput(nodes, (message, vertex) => vertex.OnReceive(message), x => x.node.index, action); + var output = stage.NewOutput(vertex => vertex.Output, x => x.node.index); + + return output; } /// @@ -564,7 +612,7 @@ public static Stream, TTime> StateMachine /// element type - private struct Option + internal struct Option { public readonly TElement Value; public readonly bool IsValid; @@ -576,23 +624,10 @@ private struct Option public Option(TElement value) { this.Value = value; this.IsValid = true; } } - /// - /// Given a stream of values associated 
with nodes, maintains a state machine for each node, - /// and produces a stream of new states on each transition, based on the given . - /// - /// The type of value associated with each node. - /// The type of state associated with each node. - /// The type of timestamp on each record. - /// The stream of nodes with values. - /// A function from current value and state, to new state. - /// The stream of changed states at each node. - public static Stream, TTime> StateMachine(this Stream, TTime> nodes, Func transitionSelector) + internal static Stream, TTime> FilterOptions(this Stream>, TTime> stream) where TTime : Time - where TState : IEquatable { - if (nodes == null) throw new ArgumentNullException("nodes"); - if (transitionSelector == null) throw new ArgumentNullException("transitionSelector"); - return nodes.StateMachine(transitionSelector, default(TState)); + return stream.NewUnaryStage((i, s) => new FilterOptionsVertex(i, s), x => x.node.index, x => x.node.index, "FilterOptions"); } /// @@ -604,19 +639,18 @@ public static Stream, TTime> StateMachineThe type of timestamp on each record. /// The stream of nodes with values. /// A function from current value and state, to new state. - /// The default state associated with a node. /// The stream of changed states at each node. - public static Stream, TTime> StateMachine(this Stream, TTime> nodes, Func transitionSelector, TState defaultState) + public static Stream, TTime> StateMachine(this Stream, TTime> nodes, Func transitionSelector) where TTime : Time where TState : IEquatable { if (nodes == null) throw new ArgumentNullException("nodes"); if (transitionSelector == null) throw new ArgumentNullException("transitionSelector"); - return nodes.StateMachine((v, s) => { var n = transitionSelector(v, s); return n.PairWith(s.Equals(n) ? 
new Option() : new Option(n)); }, defaultState) - .Where(x => x.value.IsValid) - .Select(x => x.node.WithValue(x.value.Value)); + return nodes.StateMachine(transitionSelector, default(TState)); } + + /// /// Given a stream of values associated with nodes, maintains a state machine for each node, /// and produces outputs on each transition, based on the given . @@ -768,6 +802,8 @@ public override void OnNotify(TTime time) public NodeAggregatorVertex(int index, Stage stage, Func aggregate, int parts) : base(index, stage) { + this.Entrancy = 5; + this.values = new Dictionary(); this.update = aggregate; @@ -831,10 +867,10 @@ public override void OnReceive2(Message, TTime> message) { for (int i = 0; i < message.length; i++) { - var index = message.payload[i].node.index; - if (index >= this.state.Length) + var localIndex = message.payload[i].node.index / this.parts; + if (localIndex >= this.state.Length) { - var newState = new TState[Math.Max(index + 1, 2 * this.state.Length)]; + var newState = new TState[Math.Max(localIndex + 1, 2 * this.state.Length)]; for (int j = 0; j < this.state.Length; j++) newState[j] = this.state[j]; @@ -845,7 +881,7 @@ public override void OnReceive2(Message, TTime> message) this.state = newState; } - this.state[message.payload[i].node.index / this.parts] = message.payload[i].value; + this.state[localIndex] = message.payload[i].value; } } @@ -940,8 +976,75 @@ public NodeUnaryStateMachine(int index, Stage vertex, Func : UnaryVertex, NodeWithValue, TTime> + where TTime : Time + where TState : IEquatable + { + private TState[] state; + private readonly Func transition; + + private readonly int parts; + private readonly TState defaultState; + + public override void OnReceive(Message, TTime> message) + { + var output = this.Output.GetBufferForTime(message.time); + for (int i = 0; i < message.length; i++) + { + var record = message.payload[i]; + var localIndex = record.node.index / this.parts; + + if (this.state.Length <= localIndex) + { + var 
newState = new TState[Math.Max(localIndex + 1, 2 * this.state.Length)]; + for (int j = 0; j < this.state.Length; j++) + newState[j] = this.state[j]; + + for (int j = this.state.Length; j < newState.Length; j++) + newState[j] = defaultState; + + this.state = newState; + } + + var transitionResult = this.transition(record.value, this.state[localIndex]); + + if (!this.state[localIndex].Equals(transitionResult)) + { + this.state[localIndex] = transitionResult; + output.Send(record.node.WithValue(transitionResult)); + } + } + } + + public NodeUnaryStateMachine(int index, Stage vertex, Func transition, TState defaultState) + : base(index, vertex) + { + this.state = new TState[] { }; + + this.transition = transition; + + this.parts = vertex.Placement.Count; + this.defaultState = defaultState; + } + } + + internal class FilterOptionsVertex : UnaryVertex>, NodeWithValue, TTime> + where TTime : Time + { + public override void OnReceive(Message>, TTime> message) + { + var output = this.Output.GetBufferForTime(message.time); + for (int i = 0; i < message.length; i++) + if (message.payload[i].value.IsValid) + output.Send(message.payload[i].node.WithValue(message.payload[i].value.Value)); + } + + public FilterOptionsVertex(int index, Stage stage) : base(index, stage) { } + } + // vertex managing a CompactGraph fragment, processing corresponding values by applying a reducer. 
- internal class GraphJoin : BinaryVertex, NodeWithValue, T> + internal class GraphJoinVertex : BinaryVertex, NodeWithValue, T> where T : Time { private CompactGraph graph; @@ -994,7 +1097,7 @@ public override void OnNotify(T time) this.toProcess = new List>(); } - public GraphJoin(int index, Stage vertex, Func, Node, TOutput> valueSelector) + public GraphJoinVertex(int index, Stage vertex, Func, Node, TOutput> valueSelector) : base(index, vertex) { this.graph = new CompactGraph(); @@ -1005,6 +1108,68 @@ public GraphJoin(int index, Stage vertex, Func, Node, T } } + // vertex managing a CompactGraph fragment, processing corresponding values by applying a reducer. + internal class GraphJoinVertex : BinaryVertex, NodeWithValue, T> + where T : Time + { + private CompactGraph graph; + + private List> toProcess; + + public override void OnReceive1(Message message) + { + for (int i = 0; i < message.length; i++) + this.graph = message.payload[i]; + + this.NotifyAt(message.time); + } + + public override void OnReceive2(Message, T> message) + { + if (this.graph.Nodes == null && message.length > 0) + { + for (int i = 0; i < message.length; i++) + this.toProcess.Add(message.payload[i]); + } + else + { + var output = this.Output.GetBufferForTime(message.time); + for (int i = 0; i < message.length; i++) + { + var record = message.payload[i]; + var localName = record.node.index / this.Stage.Placement.Count; + + if (localName + 1 < this.graph.Nodes.Length) + for (int j = this.graph.Nodes[localName]; j < this.graph.Nodes[localName + 1]; j++) + output.Send(this.graph.Edges[j].WithValue(record.value)); + } + } + } + + public override void OnNotify(T time) + { + var output = this.Output.GetBufferForTime(time); + foreach (var record in this.toProcess.AsEnumerable()) + { + var localName = record.node.index / this.Stage.Placement.Count; + if (localName + 1 < this.graph.Nodes.Length) + for (int j = this.graph.Nodes[localName]; j < this.graph.Nodes[localName + 1]; j++) + 
output.Send(this.graph.Edges[j].WithValue(record.value)); + } + + this.toProcess = new List>(); + } + + public GraphJoinVertex(int index, Stage stage) + : base(index, stage) + { + this.graph = new CompactGraph(); + this.toProcess = new List>(); + + this.Entrancy = 5; + } + } + // dense list of edge destinations and node offsets. internal struct CompactGraph { @@ -1349,4 +1514,4 @@ public Stream, Epoch> FinalRenamings } } } -} \ No newline at end of file +} diff --git a/Frameworks/GraphLINQ/GraphLINQ.csproj b/Frameworks/GraphLINQ/GraphLINQ.csproj index 4166ab0..5cb94ef 100644 --- a/Frameworks/GraphLINQ/GraphLINQ.csproj +++ b/Frameworks/GraphLINQ/GraphLINQ.csproj @@ -1,5 +1,5 @@  - + Debug @@ -9,9 +9,10 @@ Properties Microsoft.Research.Naiad.GraphLINQ Microsoft.Research.Naiad.GraphLINQ - v4.0 + v4.5 512 - Client + + true @@ -21,6 +22,7 @@ DEBUG;TRACE prompt 4 + false pdbonly @@ -29,10 +31,31 @@ TRACE prompt 4 + false bin\Release\Microsoft.Research.Naiad.GraphLINQ.xml + + true + bin\x64\Debug\ + DEBUG;TRACE + bin\Release\Microsoft.Research.Naiad.GraphLINQ.xml + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + TRACE + bin\Release\Microsoft.Research.Naiad.GraphLINQ.xml + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + @@ -43,6 +66,9 @@ + + Properties\SharedAssemblyInfo.cs + diff --git a/Frameworks/GraphLINQ/Microsoft.Research.Naiad.GraphLINQ.nuspec b/Frameworks/GraphLINQ/Microsoft.Research.Naiad.GraphLINQ.nuspec index 1d64780..b7d94b6 100644 --- a/Frameworks/GraphLINQ/Microsoft.Research.Naiad.GraphLINQ.nuspec +++ b/Frameworks/GraphLINQ/Microsoft.Research.Naiad.GraphLINQ.nuspec @@ -1,9 +1,9 @@ - - + + Microsoft.Research.Naiad.GraphLINQ Naiad - GraphLINQ framework - 0.4.2-beta + 0.5.0-beta naiadquestions@microsoft.com naiadquestions@microsoft.com,Microsoft http://www.apache.org/licenses/LICENSE-2.0.html @@ -19,7 +19,8 @@ - + + @@ -27,8 +28,11 @@ - - - + + + + + + - + \ No newline at end of file diff --git 
a/Frameworks/GraphLINQ/Properties/AssemblyInfo.cs b/Frameworks/GraphLINQ/Properties/AssemblyInfo.cs index 664d3d6..86d2755 100644 --- a/Frameworks/GraphLINQ/Properties/AssemblyInfo.cs +++ b/Frameworks/GraphLINQ/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -26,31 +26,7 @@ // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad.GraphLINQ")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad.GraphLINQ")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("28c42104-7500-405f-842b-d25d5e4c5dda")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Frameworks/HdfsSupport/Hdfs.cs b/Frameworks/HdfsSupport/Hdfs.cs new file mode 100644 index 0000000..4a3fa59 --- /dev/null +++ b/Frameworks/HdfsSupport/Hdfs.cs @@ -0,0 +1,386 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; + +using Microsoft.Research.Peloponnese.Hdfs; + +using Microsoft.Research.Naiad.Input; +using Microsoft.Research.Naiad.Dataflow; +using Microsoft.Research.Naiad.Frameworks.Lindi; +using Microsoft.Research.Naiad.Frameworks.Storage; +using Microsoft.Research.Naiad.Frameworks.Storage.Dfs; +using Microsoft.Research.Naiad.Frameworks.WorkGenerator; +using Microsoft.Research.Naiad.Serialization; + +namespace Microsoft.Research.Naiad.Frameworks.Hdfs +{ + /// + /// The Hdfs framework supports reading and writing Hdfs files using the Java-based native Hdfs protocol. It requires Java and + /// the Hdfs jar files to be installed locally, and will throw an exception if they are not installed. 
The environment JAVA_HOME + /// must be set to the location of the Java installation, and HADOOP_COMMON_HOME must be set to the location of the Hadoop jar files + /// including those for Hdfs + /// + class NamespaceDoc + { + + } + + #region extension methods + /// + /// extension methods for working with Java-protocol Hdfs files + /// + public static class ExtensionMethods + { + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized as lines of text. Concatenate all the lines of the files to the output, in an unspecified order. + /// + /// time of the input and output records + /// stream of input paths + /// stream of text lines in the hdfs files + public static Stream FromHdfsText(this Stream input) where TTime : Time + { + return input.GenerateWork( + time => new DfsTextCoordinator(new HdfsClient()), + (workerIndex, time) => new DfsTextWorker(new HdfsClient(), 256)); + } + + /// + /// Read a collection of HDFS files serialized as lines of text. Concatenate all the lines of the files to the output, + /// in an unspecified order. + /// + /// Naiad computation + /// path of the file or directory to read + /// stream of text lines in the hdfs files + public static Stream ReadHdfsTextCollection( + this Computation manager, Uri fileOrDirectoryPath) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsTextCoordinator(new HdfsClient()), + (workerIndex, time) => new DfsTextWorker(new HdfsClient(), 256)); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in a custom binary format. Concatenate all the records to the output, in an unspecified order. 
+ /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// stream of records in the hdfs files + public static Stream FromHdfs( + this Stream input, + Func>> deserialize) where TTime : Time + { + return input.GenerateWork( + time => new DfsFileCoordinator(new HdfsClient()), + (workerIndex, time) => new DfsFileWorker(new HdfsClient(), deserialize)); + } + + /// + /// Read a collection of HDFS files serialized in a custom binary format. Concatenate all the records to the output, + /// in an unspecified order. + /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// stream of records in the hdfs files + public static Stream ReadHdfsCollection( + this Computation manager, Uri fileOrDirectoryPath, + Func>> deserialize) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsFileCoordinator(new HdfsClient()), + (workerIndex, time) => new DfsFileWorker(new HdfsClient(), deserialize)); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in the default Naiad binary format. Concatenate all the records to the output, in an unspecified order. 
+ /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// stream of records in the hdfs files + public static Stream FromHdfsBinary( + this Stream input) where TTime : Time + { + return input.GenerateWork( + time => new DfsFileCoordinator(new HdfsClient()), + (workerIndex, time) => + new DfsFileWorker( + new HdfsClient(), + stream => Utils.GetNaiadReaderBatchEnumerable(stream, 256))); + } + + /// + /// Read a collection of HDFS files serialized in the default Naiad binary format. Concatenate all the records to the output, + /// in an unspecified order. + /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// stream of records in the hdfs files + public static Stream ReadHdfsBinaryCollection( + this Computation manager, Uri fileOrDirectoryPath) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsFileCoordinator(new HdfsClient()), + (workerIndex, time) => + new DfsFileWorker( + new HdfsClient(), + stream => Utils.GetNaiadReaderBatchEnumerable(stream, 256))); + } + + /// + /// general method to write a stream of records to a collection of HDFS files. 
The collection is active + /// throughout the computation and is closed when the computation terminates: it concatenates records from all + /// epochs in an undefined order + /// + /// type of the records to write + /// type of the serializer object + /// stream of records to write + /// function to generate a filename given a processId, threadId and sequence number + /// function to generate a serializer given a Stream to write to + /// function to serialize a batch of records given a serializer + /// buffer size to use in the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteHdfsBinary( + this Stream source, + Func format, + Func writerFunction, + Action> serialize, + int bufferSize, + long blockSize, + long segmentThreshold) where TWriter : class, IDisposable, IFlushable + { + return source.WriteBySubscription( + format, + fileName => new HdfsClient().GetDfsStreamWriter(fileName, bufferSize, blockSize), + stream => writerFunction(stream), + serialize, + segmentThreshold); + } + + /// + /// method to write a stream of records to a collection of HDFS files using the default Naiad binary serializer. 
+ /// The collection is active throughout the computation and is closed when the computation terminates: it concatenates + /// records from all epochs in an undefined order + /// + /// type of the records to write + /// stream of records to write + /// webhdfs directory to write the partitioned data into + /// buffer size to use in the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteHdfsBinary( + this Stream source, + Uri prefix, + int bufferSize = 1024*1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) + { + // make sure we'll be able to write the partitioned data + HdfsClient client = new HdfsClient(); + client.EnsureDirectory(prefix, false); + + return source.WriteHdfsBinary( + (processId, threadId, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, segment), + stream => new NaiadWriter(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.Write(arraySegment.Array[i]); + } + }, + bufferSize, blockSize, segmentThreshold); + } + + /// + /// general method to write a stream of records to a collection of HDFS files, partitioned by time as well as key. 
+ /// Within a given time and part, records are written in an undefined order + /// + /// type of the records to write + /// type of the serializer object + /// type of the record time + /// stream of records to write + /// function to generate a filename given a processId, threadId, time and sequence number + /// function to generate a serializer given a Stream to write to + /// function to serialize a batch of records given a serializer + /// buffer size to use for the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToHdfsBinary( + this Stream source, + Func format, + Func writerFunction, + Action> serialize, + int bufferSize, + long blockSize, + long segmentThreshold) + where TWriter : class, IDisposable, IFlushable + where TTime : Time + { + return source.WriteByTime( + format, + () => new HdfsClient(), + (client, fileName) => client.GetDfsStreamWriter(fileName, bufferSize, blockSize), + stream => writerFunction(stream), + serialize, + segmentThreshold); + } + + /// + /// method to write a stream of records to a collection of HDFS files using the default Naiad binary serializer, + /// partitioned by time as well as key. 
Within a given time and part, records are written in an undefined order + /// + /// type of the records to write + /// type of the record time + /// stream of records to write + /// webhdfs directory to write the partitioned data into + /// buffer size to use for the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToHdfsBinary( + this Stream source, + Uri prefix, + int bufferSize = 1024 * 1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) where TTime : Time + { + // make sure we'll be able to write the partitioned data + HdfsClient client = new HdfsClient(); + client.EnsureDirectory(prefix, false); + + return source.ToHdfsBinary( + (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment), + stream => new NaiadWriter(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.Write(arraySegment.Array[i]); + } + }, + bufferSize, blockSize, segmentThreshold); + } + + /// + /// write a sequence of strings as hdfs text files. 
The collection is active throughout the computation and is + /// closed when the computation terminates: it concatenates records from all epochs in an undefined order + /// + /// stream of records to write + /// webhdfs directory to write the partitioned data into + /// buffer size to use for the text serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteHdfsText( + this Stream source, + Uri prefix, + int bufferSize = 1024 * 1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) + { + // make sure we'll be able to write the partitioned data + HdfsClient client = new HdfsClient(); + client.EnsureDirectory(prefix, false); + + // don't write byte order marks at the start of the files + Encoding utf8 = new UTF8Encoding(false, true); + + return source.WriteHdfsBinary( + (processId, threadId, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, segment), + stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.WriteLine(arraySegment.Array[i]); + } + }, + bufferSize, blockSize, segmentThreshold); + } + + /// + /// write a sequence of strings as hdfs text files, partitioned by time as well as key. 
+ /// Within a given time and part, records are written in an undefined order + /// + /// type of the record time + /// stream of records to write + /// webhdfs directory to write the partitioned data into + /// buffer size to use for the text serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToHdfsText( + this Stream source, + Uri prefix, + int bufferSize = 1024 * 1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) where TTime : Time + { + // make sure we'll be able to write the partitioned data + HdfsClient client = new HdfsClient(); + client.EnsureDirectory(prefix, false); + + // don't write byte order marks at the start of the files + Encoding utf8 = new UTF8Encoding(false, true); + + return source.ToHdfsBinary( + (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment), + stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.WriteLine(arraySegment.Array[i]); + } + }, + bufferSize, blockSize, segmentThreshold); + } + } + #endregion +} diff --git a/Frameworks/HdfsSupport/HdfsSupport.csproj b/Frameworks/HdfsSupport/HdfsSupport.csproj new file mode 100644 index 0000000..d091dcf --- /dev/null +++ b/Frameworks/HdfsSupport/HdfsSupport.csproj @@ -0,0 +1,105 @@ + + + + + + + Debug + AnyCPU + {66D2A00E-F889-4B2F-9C40-04A32278FB86} + Library + Properties + HdfsSupport + Microsoft.Research.Naiad.HdfsSupport + v4.5 + 512 + + caea79ad + ..\..\ + true + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + false + + + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + false + bin\x64\Release\Microsoft.Research.Naiad.HdfsSupport.XML + + + + + + + + + + + + + + 
Properties\SharedAssemblyInfo.cs + + + + + + + {a6221415-1283-4c04-8d2c-e25a857e1fe9} + Naiad + + + {bdc6546c-7ba0-472b-b260-0d596b6152e4} + Lindi + + + {0dca9543-ff9d-48d6-9748-a966dc39c35d} + Storage + + + {eba3d350-41eb-474c-aed9-9cfd1f809de3} + WorkGenerator + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/HdfsSupport/Microsoft.Research.Naiad.HdfsSupport.nuspec b/Frameworks/HdfsSupport/Microsoft.Research.Naiad.HdfsSupport.nuspec new file mode 100644 index 0000000..f541539 --- /dev/null +++ b/Frameworks/HdfsSupport/Microsoft.Research.Naiad.HdfsSupport.nuspec @@ -0,0 +1,42 @@ + + + + Microsoft.Research.Naiad.HdfsSupport + Naiad - Hdfs Storage support + 0.5.0-beta + naiadquestions@microsoft.com + naiadquestions@microsoft.com,Microsoft + http://www.apache.org/licenses/LICENSE-2.0.html + http://research.microsoft.com/naiad/ + + true + Provides convenient access to Windows Azure Storage APIs from Naiad programs. + Microsoft Corporation + en-US + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Naiad/Channels/LocalChannel.cs b/Frameworks/HdfsSupport/Properties/AssemblyInfo.cs similarity index 57% rename from Naiad/Channels/LocalChannel.cs rename to Frameworks/HdfsSupport/Properties/AssemblyInfo.cs index f235a85..c0e2021 100644 --- a/Naiad/Channels/LocalChannel.cs +++ b/Frameworks/HdfsSupport/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -17,4 +17,15 @@ * See the Apache Version 2.0 License for specific language governing * permissions and limitations under the License. 
*/ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("HdfsSupport")] +[assembly: AssemblyConfiguration("")] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("4659cd4b-1cea-424d-a4d9-70bda63ca19b")] diff --git a/Frameworks/HdfsSupport/app.config b/Frameworks/HdfsSupport/app.config new file mode 100644 index 0000000..cd20ef1 --- /dev/null +++ b/Frameworks/HdfsSupport/app.config @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/Frameworks/HdfsSupport/packages.config b/Frameworks/HdfsSupport/packages.config new file mode 100644 index 0000000..612f957 --- /dev/null +++ b/Frameworks/HdfsSupport/packages.config @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/Frameworks/Lindi/Lindi.cs b/Frameworks/Lindi/Lindi.cs index 6be217a..27d82e3 100644 --- a/Frameworks/Lindi/Lindi.cs +++ b/Frameworks/Lindi/Lindi.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -917,12 +917,15 @@ public static Stream SelectManyArraySegmentThe type of the input records. /// The type of timestamp on each record. /// The input stream. + /// A predicate indicating which times to synchronize in /// The input stream. 
- public static Stream Synchronize(this Stream stream) + public static Stream Synchronize(this Stream stream, Func predicate) where TTime : Time { if (stream == null) throw new ArgumentNullException("stream"); - return stream.UnaryExpression(stream.PartitionedBy, x => x, "Delay"); + if (predicate == null) throw new ArgumentNullException("predicate"); + + return stream.NewUnaryStage((i, s) => new SynchronizeVertex(i, s, predicate), null, stream.PartitionedBy, "Synchronize"); } /// @@ -1130,6 +1133,51 @@ public SelectManyArraySegment(int index, Stage stage, Func : UnaryVertex + where TTime : Time + { + private readonly Func Predicate; + private readonly Dictionary> Records; + + public override void OnReceive(Message message) + { + if (this.Predicate(message.time)) + { + if (!this.Records.ContainsKey(message.time)) + { + this.Records.Add(message.time, new List()); + this.NotifyAt(message.time); + } + + var list = this.Records[message.time]; + for (int i = 0; i < message.length; i++) + list.Add(message.payload[i]); + } + else + this.Output.Send(message); + } + + public override void OnNotify(TTime time) + { + if (this.Records.ContainsKey(time)) + { + var list = this.Records[time]; + this.Records.Remove(time); + + var output = this.Output.GetBufferForTime(time); + for (int i = 0; i < list.Count; i++) + output.Send(list[i]); + } + } + + public SynchronizeVertex(int index, Stage stage, Func predicate) + : base(index, stage) + { + this.Predicate = predicate; + this.Records = new Dictionary>(); + } + } + internal class Writer : SinkVertex { private readonly Dictionary writers = new Dictionary(); diff --git a/Frameworks/Lindi/Lindi.csproj b/Frameworks/Lindi/Lindi.csproj index 5cb5ff8..b2f21d4 100644 --- a/Frameworks/Lindi/Lindi.csproj +++ b/Frameworks/Lindi/Lindi.csproj @@ -9,13 +9,14 @@ Properties Microsoft.Research.Naiad.Frameworks.Lindi Microsoft.Research.Naiad.Lindi - v4.0 + v4.5 512 SAK SAK SAK SAK - Client + + true @@ -25,6 +26,7 @@ DEBUG;TRACE prompt 4 + false pdbonly 
@@ -33,15 +35,39 @@ TRACE prompt 4 + false bin\Release\Microsoft.Research.Naiad.Lindi.xml + + true + bin\x64\Debug\ + DEBUG;TRACE + bin\Release\Microsoft.Research.Naiad.Lindi.xml + full + x64 + prompt + MinimumRecommendedRules.ruleset + + + bin\x64\Release\ + TRACE + bin\Release\Microsoft.Research.Naiad.Lindi.xml + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + + + Properties\SharedAssemblyInfo.cs + diff --git a/Frameworks/Lindi/Microsoft.Research.Naiad.Lindi.nuspec b/Frameworks/Lindi/Microsoft.Research.Naiad.Lindi.nuspec index a044fc9..5b14cf7 100644 --- a/Frameworks/Lindi/Microsoft.Research.Naiad.Lindi.nuspec +++ b/Frameworks/Lindi/Microsoft.Research.Naiad.Lindi.nuspec @@ -1,9 +1,9 @@ - - + + Microsoft.Research.Naiad.Lindi Naiad - Lindi framework - 0.4.2-beta + 0.5.0-beta naiadquestions@microsoft.com naiadquestions@microsoft.com,Microsoft http://www.apache.org/licenses/LICENSE-2.0.html @@ -19,7 +19,7 @@ - + @@ -27,8 +27,11 @@ - - - + + + + + + - + \ No newline at end of file diff --git a/Frameworks/Lindi/Properties/AssemblyInfo.cs b/Frameworks/Lindi/Properties/AssemblyInfo.cs index 53ed1da..a082b0f 100644 --- a/Frameworks/Lindi/Properties/AssemblyInfo.cs +++ b/Frameworks/Lindi/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -26,31 +26,7 @@ // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad.Lindi")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad.Lindi")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. 
If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("e37eabd1-ee4d-4c42-9d4b-282f96cbd2d4")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Frameworks/Storage/Dfs.cs b/Frameworks/Storage/Dfs.cs new file mode 100644 index 0000000..441e660 --- /dev/null +++ b/Frameworks/Storage/Dfs.cs @@ -0,0 +1,712 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. 
+ */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Threading.Tasks; + +using Microsoft.Research.Peloponnese.Shared; + +using Microsoft.Research.Naiad; +using Microsoft.Research.Naiad.Dataflow; +using Microsoft.Research.Naiad.Frameworks.Lindi; +using Microsoft.Research.Naiad.Frameworks.WorkGenerator; + +namespace Microsoft.Research.Naiad.Frameworks.Storage.Dfs +{ + /// + /// The Dfs framework includes base classes to support reading and writing Hdfs files, and is intended to be extensible to other + /// block-based file systems + /// + class NamespaceDoc + { + + } + + #region helper classes + /// + /// Work item describing an Dfs block to be read at a worker. + /// + /// + /// This is currently somewhat HDFS-specific, and would probably be generalized if another file + /// system were added + /// + [Serializable] + public class DfsBlock + { + /// + /// path of the file being read + /// + public Uri path; + /// + /// total length of the file + /// + public long fileLength; + /// + /// offset of the block in the file + /// + public long offset; + /// + /// length of the block to read + /// + public long length; + /// + /// address and port of the preferred datanode to use to read the block from; this saves one redirection + /// through the namenode during the webhdfs protocol + /// + public IPEndPoint dataNodeAddress; + } + #endregion + + #region base implementation for Dfs work coordinators + /// + /// base class for Dfs coordinators, that expands directories into sets of files to read, and keeps track + /// of the mapping from IPAddresses to IPEndpoints + /// + /// + /// This is currently somewhat HDFS-specific, and would probably be generalized if another file + /// system were added + /// + /// the type used by workers to identify themselves, e.g. 
including their IP addresses + public abstract class DfsBaseCoordinator + : MatchingCoordinator + { + /// + /// The client for doing basic DFS operations. + /// + /// + /// This is currently an HDFS base client that supports either Java or WebHdfs protocols, but expects an HDFS-like way + /// of operating that would be generalized for other DFSs + /// + protected readonly HdfsClientBase client; + /// + /// mapping of IP address to datanode endpoint. The MatchingCoordinator queues are indexed by datanode endpoint + /// so we know how to read the block once it is removed from a queue, but the workers identify themselves by IP + /// address, so we need to be able to look up the corresponding queue given an IP address + /// + private readonly Dictionary dataNodes; + + /// + /// called whenever a queue is added: keep our index of addresses up to date + /// + /// new queue + protected override void NotifyQueueAddition(IPEndPoint queue) + { + // we are assuming there is only one datanode per ip address + this.dataNodes.Add(queue.Address, queue); + } + + /// + /// called whenever a queue is removed: keep our index of addresses up to date + /// + /// queue being removed + protected override void NotifyQueueRemoval(IPEndPoint queue) + { + this.dataNodes.Remove(queue.Address); + } + + /// + /// Called when a worker announces that it is ready for another work item, to find a work item on the + /// worker's matching queue, if any. 
The worker may have multiple IP addresses, so it returns them all, + /// and if any matches an address the datanode is listening on, then the worker is matched to that datanode + /// + /// the IP addresses of the worker's network interfaces + /// the datanode endpoint of a matching queue, if any + /// the matching queue, if there is one, otherwise null + protected override MatchQueue MapWorkerToQueue(IPAddress[] workerAddresses, ref IPEndPoint matchingDataNode) + { + // look at each ip address that the worker can use, to see if any has a matching queue with work waiting + foreach (IPAddress choice in workerAddresses) + { + // if there is a matching datanode, store it in matchingDataNode + if (this.dataNodes.TryGetValue(choice, out matchingDataNode)) + { + // if there is a matching datanode then there must be a queue with that datanode, so return it + return this.waitingWork[matchingDataNode]; + } + } + + // there is no matching queue + return null; + } + + /// + /// given an hdfs file, return a sequence of work items, each with a set of matching categories + /// + /// + /// this is currently HDFS-specific, although the HdfsFile class could easily be extended to support + /// other DFSs + /// + /// file to expand + /// a sequence of work items, each with a set of matching categories + protected abstract IEnumerable EnumerateFileWork(HdfsFile file); + + /// + /// given an input string of a file or directory, expand it into a set of files, and then expand each file + /// into a set of matches using the derived-class implementation of EnumerateFileWork + /// + /// dfs file or directory to be read + /// set of work item matches for the file or directory + protected override IEnumerable EnumerateWork(Uri fileOrDirectory) + { + return client + .ExpandFileOrDirectoryToFile(fileOrDirectory) + .SelectMany(file => EnumerateFileWork(file)); + } + + /// + /// return a new coordinator for a dfs reader + /// + /// hdfs client + public DfsBaseCoordinator(HdfsClientBase client) + { 
+ this.client = client; + this.dataNodes = new Dictionary(); + } + } + #endregion + + #region DfsBaseCoordinator implementation for the file-based dfs work coordinator + /// + /// base coordinator for workers that read an entire hdfs file at a time, rather than split the file into blocks. + /// For each file the coordinator tries to match it to a worker that holds a large proportion of the relevant data + /// + public class DfsFileCoordinator : DfsBaseCoordinator + { + /// + /// Return the length of a particular block in a file. All blocks except the last one are the same length + /// + /// index of the block in the file + /// file being read + /// length in bytes of the requested block + private long BlockLength(int index, HdfsFile file) + { + // start location of the block in the file + long offset = (long)index * file.blockSize; + // number of bytes after the start of the block + long bytesAfterBlockStart = file.length - offset; + // either the standard block length, or the length of the final block if it is shorter + return Math.Min(bytesAfterBlockStart, file.blockSize); + } + + /// + /// given a file, determine how much of that file's data are stored on each datanode. Return every datanode that stores a + /// threshold percentage of the file's data as a candidate match for that file + /// + /// file to be matched + /// a match including a (possibly-empty) set of candidate data nodes + protected override IEnumerable EnumerateFileWork(HdfsFile file) + { + long numberOfBlocks = (file.length + file.blockSize - 1) / file.blockSize; + long threshold; + if (numberOfBlocks > 4) + { + // this is a 'real' multi-block file; only take a datanode that contains at least a third of it + threshold = file.length / 3; + } + else + { + // this file either has a single block or only a few: only return a matching node if it stores the whole file. 
+ // this will select the node that wrote (all the blocks in) the file rather than one of the replicas, in the case of a file with only a + // couple of blocks + threshold = file.length; + } + + Match match = new Match + { + categories = this.client.GetBlockLocations(file) + // first flatten the list of block locations, into a sequence of pairs of 'endpoint,length' each indicating that length + // bytes are stored at endpoint + .SelectMany((endpoints, index) => + endpoints.Select(endpoint => new KeyValuePair(endpoint, BlockLength(index, file)))) + // then group by endpoint + .GroupBy(x => x.Key) + // within each group, sum the bytes to determine how many bytes in total are stored at each endpoint + .Select(g => new KeyValuePair(g.Key, g.Select(elt => elt.Value).Sum())) + // keep only endpoints that store more than 33% of the file + .Where(x => x.Value >= threshold) + // return the flattened array of candidate endpoints, if any + .Select(x => x.Key).ToArray(), + + // if there isn't a matching worker, use null as the default endpoint, meaning the read will be redirected to the + // name node. 
Set the block to indicate the entire file + workStub = new DfsBlock + { + path = file.path, + fileLength = file.length, + offset = 0, + length = file.length, + dataNodeAddress = null + } + }; + + yield return match; + } + + /// + /// if the work item was matched to a worker on the same computer, fill in the datanode endpoint before sending the work item + /// + /// true if the item was matched to a worker + /// endpoint corresponding to the matched worker, if usedMatchingQueue==true + /// work item with no endpoint filled in + /// work item with the endpoint filled in, if there was a match + protected override DfsBlock ExpandWorkItem(bool usedMatchingQueue, IPEndPoint endpoint, DfsBlock stub) + { + if (usedMatchingQueue) + { + stub.dataNodeAddress = endpoint; + } + + return stub; + } + + /// + /// create a new coordinator for file-at-a-time dfs reads + /// + /// hdfs client + public DfsFileCoordinator(HdfsClientBase client) : base(client) + { + } + } + #endregion + + #region IWorker implementation for the Hdfs file-based reader worker + /// + /// base worker implementation for the worker to read Hdfs files an entire file at a time, rather than block by block + /// + /// the type of records to be read + public class DfsFileWorker : IWorker + { + /// + /// a cache of the local IP interfaces, read on startup from DNS + /// + private readonly IPAddress[] localAddresses; + /// + /// the function that takes a stream and returns batches of records + /// + private readonly Func>> deserialize; + /// + /// the Hdfs client used to read files + /// + protected readonly HdfsClientBase client; + + /// + /// Return a description of the worker that the coordinator will use when matching work items to workers. This is called + /// once before any work item has been assigned, and once after each work item is performed. 
+ /// + /// The IP addresses that the worker node is listening on, to be matched to file block locations for trying + /// to schedule local reads + public IPAddress[] DescribeWorker() + { + return this.localAddresses; + } + + /// + /// Execute a work item, reading an HDFS file and generating a sequence of output records + /// + /// The work item to be executed, corresponding to an entire Hdfs file + /// A sequence of array segments, each containing a sequence of records to be output + public IEnumerable> DoWork(DfsBlock workItem) + { + // ask the Hdfs client for a stream corresponding to the file. Use a 1k buffer for byte-at-a-time reads. + using (Stream reader = client.GetDfsStreamReader(workItem.path, workItem.offset, workItem.length, 1024, workItem.dataNodeAddress)) + { + foreach (ArraySegment segment in this.deserialize(reader)) + { + yield return segment; + } + } + } + + /// + /// create a worker for a file-at-a-time hdfs reader + /// + /// Hdfs client used to read files + /// function to take a stream consisting of an entire webhdfs file, and return a sequence + /// of batches, each containing an arraysegment of output records + public DfsFileWorker( + HdfsClientBase client, Func>> deserialize) + { + // cache all the addresses the local node is listening on + this.localAddresses = Dns.GetHostAddresses(Dns.GetHostName()); + this.deserialize = deserialize; + this.client = client; + } + } + #endregion + + #region DfsBaseCoordinator implementation for the block-based dfs work coordinator + /// + /// Implementation of a MatchingCoordinator that manages work for reading dfs files in parallel, split by blocks + /// + /// The concrete type of work item, which may include metadata specific to a particular + /// serializer or file type + public abstract class DfsBlockCoordinator : DfsBaseCoordinator where TItem : DfsBlock + { + /// + /// For a given file, map a sequence of locations to a sequence of "base" matches, where the workStub + /// component of the match has 
any filetype-specific metadata filled in and the categories component of the + /// match has been set to the locations of that block. The HdfsBlock fields will + /// all be filled in later after this sequence has been returned + /// + /// The file being read + /// The block locations for the file + /// A sequence of Matches with the categories set, and any file-specific metadata set + protected abstract IEnumerable MakeBaseMatches(HdfsFile file, IEnumerable blocks); + + /// + /// called to convert an input file into a list of blocks + /// + /// the input file + /// the blocks in the file, along with a set of datanodes where each block is stored + protected override IEnumerable EnumerateFileWork(HdfsFile file) + { + // get the blocks in the file, and convert each block to a base match, with file-specific metadata + // filled in to the DfsBlock + IEnumerable rawMatches = MakeBaseMatches(file, this.client.GetBlockLocations(file)); + + // fill in the rest of the DfsBlock fields + return rawMatches.Select((match, index) => + { + match.workStub.path = file.path; + match.workStub.fileLength = file.length; + // all the blocks are the same size + match.workStub.offset = (long)index * file.blockSize; + long bytesRemaining = file.length - match.workStub.offset; + match.workStub.length = Math.Min(file.blockSize, bytesRemaining); + // this address will be used if the block is going to be read by a worker on a remote machine + // otherwise the correct address will be filled in when the worker is chosen + match.workStub.dataNodeAddress = match.categories.First(); + + return match; + }); + } + + /// + /// Called when a work item is going to be sent to a worker. 
If usedMatchingQueue is true then dataNode + /// is the endpoint of the dataNode that matches the worker + /// + /// true if the work item was pulled from a queue whose datanode is on + /// the same computer as the worker + /// datanode endpoint if the work item was pulled from a matching queue + /// work item stub to fill in + /// work item with the datanode address filled in correctly + protected override TItem ExpandWorkItem(bool usedMatchingQueue, IPEndPoint dataNode, TItem stub) + { + if (usedMatchingQueue) + { + // the worker is on the same machine as a datanode that is storing the block, so read from + // that datanode + stub.dataNodeAddress = dataNode; + } + // else the worker is on a machine that isn't storing the block, so just use the default datanode + // that was stored in the stub in EnumerateBlocks + + return stub; + } + + /// + /// return a new coordinator for a block-based HDFS reader + /// + /// hdfs client used to read files and metadata + public DfsBlockCoordinator(HdfsClientBase client) : base(client) + { + } + } + #endregion + + #region IWorker implementation for the dfs block-based worker + /// + /// IWorker implementation for the dfs worker that reads data from blocks. This is further specialized to different file + /// formats by passing in functions for syncing to record boundaries and deserializing data + /// + /// The work item passed by the matching coordinator, which inherits from DfsBlock but may contain + /// metadata used in syncing to record boundaries or deserializing + /// The type of deserialized records produced by the worker + public class DfsBlockWorker : IWorker where TItem : DfsBlock + { + /// + /// a cache of the local IP interfaces, read on startup from DNS + /// + private readonly IPAddress[] localAddresses; + /// + /// the number of bytes to use for each WebHdfs request when seeking past the end of the block for the start of the following + /// record. 
If records are expected to be small, this should also be small to avoid pre-fetching a lot of the next block + /// + private readonly int syncRequestLength; + /// + /// the function used to sync to the next record in a stream + /// + private readonly Action syncToNextRecord; + /// + /// the function used to deserialize records from a stream + /// + private readonly Func>> deserialize; + /// + /// the client used for reading Hdfs data + /// + protected readonly HdfsClientBase client; + + /// + /// the IWorker implementation used to identify this worker to the WebHdfs coordinator, so that it can be sent work items + /// of blocks stored at the same machine + /// + /// the IP addresses of this computer as reported by DNS + public IPAddress[] DescribeWorker() + { + return this.localAddresses; + } + + /// + /// Find the start of the first record that begins after the end of the block we have been instructed to read. This indicates + /// the end of the range of data that this block corresponds to + /// + /// the block we are reading + /// + private long FindSpillExtent(TItem workItem) + { + // compute the number of bytes remaining in the file past the end of our block + long spillBytesRemaining = workItem.fileLength - workItem.offset - workItem.length; + if (spillBytesRemaining <= 0) + { + // this is the last block, so our range runs exactly to the end of the block + return workItem.length; + } + + // get a stream that starts immediately after the end of the block, and continues until the end of the file. + using (Stream spillReader = client.GetDfsStreamReader( + workItem.path, + // read from the end of this block for the rest of the file + workItem.offset + workItem.length, spillBytesRemaining, + // use small requests if we expect records to be fairly small, so we don't prefetch and buffer a lot of data in the next block + this.syncRequestLength)) + { + // call into the format-specific function to find the start of the next record. 
Potentially this spins all the way to the end of the + // stream + this.syncToNextRecord(workItem, spillReader); + + // return the offset of the next record after the block, relative to the start of the block + return workItem.length + spillReader.Position; + } + } + + /// + /// find the range of valid records in the block (those that start within the block), deserialize them, and return them in batches + /// + /// a description of the block to read + /// a sequence of batches of output records + public IEnumerable> DoWork(TItem workItem) + { + //Console.WriteLine("Starting work for " + workItem.path.AbsoluteUri + " " + workItem.offset + " " + workItem.length); + + // find the number of bytes from the start of the block to the end of the range, i.e. the start of the first record + // that begins after the end of the block + long endOfRange = FindSpillExtent(workItem); + + // create a reader for the range. Use the size of the range as the size of each underlying WebHdfs request so we + // will make a single webhdfs request for all of the data that is stored in this block. + using (Stream blockReader = client.GetDfsStreamReader(workItem.path, workItem.offset, endOfRange, this.syncRequestLength)) + { + if (workItem.offset > 0) + { + // unless we are the first block, scan forward from the start of the block to find the start of the next record, + // since the (partial) record at the beginning of the block will be read by the preceding block reader. If no records + // start within the block, the stream will end up positioned at endOfRange, so we won't deserialize anything + this.syncToNextRecord(workItem, blockReader); + } + + // deserialize all the records in the range + foreach (ArraySegment segment in this.deserialize(workItem, blockReader)) + { + yield return segment; + } + } + } + + /// + /// create a worker to read dfs files broken into blocks + /// + /// size of each dfs request when seeking past the end of the block for the start of the + /// next record. 
If records are expected to be small this should also be small, to avoid prefetching and buffering a lot of the + /// next block's data + /// action to sync to the start of the next record. The first argument is the block item + /// being read, which may contain metadata about sync markers. The second argument is the stream to scan. + /// function to deserialize records in a stream + /// client used to read hdfs data + public DfsBlockWorker( + int syncRequestLength, + Action syncToNextRecord, + Func>> deserialize, + HdfsClientBase client) + { + // cache the local IP addresses + this.localAddresses = Dns.GetHostAddresses(Dns.GetHostName()); + + this.syncRequestLength = syncRequestLength; + this.syncToNextRecord = syncToNextRecord; + this.deserialize = deserialize; + this.client = client; + } + } + #endregion + + #region Hdfs text reader classes + /// + /// the coordinator class for a text reader. No additional metadata is needed to describe a block, so this just uses DfsBlocks + /// directly as work items + /// + public class DfsTextCoordinator : DfsBlockCoordinator + { + /// + /// For a given file, map a sequence of locations to a sequence of "base" matches. The workStub + /// component of the match doesn't need any file-specific metadata filled in. The categories component of the + /// match is set to the locations of that block. 
The DfsBlock fields will + /// all be filled in after this sequence has been returned + /// + /// The file being read + /// The block locations for the file + /// A sequence of Matches with the categories set, and any file-specific metadata set + protected override IEnumerable MakeBaseMatches(HdfsFile file, IEnumerable blocks) + { + return blocks.Select(endpoints => new Match { workStub = new DfsBlock(), categories = endpoints }); + } + + /// + /// create a coordinator for reading Dfs files with fixed-length blocks, made of text records + /// + /// client used for reading Hdfs data and metadata + public DfsTextCoordinator(HdfsClientBase client) : base(client) + { + } + } + + /// + /// the worker class for a text reader for files with fixed-length blocks and text records. It uses DfsBlocks as work + /// items, and parses data into lines represented as strings + /// + public class DfsTextWorker : DfsBlockWorker + { + /// + /// sync forwards in a stream leaving it positioned on the first character after an end-of-line mark. Supports '\r\n', '\n' and + /// '\r' as end-of-line. '\r' is considered the end of a line if it is followed by any character other than '\n'. + /// + /// + /// this assumes the stream is seekable, and that seeking backward by one character is efficient + /// + /// stream to scan forward in + static private void SyncToNextLine(Stream stream) + { + while (true) + { + int currentByte = stream.ReadByte(); + if (currentByte == -1) + { + // we reached the end of the stream without seeing a line terminator, so leave the stream positioned at its end + return; + } + else if (currentByte == '\n') + { + // we saw a line terminator, and are now positioned to read the first character of the next line + return; + } + else if (currentByte == '\r') + { + // we saw a carriage return. If the next character is a newline then the next line starts after that, + // otherwise we are currently positioned on it. 
So read the next character, to check + int followingByte = stream.ReadByte(); + if (followingByte == -1) + { + // we reached the end of the stream just after the CR, so leave the stream positioned at its end + return; + } + else if (followingByte == '\n') + { + // we saw '\r\n' and are now positioned at the first character after '\n' + return; + } + else + { + // we have moved one character too many; back up before returning so we are positioned on the first + // character after the '\r' + stream.Seek(-1, SeekOrigin.Current); + } + } + } + } + + /// + /// Deserialize all the lines in a stream, which is assumed to be positioned on the start of a line. Return the lines + /// in batches + /// + /// stream to deserialize + /// number of lines to return per batch + /// sequence of batches of lines + static private IEnumerable> Deserialize(Stream stream, int batchSize) + { + using (StreamReader reader = new StreamReader(stream, Encoding.UTF8, true, 1024*1024)) + { + // count how much of the batch array we have filled + int index; + do + { + // array to store the current batch + string[] batch = new string[batchSize]; + for (index = 0; index < batchSize; ++index) + { + string line = reader.ReadLine(); + if (line == null) + { + // we reached the end of the stream + break; + } + // fill in the next line in the batch + batch[index] = line; + } + + if (index > 0) + { + // return all the lines that got filled in + yield return new ArraySegment(batch, 0, index); + } + // if we didn't fill a complete batch then the stream ended, so exit + } while (index == batchSize); + } + } + + /// + /// create a worker for deserializing lines of text from an Hdfs file + /// + /// Hdfs client to use for reading data + /// number of lines to return at a time + public DfsTextWorker(HdfsClientBase client, int batchSize) + : base( + // use 4k blocks when scanning past the end of the block to find the end of the final line + 4 * 1024, + (item, stream) => SyncToNextLine(stream), + (item, stream) => 
Deserialize(stream, batchSize), + client) + { + } + } + #endregion +} diff --git a/Frameworks/Storage/Microsoft.Research.Naiad.Storage.nuspec b/Frameworks/Storage/Microsoft.Research.Naiad.Storage.nuspec new file mode 100644 index 0000000..e205067 --- /dev/null +++ b/Frameworks/Storage/Microsoft.Research.Naiad.Storage.nuspec @@ -0,0 +1,43 @@ + + + + Microsoft.Research.Naiad.Storage + Naiad - Shared Storage support + 0.5.0-beta + naiadquestions@microsoft.com + naiadquestions@microsoft.com,Microsoft + http://www.apache.org/licenses/LICENSE-2.0.html + http://research.microsoft.com/naiad/ + + true + Provides convenient access to Windows Azure Storage APIs from Naiad programs. + Microsoft Corporation + en-US + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ClusterSubmission/YarnSubmission/YarnSubmission.cs b/Frameworks/Storage/Properties/AssemblyInfo.cs similarity index 51% rename from ClusterSubmission/YarnSubmission/YarnSubmission.cs rename to Frameworks/Storage/Properties/AssemblyInfo.cs index c69f549..d543741 100644 --- a/ClusterSubmission/YarnSubmission/YarnSubmission.cs +++ b/Frameworks/Storage/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -17,25 +17,15 @@ * See the Apache Version 2.0 License for specific language governing * permissions and limitations under the License. */ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; -using Microsoft.Research.Peloponnese.ClusterUtils; -using Microsoft.Research.Peloponnese.Storage; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("Microsoft.Research.Naiad.Storage")] +[assembly: AssemblyConfiguration("")] -namespace Microsoft.Research.Naiad.Cluster.NativeYarn -{ - class NativeYarnSubmission : ClusterSubmission - { - public NativeYarnSubmission(string headnode, int port, int webPort, int numberOfProcesses, string[] args) - : base(new WebHdfsClient(headnode, port, webPort), - new NativeYarnClient(headnode, 9000, 8471), - numberOfProcesses, args) - { - } - - } -} +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("5455f753-2668-4348-96c5-7101c521de71")] diff --git a/Frameworks/Storage/Storage.cs b/Frameworks/Storage/Storage.cs new file mode 100644 index 0000000..9acbfca --- /dev/null +++ b/Frameworks/Storage/Storage.cs @@ -0,0 +1,587 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. 
+ */ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Reactive; +using System.Text; +using System.Text.RegularExpressions; + +using Microsoft.Research.Naiad.Dataflow; +using Microsoft.Research.Naiad.Dataflow.StandardVertices; +using Microsoft.Research.Naiad.Serialization; + +namespace Microsoft.Research.Naiad.Frameworks.Storage +{ + /// + /// The Storage framework includes base classes to support reading and writing a variety of files including Azure and Hdfs + /// + class NamespaceDoc + { + + } + + /// + /// Utility classes and methods for the Naiad Storage framework + /// + public static class Utils + { + /// + /// Wrapper for StreamWriter than exposes the fact that it implements Flush via the IFlushable interface + /// + public class FStreamWriter : StreamWriter, IFlushable + { + /// + /// Create a StreamWriter exposing the IFlushable interface + /// + /// stream to write data to + /// character encoding + /// size of streamwriter buffer + public FStreamWriter(Stream stream, Encoding encoding, int bufferSize) : base(stream, encoding, bufferSize) + { + } + } + + /// + /// Enumerates lines of text from a stream + /// + /// source stream + /// Each line of text in the source stream + public static IEnumerable ReadLines(this System.IO.Stream stream) + { + using (var reader = new System.IO.StreamReader(stream, Encoding.UTF8, true, 1024 * 1024)) + { + while (!reader.EndOfStream) + yield return reader.ReadLine(); + } + } + + /// + /// Enumerates records from a stream in the Naiad serialization format. 
+ /// + /// Type of record in the stream + /// A stream containing records serialized in the Naiad messaging format + /// An enumeration of records in the stream + public static IEnumerable GetNaiadReaderEnumerable(System.IO.Stream stream) + { + using (NaiadReader reader = new NaiadReader(stream)) + { + TRecord nextElement; + while (reader.TryRead(out nextElement)) + { + yield return nextElement; + } + } + } + + /// + /// Enumerates batches of records from a stream in the Naiad serialization format. + /// + /// Type of record in the stream + /// A stream containing records serialized in the Naiad messaging format + /// number of records per batch + /// An enumeration of records in the stream + public static IEnumerable> GetNaiadReaderBatchEnumerable( + System.IO.Stream stream, int batchSize) + { + using (NaiadReader reader = new NaiadReader(stream)) + { + int batchIndex; + do + { + TRecord[] batch = new TRecord[batchSize]; + for (batchIndex = 0; batchIndex < batchSize; ++batchIndex) + { + if (!reader.TryRead(out batch[batchIndex])) + { + break; + } + } + + if (batchIndex > 0) + { + yield return new ArraySegment(batch, 0, batchIndex); + } + } while (batchIndex == batchSize); + } + } + + /// + /// Returns an record observer that writes records to the given stream in the Naiad message format. + /// + /// Type of records to be written + /// Target I/O stream + /// code generator + /// A record observer that writes records to the given stream. + public static IObserver GetNaiadWriterObserver(System.IO.Stream stream, SerializationFormat codeGenerator) + { + NaiadWriter writer = new NaiadWriter(stream, codeGenerator); + return Observer.Create(r => + { + writer.Write(r); + }, + () => writer.Dispose()); + } + + /// + /// class to wrap a sequence of files for writing. 
This is used so that records are written as a sequence of + /// relatively small files rather than one large file, which increases the available parallelism when they + /// are read later + /// + /// the type of the serializer + internal class WriterStreamSequence + where TWriter : class, IDisposable, IFlushable + { + /// + /// function to generate the name for a file from its sequence number + /// + private readonly Func fileNameFunction; + /// + /// function to generate the stream for a file from its name + /// + private readonly Func streamFunction; + /// + /// function to generate the writer for a file from its stream + /// + private readonly Func writerFunction; + + /// + /// threshold number of bytes after which a file is closed and the next one in the sequence is opened + /// + private readonly long fileLengthThreshold; + + /// + /// stopwatch used for timing writes + /// + private readonly Stopwatch stopwatch; + + /// + /// stream corresponding to the current file being written + /// + private Stream segmentStream; + /// + /// serializer for the current file being written + /// + private TWriter segmentWriter; + /// + /// the index of the next file to write + /// + private int nextSegmentIndex; + /// + /// the total number of bytes written + /// + private long totalLength; + + /// + /// flush and close the current serializer and file, and create the next file in the sequence + /// along with its serializer + /// + private void StartNextFile() + { + // close the current file if any + CloseCurrentFile(); + + // use the supplied functions to open the file and create its serializer + Uri fileName = this.fileNameFunction(this.nextSegmentIndex); + this.segmentStream = this.streamFunction(fileName); + this.segmentWriter = this.writerFunction(this.segmentStream); + + // get ready for the next file + ++this.nextSegmentIndex; + } + + /// + /// check to see if the current file is long enough that it's time to start a new one, and if + /// so close the current writer 
and open the next one + /// + public void CheckForFileBoundary() + { + if (this.segmentStream.Position > this.fileLengthThreshold) + { + StartNextFile(); + } + } + + /// + /// the serializer for the file that is currently being written + /// + public TWriter Writer { get { return this.segmentWriter; } } + + /// + /// all of the filenames that have been written to by this instance + /// + public IEnumerable Filenames + { + get + { + return Enumerable.Range(0, this.nextSegmentIndex).Select(i => this.fileNameFunction(i)); + } + } + + /// + /// close the current serializer and file, if any + /// + public void CloseCurrentFile() + { + if (this.segmentStream != null) + { + // flush the writer before determining the stream position + this.segmentWriter.Flush(); + this.totalLength += this.segmentStream.Position; + // this should close the underlying stream + this.segmentWriter.Dispose(); + // dispose the stream, just in case + this.segmentStream.Dispose(); + } + + this.segmentStream = null; + this.segmentWriter = null; + } + + /// + /// close the current serializer and file, if any + /// + public void Close() + { + CloseCurrentFile(); + + this.stopwatch.Stop(); +#if false + long elapsed = this.stopwatch.ElapsedMilliseconds; + Console.WriteLine(String.Format( + "Wrote {0} bytes in {1}ms --- {2:f3}MB/s", + this.totalLength, elapsed, + (double)(((double)this.totalLength / (1024.0 * 1024.0)) / ((double)elapsed / 1000.0)))); +#endif + } + + /// + /// Return the number of bytes written in total. This does not include unflushed writes + /// + public long Length + { + get + { + // if there is a current segment stream, this will not include any unflushed data + return (this.segmentStream == null) ? 
this.totalLength : this.totalLength + this.segmentStream.Position; + } + } + + /// + /// create a new wrapper for writing a sequence of serialized files + /// + /// function to generate a filename from a sequence number + /// function to open a stream from a filename + /// function to create a serializer from a stream + /// maximum length in bytes for a file before starting the next one + public WriterStreamSequence( + Func fileNameFunction, + Func streamFunction, + Func writerFunction, + long fileLengthThreshold) + { + this.fileNameFunction = fileNameFunction; + this.streamFunction = streamFunction; + this.writerFunction = writerFunction; + this.fileLengthThreshold = fileLengthThreshold; + + this.stopwatch = Stopwatch.StartNew(); + + // open the first file immediately + this.nextSegmentIndex = 0; + StartNextFile(); + } + } + + /// + /// regular expression to match non-alphanumeric characters + /// + private static Regex nonAlpha = new Regex(@"[^a-zA-Z0-9]+"); + + /// + /// default filename generator for a collection partitioned by key and time + /// + /// the type of times + /// the directory prefix, not including the trailing slash + /// the process id of the file part + /// the thread id of the file part + /// the time of the file part + /// the sequence number of the file part within the time/process/thread + /// a filename representing the given part + public static Uri DefaultPartFormat(Uri prefix, int processId, int threadId, TTime time, int segment) + where TTime : Time + { + // times are rendered with spaces and punctuation: get rid of that + string timeString = nonAlpha.Replace(time.ToString(), "_").Trim('_'); + UriBuilder builder = new UriBuilder(prefix); + builder.Path = String.Format("{0}/time_{1}_part_{2:D4}.{3:D4}.{4:D4}", prefix.AbsolutePath, timeString, processId, threadId, segment); + return builder.Uri; + } + + /// + /// default filename generator for a collection partitioned by key, but concatenated across times + /// + /// the directory prefix, 
not including the trailing slash + /// the process id of the file part + /// the thread id of the file part + /// the sequence number of the file part within process/thread + /// a filename representing the given part + public static Uri DefaultPartFormat(Uri prefix, int processId, int threadId, int segment) + { + UriBuilder builder = new UriBuilder(prefix); + builder.Path = String.Format("{0}/part_{1:D4}.{2:D4}.{3:D4}", prefix.AbsolutePath, processId, threadId, segment); + return builder.Uri; + } + + /// + /// serialize a sequence of records to a collection of files partitioned by process and thread. For each + /// process/thread this writes a sequence of files; each time a file reaches a threshold number of bytes, + /// it is closed and another is opened. This keeps individual files of bounded length, allowing for more + /// parallelism when reading them later + /// + /// type of record to serialize + /// type of the serializer + /// stream of records to serialize + /// function from processId, threadId and sequence number to filename + /// function to create an output stream given a filename + /// function to create a serializer from a stream + /// action to serialize a batch of records + /// length in bytes of a file after which it is closed and a new one is opened + /// a handle that can be waited on for the computation to complete + public static Subscription WriteBySubscription( + this Stream source, + Func pathFunction, + Func streamFunction, + Func writerFunction, + Action> serialize, + long fileLengthThreshold) where TWriter : class, IDisposable, IFlushable + { + // dictionary of sequence writers, indexed by worker id + var writers = new Dictionary>(); + + return source.Subscribe( + // OnRecv callback + (message, workerid) => + { + WriterStreamSequence writer; + + lock (writers) + { + if (!writers.TryGetValue(workerid, out writer)) + { + // make a filename generator for the specified worker and process + Func format = segment => + 
pathFunction(source.ForStage.Computation.Controller.Configuration.ProcessID, + workerid, + segment); + // make the sequence writer for the specified worker and process + writer = new WriterStreamSequence( + format, streamFunction, writerFunction, fileLengthThreshold); + writers.Add(workerid, writer); + } + } + + // before serializing a batch of records, check to see if the current file has gone over + // its length threshold; if so the current file will be closed, and the next one will be + // opened + writer.CheckForFileBoundary(); + + // serialize the batch of records to the current file + serialize(writer.Writer, new ArraySegment(message.payload, 0, message.length)); + }, + // OnNotify callback + (epoch, workerid) => { }, + // OnCompleted callback + workerid => + { + lock (writers) + { + if (writers.ContainsKey(workerid)) + { + writers[workerid].Close(); + writers.Remove(workerid); + } + } + }); + } + + /// + /// serialize a sequence of records to a collection of files partitioned by process, thread and time. For each + /// process/thread/time this writes a sequence of files; each time a file reaches a threshold number of bytes, + /// it is closed and another is opened. This keeps individual files of bounded length, allowing for more + /// parallelism when reading them later + /// + /// type of record to serialize + /// type of client that is passed as a context to the stream function + /// type of the serializer + /// type of the time of records + /// stream of records to serialize + /// function from processId, threadId, time and sequence number to filename + /// function to return a client to pass as context to the stream function + /// function to create an output stream given a client and filename + /// function to create a serializer from a stream + /// action to serialize a batch of records + /// length in bytes of a file after which it is closed and a new one is opened + /// stream of names of files written. 
The set of names written for a given time is released when the + /// time completes + public static Stream WriteByTime( + this Stream source, + Func pathFunction, + Func clientFunction, + Func streamFunction, + Func writerFunction, + Action> serialize, + long fileLengthThreshold) + where TWriter : class, IDisposable, IFlushable + where TTime : Time + { + return source.NewUnaryStage( + (i, v) => new WriteByTimeVertex( + i, v, pathFunction, clientFunction(), streamFunction, writerFunction, serialize, fileLengthThreshold), + null, null, "WriteByTime"); + } + + /// + /// vertex to serialize a sequence of records to a collection of files partitioned by process, thread and time. For each + /// process/thread/time this writes a sequence of files; each time a file reaches a threshold number of bytes, + /// it is closed and another is opened. This keeps individual files of bounded length, allowing for more + /// parallelism when reading them later + /// + /// type of record to serialize + /// type of client to use as context in the stream generator function + /// type of the serializer + /// type of the time of records + internal class WriteByTimeVertex : UnaryVertex + where TWriter : class, IDisposable, IFlushable + where TTime : Time + { + /// + /// client to use as context for the stream function + /// + private readonly TClient client; + /// + /// dictionary of sequence writers, indexed by time + /// + private readonly Dictionary> writers; + /// + /// worker id of this vertex + /// + private readonly int workerId; + /// + /// function from processId, workerId, time and sequence number to filename + /// + private readonly Func pathFunction; + /// + /// function from filename and client to output stream + /// + private readonly Func streamFunction; + /// + /// function from output stream to serializer + /// + private readonly Func writerFunction; + /// + /// action to serialize a batch of records + /// + private readonly Action> serialize; + /// + /// length in bytes 
after which a file is closed and the next one is opened + /// + private readonly long fileLengthThreshold; + + public override void OnReceive(Message message) + { + WriterStreamSequence writer; + if (!writers.TryGetValue(message.time, out writer)) + { + // make a filename generator for the specified process, worker and time + Func format = segment => + this.pathFunction(this.Stage.Computation.Controller.Configuration.ProcessID, + this.workerId, + message.time, + segment); + // make a sequence writer for the specified process, worker and time + writer = new WriterStreamSequence(format, u => streamFunction(this.client, u), writerFunction, fileLengthThreshold); + writers.Add(message.time, writer); + // ensure that we are called later to close the sequence writer when the time completes + this.NotifyAt(message.time); + } + + // before serializing a batch of records, check to see if the current file has gone over + // its length threshold; if so the current file will be closed, and the next one will be + // opened + writer.CheckForFileBoundary(); + + // serialize the batch of records to the current file + this.serialize(writer.Writer, new ArraySegment(message.payload, 0, message.length)); + } + + public override void OnNotify(TTime time) + { + WriterStreamSequence writer = writers[time]; + writers.Remove(time); + + // close the sequence writer + writer.Close(); + + var output = this.Output.GetBufferForTime(time); + foreach (Uri fileName in writer.Filenames) + { + // emit the filename of each file written by this writer + output.Send(fileName); + } + } + + /// + /// construct a new vertex to serialize records partitioned by processId, threadId and time + /// + /// threadId of this worker + /// stage the vertex belongs to + /// function from processId, workerId, time and sequence number to filename + /// function to return the client used as context for the stream function + /// function from client and filename to output stream + /// function from stream to serializer + 
/// action to serialize a batch of records + /// length in bytes after which a file is closed and the next one is opened + public WriteByTimeVertex( + int workerId, + Stage stage, + Func pathFunction, + TClient client, + Func streamFunction, + Func writerFunction, + Action> serialize, + long fileLengthThreshold) + : base(workerId, stage) + { + this.workerId = workerId; + this.client = client; + this.pathFunction = pathFunction; + this.streamFunction = streamFunction; + this.writerFunction = writerFunction; + this.serialize = serialize; + this.fileLengthThreshold = fileLengthThreshold; + + this.writers = new Dictionary>(); + } + } + } +} diff --git a/Frameworks/Storage/Storage.csproj b/Frameworks/Storage/Storage.csproj new file mode 100644 index 0000000..07b5aad --- /dev/null +++ b/Frameworks/Storage/Storage.csproj @@ -0,0 +1,131 @@ + + + + + + Debug + AnyCPU + {0DCA9543-FF9D-48D6-9748-A966DC39C35D} + Library + Properties + Storage + Microsoft.Research.Naiad.Storage + v4.5 + 512 + + 7459a486 + ..\..\ + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + false + bin\Release\Microsoft.Research.Naiad.Storage.XML + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + false + + + + False + ..\..\packages\Microsoft.Hadoop.Avro.1.3.2.1\lib\net40\Microsoft.Hadoop.Avro.dll + + + False + ..\..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll + + + + + False + ..\..\packages\Rx-Core.2.2.5\lib\net40\System.Reactive.Core.dll + + + False + ..\..\packages\Rx-Interfaces.2.2.5\lib\net40\System.Reactive.Interfaces.dll + + + + + + + + + + + Properties\SharedAssemblyInfo.cs + + + + + + + + {a6221415-1283-4c04-8d2c-e25a857e1fe9} + Naiad + + + {bdc6546c-7ba0-472b-b260-0d596b6152e4} + Lindi + + + {eba3d350-41eb-474c-aed9-9cfd1f809de3} + 
WorkGenerator + + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/Storage/app.config b/Frameworks/Storage/app.config new file mode 100644 index 0000000..cd20ef1 --- /dev/null +++ b/Frameworks/Storage/app.config @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/Frameworks/Storage/packages.config b/Frameworks/Storage/packages.config new file mode 100644 index 0000000..00ef8ef --- /dev/null +++ b/Frameworks/Storage/packages.config @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/Frameworks/WebHdfsSupport/Microsoft.Research.Naiad.WebHdfsSupport.nuspec b/Frameworks/WebHdfsSupport/Microsoft.Research.Naiad.WebHdfsSupport.nuspec new file mode 100644 index 0000000..853b2e1 --- /dev/null +++ b/Frameworks/WebHdfsSupport/Microsoft.Research.Naiad.WebHdfsSupport.nuspec @@ -0,0 +1,43 @@ + + + + Microsoft.Research.Naiad.WebHdfs + Naiad - WebHdfs Storage support + 0.5.0-beta + naiadquestions@microsoft.com + naiadquestions@microsoft.com,Microsoft + http://www.apache.org/licenses/LICENSE-2.0.html + http://research.microsoft.com/naiad/ + + true + Provides convenient access to Windows Azure Storage APIs from Naiad programs. + Microsoft Corporation + en-US + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/WebHdfsSupport/Properties/AssemblyInfo.cs b/Frameworks/WebHdfsSupport/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..5879c8d --- /dev/null +++ b/Frameworks/WebHdfsSupport/Properties/AssemblyInfo.cs @@ -0,0 +1,31 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("WebHdfsSupport")] +[assembly: AssemblyConfiguration("")] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("83ee83cf-e684-425a-836e-0798594ce68e")] diff --git a/Frameworks/WebHdfsSupport/WebHdfs.cs b/Frameworks/WebHdfsSupport/WebHdfs.cs new file mode 100644 index 0000000..8900520 --- /dev/null +++ b/Frameworks/WebHdfsSupport/WebHdfs.cs @@ -0,0 +1,512 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. 
+ * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.IO; +using System.Text; +using System.Threading.Tasks; + +using Microsoft.Research.Peloponnese.Shared; +using Microsoft.Research.Peloponnese.WebHdfs; + +using Microsoft.Research.Naiad; +using Microsoft.Research.Naiad.Dataflow; +using Microsoft.Research.Naiad.Frameworks.Lindi; +using Microsoft.Research.Naiad.Frameworks.WorkGenerator; +using Microsoft.Research.Naiad.Dataflow.PartitionBy; +using Microsoft.Research.Naiad.Dataflow.Channels; +using Microsoft.Research.Naiad.Serialization; +using Microsoft.Research.Naiad.Input; +using Microsoft.Research.Naiad.Frameworks.Storage; +using Microsoft.Research.Naiad.Frameworks.Storage.Dfs; + +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace Microsoft.Research.Naiad.Frameworks.WebHdfs +{ + /// + /// The WebHdfs framework supports reading and writing Hdfs files using the REST-based WebHdfs protocol. + /// + class NamespaceDoc + { + + } + + #region extension methods + /// + /// extension methods for working with WebHdfs files + /// + public static class ExtensionMethods + { + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized as lines of text. Concatenate all the lines of the files to the output, in an unspecified order. 
+ /// + /// time of the input and output records + /// stream of input paths + /// hdfs user + /// webhdfs protocol port + /// stream of text lines in the hdfs files + public static Stream FromWebHdfsText( + this Stream input, string user, int webPort) where TTime : Time + { + return input.GenerateWork( + time => new DfsTextCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => new DfsTextWorker(new WebHdfsClient(user, webPort), 256)); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized as lines of text. Concatenate all the lines of the files to the output, in an unspecified order. + /// + /// time of the input and output records + /// stream of input paths + /// stream of text lines in the hdfs files + public static Stream FromWebHdfsText( + this Stream input) where TTime : Time + { + return input.FromWebHdfsText(Environment.UserName, 50070); + } + + /// + /// Read a collection of HDFS files serialized as lines of text. Concatenate all the lines of the files to the output, + /// in an unspecified order. + /// + /// Naiad computation + /// path of the file or directory to read + /// hdfs user + /// webhdfs protocol port + /// stream of text lines in the hdfs files + public static Stream ReadWebHdfsTextCollection( + this Computation manager, Uri fileOrDirectoryPath, + string user, int webPort) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsTextCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => new DfsTextWorker(new WebHdfsClient(user, webPort), 256)); + } + + /// + /// Read a collection of HDFS files serialized as lines of text. Concatenate all the lines of the files to the output, + /// in an unspecified order. 
+ /// + /// Naiad computation + /// path of the file or directory to read + /// stream of text lines in the hdfs files + public static Stream ReadWebHdfsTextCollection( + this Computation manager, Uri fileOrDirectoryPath) + { + return manager.ReadWebHdfsTextCollection(fileOrDirectoryPath, Environment.UserName, 50070); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in a custom binary format. Concatenate all the records to the output, in an unspecified order. + /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// hdfs user + /// webhdfs protocol port + /// stream of records in the hdfs files + public static Stream FromWebHdfs( + this Stream input, + Func>> deserialize, + string user, int webPort) where TTime : Time + { + return input.GenerateWork( + time => new DfsFileCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => new DfsFileWorker(new WebHdfsClient(user, webPort), deserialize)); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in a custom binary format. Concatenate all the records to the output, in an unspecified order. + /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// stream of records in the hdfs files + public static Stream FromWebHdfs( + this Stream input, + Func>> deserialize) where TTime : Time + { + return input.FromWebHdfs(deserialize, Environment.UserName, 50070); + } + + /// + /// Read a collection of HDFS files serialized in a custom binary format. Concatenate all the records to the output, + /// in an unspecified order. 
+ /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// hdfs user + /// webhdfs protocol port + /// stream of records in the hdfs files + public static Stream ReadWebHdfsCollection( + this Computation manager, Uri fileOrDirectoryPath, + Func>> deserialize, + string user, int webPort) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsFileCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => new DfsFileWorker(new WebHdfsClient(user, webPort), deserialize)); + } + + /// + /// Read a collection of HDFS files serialized in a custom binary format. Concatenate all the records to the output, + /// in an unspecified order. + /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// custom deserializer function to convert a stream of bytes into a sequence of records + /// stream of records in the hdfs files + public static Stream ReadWebHdfsCollection( + this Computation manager, Uri fileOrDirectoryPath, + Func>> deserialize) + { + return manager.ReadWebHdfsCollection(fileOrDirectoryPath, deserialize, Environment.UserName, 50070); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in the default Naiad binary format. Concatenate all the records to the output, in an unspecified order. 
+ /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// hdfs user + /// webhdfs protocol port + /// stream of records in the hdfs files + public static Stream FromWebHdfsBinary( + this Stream input, + string user, int webPort) where TTime : Time + { + return input.GenerateWork( + time => new DfsFileCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => + new DfsFileWorker( + new WebHdfsClient(user, webPort), + stream => Utils.GetNaiadReaderBatchEnumerable(stream, 256))); + } + + /// + /// Read a stream of path names (file or directory names) each of which corresponds to a collection of HDFS files + /// serialized in the default Naiad binary format. Concatenate all the records to the output, in an unspecified order. + /// + /// output record type + /// time of the input and output records + /// stream of input paths + /// stream of records in the hdfs files + public static Stream FromWebHdfsBinary( + this Stream input) where TTime : Time + { + return input.FromWebHdfsBinary(Environment.UserName, 50070); + } + + /// + /// Read a collection of HDFS files serialized in the default Naiad binary format. Concatenate all the records to the output, + /// in an unspecified order. 
+ /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// hdfs user + /// webhdfs protocol port + /// stream of records in the hdfs files + public static Stream ReadWebHdfsBinaryCollection( + this Computation manager, Uri fileOrDirectoryPath, + string user, int webPort) + { + return new Uri[] { fileOrDirectoryPath } + .AsNaiadStream(manager) + // this distinct ensures that the same code can be run at every process without reading files multiple times + .Distinct() + .GenerateWork( + time => new DfsFileCoordinator(new WebHdfsClient(user, webPort)), + (workerIndex, time) => + new DfsFileWorker( + new WebHdfsClient(user, webPort), + stream => Utils.GetNaiadReaderBatchEnumerable(stream, 256))); + } + + /// + /// Read a collection of HDFS files serialized in the default Naiad binary format. Concatenate all the records to the output, + /// in an unspecified order. + /// + /// output record type + /// Naiad computation + /// path of the file or directory to read + /// stream of records in the hdfs files + public static Stream ReadWebHdfsBinaryCollection( + this Computation manager, Uri fileOrDirectoryPath) + { + return manager.ReadWebHdfsBinaryCollection(fileOrDirectoryPath, Environment.UserName, 50070); + } + + /// + /// general method to write a stream of records to a collection of HDFS files. 
The collection is active + /// throughout the computation and is closed when the computation terminates: it concatenates records from all + /// epochs in an undefined order + /// + /// type of the records to write + /// type of the serializer object + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// function to generate a filename given a processId, threadId and sequence number + /// function to generate a serializer given a Stream to write to + /// function to serialize a batch of records given a serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteWebHdfsBinary( + this Stream source, + string user, int webPort, + Func format, + Func writerFunction, + Action> serialize, + long blockSize, + long segmentThreshold) where TWriter : class, IDisposable, IFlushable + { + return source.WriteBySubscription( + format, + fileName => new WebHdfsClient(user, webPort).GetDfsStreamWriter(fileName, blockSize), + stream => writerFunction(stream), + serialize, + segmentThreshold); + } + + /// + /// method to write a stream of records to a collection of HDFS files using the default Naiad binary serializer. 
+ /// The collection is active throughout the computation and is closed when the computation terminates: it concatenates + /// records from all epochs in an undefined order + /// + /// type of the records to write + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// webhdfs directory to write the partitioned data into + /// buffer size to use in the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteWebHdfsBinary( + this Stream source, + string user, int webPort, + Uri prefix, + int bufferSize = 1024 * 1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) + { + // make sure we'll be able to write the partitioned data + WebHdfsClient client = new WebHdfsClient(user, webPort); + client.EnsureDirectory(prefix, false); + + return source.WriteWebHdfsBinary( + user, webPort, + (processId, threadId, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, segment), + stream => new NaiadWriter(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.Write(arraySegment.Array[i]); + } + }, + blockSize, + segmentThreshold); + } + + /// + /// general method to write a stream of records to a collection of HDFS files, partitioned by time as well as key. 
+ /// Within a given time and part, records are written in an undefined order + /// + /// type of the records to write + /// type of the serializer object + /// type of the record time + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// function to generate a filename given a processId, threadId, time and sequence number + /// function to generate a serializer given a Stream to write to + /// function to serialize a batch of records given a serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToWebHdfsBinary( + this Stream source, + string user, int webPort, + Func format, + Func writerFunction, + Action> serialize, + long blockSize, + long segmentThreshold) + where TWriter : class, IDisposable, IFlushable + where TTime : Time + { + return source.WriteByTime( + format, + () => new WebHdfsClient(user, webPort), + (client, fileName) => client.GetDfsStreamWriter(fileName, blockSize), + stream => writerFunction(stream), + serialize, + segmentThreshold); + } + + /// + /// method to write a stream of records to a collection of HDFS files using the default Naiad binary serializer, + /// partitioned by time as well as key. 
Within a given time and part, records are written in an undefined order + /// + /// type of the records to write + /// type of the record time + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// webhdfs directory to write the partitioned data into + /// buffer size to use in the serializer + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToWebHdfsBinary( + this Stream source, + string user, int webPort, + Uri prefix, + int bufferSize = 1024 * 1024, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) where TTime : Time + { + // make sure we'll be able to write the partitioned data + WebHdfsClient client = new WebHdfsClient(user, webPort); + client.EnsureDirectory(prefix, false); + + return source.ToWebHdfsBinary( + user, webPort, + (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment), + stream => new NaiadWriter(stream, source.ForStage.Computation.Controller.SerializationFormat, bufferSize), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.Write(arraySegment.Array[i]); + } + }, + blockSize, + segmentThreshold); + } + + /// + /// write a sequence of strings as hdfs text files. 
The collection is active throughout the computation and is + /// closed when the computation terminates: it concatenates records from all epochs in an undefined order + /// + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// webhdfs directory to write the partitioned data into + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// handle to wait on until the computation completes + public static Subscription WriteWebHdfsText( + this Stream source, + string user, int webPort, + Uri prefix, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) + { + // make sure we'll be able to write the partitioned data + WebHdfsClient client = new WebHdfsClient(user, webPort); + client.EnsureDirectory(prefix, false); + + // don't write byte order marks at the start of the files + Encoding utf8 = new UTF8Encoding(false, true); + + return source.WriteWebHdfsBinary( + user, webPort, + (processId, threadId, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, segment), + stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.WriteLine(arraySegment.Array[i]); + } + }, + blockSize, + segmentThreshold); + } + + /// + /// write a sequence of strings as hdfs text files, partitioned by time as well as key. 
+ /// Within a given time and part, records are written in an undefined order + /// + /// type of the record time + /// stream of records to write + /// hdfs user + /// webhdfs protocol port + /// webhdfs directory to write the partitioned data into + /// hdfs block size to use, or -1 for the file system default value + /// file size to write before closing the file and opening another one + /// stream of filenames written + public static Stream ToWebHdfsText( + this Stream source, + string user, int webPort, + Uri prefix, + long blockSize = -1, + long segmentThreshold = 254 * 1024 * 1024) where TTime : Time + { + // make sure we'll be able to write the partitioned data + WebHdfsClient client = new WebHdfsClient(user, webPort); + client.EnsureDirectory(prefix, false); + + // don't write byte order marks at the start of the files + Encoding utf8 = new UTF8Encoding(false, true); + + return source.ToWebHdfsBinary( + user, webPort, + (processId, threadId, time, segment) => Utils.DefaultPartFormat(prefix, processId, threadId, time, segment), + stream => new Utils.FStreamWriter(stream, utf8, 1024 * 1024), + (writer, arraySegment) => + { + for (int i = 0; i < arraySegment.Count; i++) + { + writer.WriteLine(arraySegment.Array[i]); + } + }, + blockSize, segmentThreshold); + } + } + #endregion +} diff --git a/Frameworks/WebHdfsSupport/WebHdfsSupport.csproj b/Frameworks/WebHdfsSupport/WebHdfsSupport.csproj new file mode 100644 index 0000000..775901a --- /dev/null +++ b/Frameworks/WebHdfsSupport/WebHdfsSupport.csproj @@ -0,0 +1,125 @@ + + + + + + + Debug + AnyCPU + {5CFC93F6-68C3-45B5-92FA-43F8626EC482} + Library + Properties + WebHdfsSupport + Microsoft.Research.Naiad.WebHdfsSupport + v4.5 + 512 + + c7fb5862 + ..\..\ + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + false + bin\Release\Microsoft.Research.Naiad.WebHdfsSupport.XML + + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + 
prompt + MinimumRecommendedRules.ruleset + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + false + + + + False + ..\..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll + + + + + + + + + + + + + Properties\SharedAssemblyInfo.cs + + + + + + + {a6221415-1283-4c04-8d2c-e25a857e1fe9} + Naiad + + + {bdc6546c-7ba0-472b-b260-0d596b6152e4} + Lindi + + + {0dca9543-ff9d-48d6-9748-a966dc39c35d} + Storage + + + {eba3d350-41eb-474c-aed9-9cfd1f809de3} + WorkGenerator + + + + + + + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + + + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/WebHdfsSupport/app.config b/Frameworks/WebHdfsSupport/app.config new file mode 100644 index 0000000..4429813 --- /dev/null +++ b/Frameworks/WebHdfsSupport/app.config @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/WebHdfsSupport/packages.config b/Frameworks/WebHdfsSupport/packages.config new file mode 100644 index 0000000..b7fd9e5 --- /dev/null +++ b/Frameworks/WebHdfsSupport/packages.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Frameworks/WorkGenerator/Microsoft.Research.Naiad.WorkGenerator.nuspec b/Frameworks/WorkGenerator/Microsoft.Research.Naiad.WorkGenerator.nuspec new file mode 100644 index 0000000..338dc62 --- /dev/null +++ b/Frameworks/WorkGenerator/Microsoft.Research.Naiad.WorkGenerator.nuspec @@ -0,0 +1,37 @@ + + + + Microsoft.Research.Naiad.WorkGenerator + Naiad - Work queue library + 0.5.0-beta + naiadquestions@microsoft.com + naiadquestions@microsoft.com,Microsoft + http://www.apache.org/licenses/LICENSE-2.0.html + http://research.microsoft.com/naiad/ + + true + Lindi is a simple LINQ-like programming framework for Naiad. 
+ Microsoft Corporation + en-US + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Frameworks/WorkGenerator/Properties/AssemblyInfo.cs b/Frameworks/WorkGenerator/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..3c8ed00 --- /dev/null +++ b/Frameworks/WorkGenerator/Properties/AssemblyInfo.cs @@ -0,0 +1,31 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("WorkGenerator")] +[assembly: AssemblyConfiguration("")] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("a53990d2-34a4-4e76-b08b-5428e64dc742")] diff --git a/Frameworks/WorkGenerator/WorkGenerator.cs b/Frameworks/WorkGenerator/WorkGenerator.cs new file mode 100644 index 0000000..a983f46 --- /dev/null +++ b/Frameworks/WorkGenerator/WorkGenerator.cs @@ -0,0 +1,1110 @@ +/* + * Naiad ver. 0.5 + * Copyright (c) Microsoft Corporation + * All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR + * A PARTICULAR PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Microsoft.Research.Naiad; +using Microsoft.Research.Naiad.Dataflow; +using Microsoft.Research.Naiad.Dataflow.StandardVertices; + +namespace Microsoft.Research.Naiad.Frameworks.WorkGenerator +{ + /// + /// The WorkGenerator framework contains base classes for implementing a work queue of items to be distributed dynamically to a set + /// of Naiad workers. The queue is implemented as a Naiad loop; when new work arrives at a coordinator, it sends "wakeup" messages + /// to all the workers, which respond with "ready" messages. Each time the coordinator receives a ready message from a worker, it matches + /// an outstanding work item to that worker and sends it back. When the worker completes its work item, it responds with another ready message. + /// Work items are thus sent out as workers become ready, rather than requiring coordination across workers. The framework allows + /// workers to send identifying information that the coordinator can use for matching. For example the Hdfs and WebHdfs frameworks + /// instantiate work generators in which the workers identify themselves by the IP addresses of the computer they are running on, + /// allowing file blocks to be preferentially read on the worker computer that is hosting them. 
+ /// + class NamespaceDoc + { + + } + + #region internal helper record types + /// + /// Internal enumeration distinguishing work item records. This is public only for the benefit of the serialization code + /// + public enum WorkRecordType + { + /// + /// The record does not contain any work, and is simply a notification that the worker should identify itself + /// + Initialize, + /// + /// The record contains work + /// + DoWork + } + + /// + /// Internal record describing a work item, sent from the Coordinator to a Worker. This is public only for the benefit + /// of the serialization code + /// + /// Type describing an item of work to be done + public struct WorkItem + { + /// + /// Records are partitioned by this field, which identifies the worker that is being assigned the work + /// + public int destination; + /// + /// This indicates whether the record contains an actual work item, or is simply an initialization notification + /// + public WorkRecordType type; + /// + /// If the record type is DoWork, this describes the work to be done + /// + public TWorkDescription description; + } + + /// + /// Internal record describing a worker that is ready to receive a work item, sent by the Worker to the Coordinator when + /// the Worker has processed a WorkItem. This is public only for the benefit of the serialization code + /// + /// Type describing a worker, for use matching items to workers + public struct WorkRequest + { + /// + /// This matches the destination of the WorkItem that triggered the send. 
It is the partitioning key that tells Naiad + /// how to route items to workers, and it is just passed through unchanged from WorkItem.destination to WorkRequest.source + /// + public int source; + /// + /// This describes the worker that is ready for a new work assignment, and is used to match items to workers + /// + public TWorkerDescription description; + } + #endregion + + #region ICoordinator interface defining the behavior of the coordinator + /// + /// Interface specifying the behavior of a coordinator vertex. A coordinator takes a stream of input records via calls to AddInput, + /// and for each input it generates a set of work items. As workers become free, the coordinator is informed, via AssignWork, and + /// responds with a work item for that worker, or nothing if there is no more work for that worker to perform. + /// + /// Type of the input records + /// Type of the description of an individual work item + /// Type of the description of a worker, used to match items to workers + public interface ICoordinator + { + /// + /// Take an input record, translate it to a set of work items, and add those items to an internal datastructure of + /// outstanding work. + /// + /// The input describing a new batch of work items + void AddInput(TInput input); + + /// + /// Given a worker that is now free, optionally assign it a work item. If no work item is assigned for that worker, the + /// worker will never be presented in a subsequent call to AssignWork, i.e. it is assumed that there is no more work for + /// that worker. + /// + /// The worker that is now free + /// The work item assigned to the worker, if any + /// true if a work item has been assigned, false otherwise + bool AssignWork(TWorkerDescription worker, ref TWorkDescription work); + } + #endregion + + #region IWorker interface defining the behavior of a worker vertex + /// + /// Interface specifying the behavior of a worker vertex. A worker takes a stream of work items to perform. 
Before it is + /// assigned its first work item, and after each item has been performed, the worker is asked to describe itself; the + /// coordinator uses this description to match work items to workers. The act of performing a work item causes a sequence of + /// output items to be generated + /// + /// Type describing a work item + /// Type describing a worker, used to match workers to items + /// Type of the output items generated by work items + public interface IWorker + { + /// + /// Return a description of the worker that the coordinator will use when matching work items to workers. This is called + /// once before any work item has been assigned, and once after each work item is performed. + /// + /// A description of the worker, used to match work items to workers + TWorkerDescription DescribeWorker(); + + /// + /// Execute a work item, generating a sequence of output records + /// + /// The work item to be executed + /// A sequence of array segments, each containing a sequence of records to be output + IEnumerable> DoWork(TWorkDescription workItem); + } + #endregion + + #region main internal class defining the dataflow vertices + /// + /// Class defining the vertices needed to construct a work-generator dataflow subgraph + /// + /// Type of input records describing batches of work + /// Type describing a worker, used to match work items to workers + /// Type describing a work item + /// Type of output records generated by workers + /// Time type for input and output records + internal class Generator where TTime : Time + { + #region coordinator vertex + /// + /// Vertex acting as coordinator for work generation: this takes input records and indications that workers are free, + /// assigns work items to workers, and sends the work items to the appropriate worker vertices. + /// + /// There is an invariant that each worker has at most one work item outstanding at any given (wall-clock) time. 
+ /// + private class CoordinatorVertex : BinaryVertex, WorkItem, IterationIn> + { + #region static helpers to create WorkItem records + /// + /// Returns a work item record that is sent to tell a worker to initialize and identify itself: this record does not + /// include any actual work + /// + /// The VertexId of the worker this will be sent to + /// The record to send + private static WorkItem Initial(int destination) + { + WorkItem initial; + + // the destination field is used by the partition function on the outgoing edge to identify the destination worker + initial.destination = destination; + // this means don't do any work, just respond with a record describing yourself + initial.type = WorkRecordType.Initialize; + // fill in the dummy field + initial.description = default(TWorkDescription); + + return initial; + } + + /// + /// Returns a work item record that is sent to tell a worker to do some work and then identify itself again + /// + /// The VertexId of the worker this will be sent to + /// A description of the work to be done + /// The record to send + private static WorkItem Work(int destination, TWorkDescription item) + { + WorkItem work; + + // the destination field is used by the partition function on the outgoing edge to identify the destination worker + work.destination = destination; + // this means actually do the work in the description field + work.type = WorkRecordType.DoWork; + work.description = item; + + return work; + } + #endregion + + #region private member fields + /// + /// user-supplied factory to generate a coordinator object for each outer time, to perform the logic of + /// generating and assigning work items for that outer time + /// + private readonly Func> coordinatorFactory; + /// + /// Placement used to create the worker vertices; this is needed so we can broadcast an initialization record to every + /// worker vertex + /// + private readonly Placement workerPlacement; + /// + /// Dictionary of coordinator objects, one for 
each outer time we see on the input + /// + private readonly Dictionary> coordinators; + /// + /// Set to keep track of which workers we have sent work items to, indexed by outer time (so there is one + /// set per coordinator object in coordinators). Whenever we receive new input at a given outer time, we send + /// an initialization request to any worker not in the set for that time. If the worker indicates that it is + /// free, but we don't have work for it, then it is removed from the set again. This allows us to ensure that + /// each worker only receives a single outstanding work item at any given outer time. + /// + private readonly Dictionary> liveWorkersByTime; + #endregion + + #region receive an input record from the outside world + /// + /// Receive input from the outside world, call the ICoordinator logic to translate that record into a + /// set of work items, and kick all the workers to cause them to request some new work + /// + /// input message + public override void OnReceive1(Message> message) + { + // look up the coordinator and live set for this time, and make new ones if they don't already exist + ICoordinator coordinator; + HashSet liveWorkers; + if (this.coordinators.TryGetValue(message.time.outerTime, out coordinator)) + { + liveWorkers = this.liveWorkersByTime[message.time.outerTime]; + } + else + { + // make a new coordinator from the user-supplied factory and add it to the dictionary + coordinator = this.coordinatorFactory(message.time.outerTime); + this.coordinators[message.time.outerTime] = coordinator; + + // make an empty set of live workers and add it to the dictionary + liveWorkers = new HashSet(); + this.liveWorkersByTime[message.time.outerTime] = liveWorkers; + // If there are multiple workers, and the coordinator is situated on the same thread as one + // of the workers, then liveWorkers is initialized to contain that VertexId, so that the worker + // on the coordinator's thread will never receive any work. 
This is because work items may take + // a long time, and we don't want the coordinator to be blocked from handing out work. + if (this.workerPlacement.Count > 1) + { + int coordinatorProcess = this.Stage.Placement.First().ProcessId; + int coordinatorThread = this.Stage.Placement.First().ThreadId; + IEnumerable workersToBlock = this.workerPlacement. + Where(w => w.ProcessId == coordinatorProcess && w.ThreadId == coordinatorThread). + Select(w => w.VertexId); + if (workersToBlock.Count() > 0) + { + // block any worker on our thread from receiving work + liveWorkers.Add(workersToBlock.First()); + } + } + + // add a notification to ensure the coordinator gets discarded when work is finished for + // this time + this.NotifyAt(new IterationIn { outerTime = message.time.outerTime, iteration = Int32.MaxValue - 1 }); + } + + // for each input record in the message, tell the coordinator to turn it into the requisite batch of work items + for (int i=0; i !liveWorkers.Contains(w.VertexId))) + { + // add the worker to the set of workers that have received a message and not yet gone back to sleep + liveWorkers.Add(worker.VertexId); + buffer.Send(CoordinatorVertex.Initial(worker.VertexId)); + } + } + #endregion + + #region receive an input record indicating that a worker is ready for more work + /// + /// Receive input from workers, indicating that they are ready for more work + /// + /// Message containing records describing which workers are ready + public override void OnReceive2(Message, IterationIn> message) + { + // there must already be a coordinator and live worker set for this time, since there won't be + // any messages returned from a worker about the time until it has been initialized by us receiving + // an input above at that time + ICoordinator coordinator = this.coordinators[message.time.outerTime]; + HashSet liveWorkers = this.liveWorkersByTime[message.time.outerTime]; + + // get a buffer to write outputs into + var buffer = 
this.Output.GetBufferForTime(message.time); + + // process each record in this message in turn + for (int i=0; i record = message.payload[i]; + + // make a default work item; this will get replaced by a real one if there is work to be assigned + TWorkDescription work = default(TWorkDescription); + + // ask the user-supplied coordinator to try to assign work to the worker that has requested it + if (coordinator.AssignWork(record.description, ref work)) + { + // a work item was assigned, and filled in to the work variable. Send a record back to the worker that + // indicated it was free, assigned the work item + buffer.Send(CoordinatorVertex.Work(record.source, work)); + } + else + { + // The worker has not received any work, and is 'going to sleep'. Remove it from the set of live + // workers. This ensures that if we get more input, the worker will receive a new initialization + // and wake up again. + liveWorkers.Remove(record.source); + } + } + } + #endregion + + #region receive a notification that an outer time is complete + /// + /// Receive a notification once all inputs and work items have been completed for a given outer time + /// Notifications are always requested at time (outerTime,Int32.MaxInt-1). 
+ /// + /// The time that is now complete + public override void OnNotify(IterationIn time) + { + // we aren't going to see any more inputs or work items for this outer time, so discard the state + // we are keeping for it + this.coordinators.Remove(time.outerTime); + this.liveWorkersByTime.Remove(time.outerTime); + } + #endregion + + #region constructor + /// + /// constructor for the coordinator vertex + /// + /// vertex index in stage, passed to base class + /// stage, passed to base class + /// factory to generate an ICoordinator for each time seen on the input + /// placement of the worker stage, used to broadcast initializations to all workers + public CoordinatorVertex( + int index, Stage> stage, + Func> coordinatorFactory, + Placement workerPlacement) + : base(index, stage) + { + this.coordinatorFactory = coordinatorFactory; + this.workerPlacement = workerPlacement; + + this.coordinators = new Dictionary>(); + this.liveWorkersByTime = new Dictionary>(); + } + #endregion + } + #endregion + + #region worker vertex + /// + /// Vertex that implements a worker, receiving work items on a single input and writing to two outputs. The first + /// is the 'external' output that receives all the generated records of type TOutput, and the second is the feedback + /// output that receives identification records indicating to the coordinator that this worker is ready for more work + /// + private class WorkerVertex : Vertex> + { + #region private member variables + /// + /// The output buffer for writing the generated work records that are the external output of the vertex + /// + private VertexOutputBuffer> output; + /// + /// The output buffer for writing responses to the Coordinator indicating that this worker is ready for + /// more work + /// + private VertexOutputBuffer, IterationIn> requests; + + /// + /// The user-supplied factory to create a worker per outer time. 
Each worker can taking a work item and + /// generate a stream of outputs, and also generate a self-description to send to the coordinator to + /// request more work. + /// + private readonly Func> workerFactory; + + /// + /// We store an IWorker for each outer time we have seen, and garbage-collect them when all work for + /// that time has been completed + /// + private readonly Dictionary> workers; + #endregion + + #region receive a work item from the coordinator + /// + /// Receive a work record from the coordinator. This may be an initialization request, which contains no + /// work but just causes the worker to identify itself, or may contain a payload of a work item. The coordinator + /// promises there is only one outstanding work record at a time for a given worker, so the message should not + /// contain multiple records. + /// + /// the input message containing the work record + private void OnReceive(Message, IterationIn> message) + { + if (message.length != 1) + { + // the coordinator only gives out one record per worker at a given outer time, so this + // shouldn't happen + throw new ApplicationException("Got mysterious payload length " + message.length); + } + + IWorker worker; + if (!this.workers.TryGetValue(message.time.outerTime, out worker)) + { + // we haven't seen this time before. 
Generate a new worker from the factory, add it to the + // dictionary, and request a notification to be able to garbage collect it when all the work + // for this time is complete + worker = this.workerFactory(message.time.outerTime); + this.workers.Add(message.time.outerTime, worker); + this.NotifyAt(new IterationIn { outerTime = message.time.outerTime, iteration = Int32.MaxValue - 1 }); + } + + // get the buffer to forward the output records to + var outputBuffer = this.output.GetBufferForTime(message.time); + + // get the record out of the message + WorkItem item = message.payload[0]; + if (item.type == WorkRecordType.DoWork) + { + // it has a payload of work, so call the user-supplied function to actually generate a + // sequence of output records, and write them all out. + IEnumerable> data = worker.DoWork(item.description); + foreach (ArraySegment payload in data) + { + for (int i = 0; i < payload.Count; ++i) + { + outputBuffer.Send(payload.Array[i + payload.Offset]); + } + } + } + + // get the buffer to forward the coordinator request to + var requestBuffer = this.requests.GetBufferForTime(message.time); + + // identify ourselves as ready to receive another work item. The item.destination/request.source field is + // the VertexId that Naiad uses to route requests to this worker, so setting request.source ensures that + // the work item will get routed back to the same place. We send the request with the same logical time + // we received the work item at, but the iteration counter will get incremented by the feedback vertex + WorkRequest request = + new WorkRequest { source = item.destination, description = worker.DescribeWorker() }; + requestBuffer.Send(request); + } + #endregion + + #region receive a notification that all work has completed for an outer time + /// + /// Receive a notification that all work has finished for an outer time. 
Notifications are always + /// requested for times (time.outerTime, Int32.MaxValue - 1) + /// + /// + public override void OnNotify(IterationIn time) + { + // discard the worker we were using for this time + this.workers.Remove(time.outerTime); + } + #endregion + + #region make a stage consisting of worker vertices + /// + /// Make a stage of worker vertices, receiving input from a stream supplied by the coordinator, and return a pair + /// of streams corresponding to the external output records, and the identification requests that need to be sent + /// back to the coordinator + /// + /// the placement to use for vertices in the stage + /// the input stream of work requests from the coordinator + /// a factory to generate a worker vertex object for each vertex in the stage + /// the friendly name identifying the stage + /// + public static Pair>, Stream, IterationIn>> + MakeStage( + Placement placement, + Stream, IterationIn> input, + Func>, WorkerVertex> factory, + string name) + { + // create a new stage object to hold the vertices + var stage = Foundry.NewStage(placement, input.Context, factory, name); + + // make an input that responds correctly when messages arrive from the coordinator, routing the messages to the + // worker described by the destination field + var stageInput = stage.NewInput(input, (message, vertex) => vertex.OnReceive(message), r => r.destination); + + // make an output where workers can send their generated records, with no partitioning information + var dataOutput = stage.NewOutput(vertex => vertex.output); + + // make an output where workers can send their identification records back to the coordinator, with + // no partitioning information + var requestOutput = stage.NewOutput(vertex => vertex.requests); + + // return the two output streams as a pair + return new Pair< + Stream>, + Stream, IterationIn>> + (dataOutput, requestOutput); + } + #endregion + + #region constructor + /// + /// make a new worker vertex + /// + /// index of the 
vertex in the stage, passed to the base class + /// stage the vertex belongs to, passed to the base class + /// IWorker object containing the user-supplied logic to generate output records from work items + public WorkerVertex( + int index, Stage> stage, + Func> workerFactory) + : base(index, stage) + { + this.output = new VertexOutputBuffer>(this); + this.requests = new VertexOutputBuffer, IterationIn>(this); + + this.workerFactory = time => workerFactory(index, time); + this.workers = new Dictionary>(); + } + #endregion + } + #endregion + + #region method to generate the dataflow + /// + /// Generate a dataflow that takes a stream of input records and generates a stream of output records, + /// by dividing each input into a set of work items. A supplied ICoordinator factory indicates how to + /// divide an input into work items, and how to assign items to workers. A supplied IWorker factory + /// indicates how to generate output records from a work item, and how to identify the worker so that + /// the coordinator can decide which worker to assign items to. + /// + /// The inputs corresponding to a particular time all form the same 'batch' and work items within that + /// batch may be interleaved. If a second batch is started before the first completes then the efficiency + /// of the work assignment may suffer, since batch A's coordinator cannot 'see' that a worker is busy working + /// on an item from batch B and may assign work that will be queued behind work from batch B, even if other + /// workers are idle. 
+ /// + /// The stream of records describing work to be done + /// The factory to generate a coordinator for each distinct input time + /// The factory to generate a worker for each distinct worker VertexId and + /// input time + /// A stream of output records, with no guaranteed partitioning + public static Stream Generate( + Stream input, + Func> coordinatorFactory, + Func> workerFactory) + { + // determine the placement we are going to use for the workers + Placement workerPlacement = input.ForStage.Computation.Controller.DefaultPlacement; + + // Make a loop context to hold the coordinator and the worker vertices + var workLoop = new Dataflow.Iteration.LoopContext(input.Context, "WorkLoop"); + + // Make the feedback edge that takes work requests from workers and routes them back to the coordinator + var feedback = workLoop.Delay>(); + + // Bring the input records into the loop, adding an extra loop coordinate that will be 0 for all records + var workInput = workLoop.EnterLoop(input); + + // Make the coordinator stage, and return a stream consisting of work items to be handed to workers + var coordinatorOutput = CoordinatorVertex.MakeStage( + // A SingleVertex placement ensures there is just one coordinator vertex handing out all the work. + new Placement.SingleVertex(), + // this is the stream of input records that we are going to turn into work items + workInput, + // this is the stream of notifications coming from workers when they become idle + feedback.Output, + // this is the factory to instantiate the coordinator vertex: it needs to know the placement of + // the workers, so it can broadcast initialization messages to them + (i, s) => new CoordinatorVertex(i, s, coordinatorFactory, workerPlacement), + // these are the partitioning requirements for the inputs. 
Since the coordinator stage has a single + // vertex, the partitioning function can be null as all records get routed to the same place + null, null, + // this is the partitioning guarantee for the outputs, which is null because they all come from the + // same coordinator vertex + null, + // this is the friendly name for the stage + "Work Coordinator"); + + // Make the worker vertex stage. The output is a pair of streams: the first is the generated output + // records, and the second is the stream of notifications to send back to the controller when each + // worker becomes idle. The MakeStage internally uses the desired partition function to route the + // work items to the appropriate vertex + var workers = WorkerVertex.MakeStage( + // this is the placement we are using for the workers. We specify it explicitly to ensure it + // matches the placement we told the coordinator, so broadcast will work + workerPlacement, + // this is the stream of work items coming from the coordinator + coordinatorOutput, + // this is the factory used to create each worker in the stage + (i, s) => new WorkerVertex(i, s, workerFactory), + // this is the friendly name for the stage + "Work Generators"); + + // connect the idle notifications to the feedback stage, which will forward them back to the coordinator + feedback.Input = workers.Second; + + // strip off the loop coordinate from the output records, and return the resulting stream to the caller + return workLoop.ExitLoop(workers.First); + } + #endregion + } + #endregion + + #region public MatchingCoordinator helper class to build a coordinator that matches work items to workers + /// + /// A coordinator that assigns work items to zero or more matching workers. When a worker is free, it is assigned a matching item if + /// there is one, otherwise it is assigned an item that matched to no workers if there is one, otherwise it is assigned an item from + /// a worker that has the maximal number of remaining unassigned items. 
+ /// + /// Type of an input record: each input expands to some number of work items + /// Type of a work item category + /// Type of a work item stub, expanded to a work item once the worker is chosen + /// Type of a work item description + /// Type of a worker description: this is translated to a TQueueKey to find a match + public abstract class MatchingCoordinator : ICoordinator + { + #region public Match helper class to describe a work item and the list of workers that match it + /// + /// A work item and the list of categories that match the work item. + /// + public class Match + { + /// + /// A stub description of the work, to be passed to the worker when the work is assigned. Once the worker is + /// assigned the stub is converted to a TWorkDescription using the derived ExpandWorkItem method; this allows + /// for the case that the work item description is dependent on which worker is chosen to execute it + /// + public TWorkStub workStub; + + /// + /// A list of queues that match to this work item. If this list is null or empty, the item will + /// be assigned to an available worker after that worker has executed all matching items. + /// + public IEnumerable categories; + + /// + /// Create a new empty match. + /// + public Match() + { + taken = false; + } + + /// + /// This is initially false. Once the item is assigned to a worker, Taken is set to true, but the item may be left on + /// other worker queues (to avoid the cost of keeping track of its location in all the relevant queues). Items with + /// taken set are skipped over when dequeueing matches from queues. + /// + internal bool taken; + } + #endregion + + #region helper class holding a queue of work items + /// + /// Queue of work items that match to a particular worker. + /// + protected class MatchQueue + { + /// + /// Queue of items that match to this worker. Each Match may be present on multiple queues. 
If a Match has its taken field + /// set to true, then it has already been consumed by another worker, and should be skipped while dequeueing. + /// + private readonly Queue queue; + /// + /// Number of items in queue that have item.taken == false (and are thus available). + /// + private int unusedMatches; + /// + /// A unique id that allows MatchQueues to form a unique sort order. + /// + private readonly int uid; + + /// + /// Comparison function to use when sorting MatchQueues. a sorts before b if a contains more available matches + /// than b, or if they contain the same number of available matches and a's uid is less than b's uid. + /// + /// First MatchQueue to compare + /// Second MatchQueue to compare + /// -1 if a has more available matches than b, or the same number of matches, but a's uid is less than b's. 0 if + /// a and b have the same number of available matches and the same uid. 1 if a has fewer available matches than b, or the same + /// number of matches, but a's uid is greater than b's. + static public int Compare(MatchQueue a, MatchQueue b) + { + if (a.unusedMatches > b.unusedMatches) + { + return -1; + } + else if (a.unusedMatches < b.unusedMatches) + { + return 1; + } + else + { + if (a.uid < b.uid) + { + return -1; + } + else if (a.uid == b.uid) + { + return 0; + } + else + { + return 1; + } + } + } + + /// + /// Create an empty MatchQueue with the specified unique identifier + /// + /// Unique identifier to allow MatchQueues to form a sort order + public MatchQueue(int uid) + { + this.queue = new Queue(); + this.unusedMatches = 0; + this.uid = uid; + } + + /// + /// Add an unused Match to the queue. 
Throws an exception if the Match has been taken + /// + /// A Match to add to the queue + public void Enqueue(Match match) + { + if (match.taken) + { + throw new ApplicationException("Can't add a taken match to a queue"); + } + + this.queue.Enqueue(match); + ++this.unusedMatches; + } + + /// + /// The number of unused matches remaining in the queue + /// + public int Count + { + get { return this.unusedMatches; } + } + + /// + /// Return the next unused matched in the queue. Throws an exception if there are no remaining + /// matches. DOES NOT UPDATE THE COUNT OF UNUSED MATCHES. The caller is responsible for decrementing + /// the count of unused matches for every queue the return value appears in, including this one, by + /// calling MarkTaken on the relevant queues. + /// + /// The next previously-unused match in the queue + public Match Dequeue() + { + if (this.unusedMatches == 0) + { + throw new ApplicationException("Queue is empty"); + } + + Match match; + + // skip over all matches that have been marked as taken by being removed from another queue + do + { + match = this.queue.Dequeue(); + } + while (match.taken); + + // mark this match as taken. The unused queue count in this queue, and any other queues the match was + // added to, must be decremented by calls to MarkTaken + match.taken = true; + + return match; + } + + /// + /// Indicate that an unused entry in this queue has either been dequeued or marked as taken by another queue. + /// This must be called appropriately to ensure the invariant that this.unusedMatches is equal to the number + /// of unused Match elements in this.queue. Throws an exception if there are no unused matches in the queue. 
+ /// + public void MarkTaken() + { + if (this.unusedMatches == 0) + { + throw new ApplicationException("Queue is empty"); + } + + --unusedMatches; + } + } + + /// + /// Class to sort MatchQueues by priority + /// + private class MatchQueueComparer : Comparer + { + public override int Compare(MatchQueue x, MatchQueue y) + { + return MatchQueue.Compare(x, y); + } + } + #endregion + + #region member fields + /// + /// For each category with outstanding work, a queue of unused matches. There is an invariant that every value in + /// this dictionary has Count greater than 0 + /// + protected readonly Dictionary waitingWork; + /// + /// Queue of unused matches that have no category, and can thus be assigned to any worker + /// + private readonly MatchQueue anyplaceWork; + /// + /// Sorted set of queues. Queues with more unused matches sort earlier. When a worker has no matching work, and + /// there are no unused matches in this.anyplaceWork, a work item will be assigned from the queue that sorts + /// earliest in this set + /// + private readonly SortedSet priority; + + /// + /// The next identifier to be assigned to a MatchQueue. These identifiers exist only to ensure a unique sort + /// order for MatchQueues + /// + private int matchQueueId; + #endregion + + #region implementation of ICoordinator AddInput method + /// + /// Implements ICoordinator.AddInput. Takes an input record and calls the user-supplied function provided in + /// the constructor, to translate the input into a set of work items, each of which is annotated with a list + /// of zero or more categories. As workers become ready, their work will be assigned from these items. + /// + /// Record describing an input that will be converted to a set of work items + public void AddInput(TInput input) + { + // This dictionary will hold the items returned by the user-supplied function, sorted by category. 
The items are + // marshaled into this dictionary before updating our datastructures to avoid hammering on the priority queue + // with every individual addition + Dictionary> additionalWork = new Dictionary>(); + + // Call the derived-class function to get the list of work items corresponding to this input + IEnumerable workItems = EnumerateWork(input); + + foreach (Match item in workItems) + { + if (item.categories == null || item.categories.Count() == 0) + { + // the item has no matching categories, so put it on the queue of unaffiliated work + anyplaceWork.Enqueue(item); + } + else + { + // go through each candidate category, and add the item to the list of new work we are building + // up for that category + foreach (TCategory category in item.categories) + { + List newWork; + if (!additionalWork.TryGetValue(category, out newWork)) + { + newWork = new List(); + additionalWork.Add(category, newWork); + } + newWork.Add(item); + } + } + } + + // Now we have figured out, for each category, the list of new items to add to its queue + foreach (var element in additionalWork) + { + MatchQueue queue; + if (this.waitingWork.TryGetValue(element.Key, out queue)) + { + // there was already a queue of items waiting for this category. Remove the corresponding + // element from the priority set, since the number of items in the queue (and hence the + // priority) is going to change + this.priority.Remove(queue); + } + else + { + // there were no items queued for this category, so make a new queue and add it to the + // dictionary. 
Don't add anything to the priority set yet: that will happen below + queue = new MatchQueue(matchQueueId); + ++matchQueueId; + this.waitingWork.Add(element.Key, queue); + // let derived classes know there is a new queue + this.NotifyQueueAddition(element.Key); + } + + // add each of the new matches to the queue for this category + foreach (Match item in element.Value) + { + queue.Enqueue(item); + } + + // now that we have added all the new elements for this category, we can reinsert its queue + // into the set sorted by queue length + this.priority.Add(queue); + } + } + #endregion + + #region implementation of ICoordinator AssignWork method + /// + /// Implements ICoordinator.AssignWork. Given a worker that is now free, assign it a work item if any remain. + /// If there is an item matching the worker it will be returned; otherwise if there is an item that wasn't matched + /// to any worker it will be returned; otherwise an item from another category's queue that has a maximal number of + /// pending items will be returned. If there are no items remaining to be assigned, this returns false. + /// + /// The worker that needs work + /// The work item to be assigned if any + /// True if any items remained, false otherwise + public bool AssignWork(TWorkerDescription worker, ref TWorkDescription work) + { + // This will be set to non-null if we find an item to assign + Match match = null; + + // This will be set to the category of the matching queue, if there is one + TCategory queueCategory = default(TCategory); + // Call the derived-class method to match a worker to a category. If this returns non-null then there was + // a non-empty queue with work, and its key is returned in queueKey. + MatchQueue queue = MapWorkerToQueue(worker, ref queueCategory); + if (queue != null) + { + // There is an item in the worker's queue of matching work, so we will assign it; hence, let's remove it + // from the queue. 
We will update the counts of unused items in all the queues that match belongs to below + match = queue.Dequeue(); + } + else if (this.anyplaceWork.Count > 0) + { + // There was no item in the worker's queue, but there is one in the list of items that can be matched + // anywhere, so we will assign that; let's remove it from that queue + match = this.anyplaceWork.Dequeue(); + // and update the queue's count of unused items + this.anyplaceWork.MarkTaken(); + } + else if (this.priority.Count > 0) + { + // The only remaining items would have preferred to be matched somewhere else. Never mind, let's just take + // one from the queue of a worker with a maximal number of unused items. We will update the counts of + // unused items in all the queues that match belongs to below + match = this.priority.First().Dequeue(); + } + + if (match == null) + { + // There was no outstanding work to assign + return false; + } + + if (match.categories != null) + { + // The item belongs to zero or more categories: we need to update the count of unused items for + // each of those categories now that this item has been matched + foreach (TCategory candidate in match.categories) + { + // First, remove the queue from the priority set, since its count (and hence priority) are going to change + MatchQueue candidateQueue = this.waitingWork[candidate]; + this.priority.Remove(candidateQueue); + + // Now update the count of unused items in the queue + candidateQueue.MarkTaken(); + + if (candidateQueue.Count == 0) + { + // The queue has no more items, so remove it from the set of workers that have waiting work + this.waitingWork.Remove(candidate); + // let derived classes know the queue has been removed + this.NotifyQueueRemoval(candidate); + } + else + { + // Put the queue back in the priority set, now that it has a new count + this.priority.Add(candidateQueue); + } + } + } + + // Get a work item by converting the stub. 
If queue!=null the worker got the item from a + // matching queue with category queueKey, otherwise the item doesn't match the worker, and + // queueCategory is default(TCategory) + work = ExpandWorkItem((queue != null), queueCategory, match.workStub); + return true; + } + #endregion + + #region abstract and virtual methods + /// + /// Given an input item, return a list of work items, each with a list of categories + /// + /// input record to convert to work items + /// list of work items + protected abstract IEnumerable EnumerateWork(TInput input); + /// + /// Given a worker, return a matching queue from this.waitingWork (and fill in its key to categoryKey) + /// or null if there is no matching queue + /// + /// The worker to match to a category + /// The category of the matching queue, if there is one + /// The matching queue, or null if there is no matching queue + protected abstract MatchQueue MapWorkerToQueue(TWorkerDescription worker, ref TCategory categoryKey); + /// + /// Convert a work item stub to a work item once it is known which category, if any, it was matched to + /// + /// True if the item was drawn from a queue that matched the worker it is + /// being assigned to + /// Category of the matching queue, if usedMatchingQueue==true + /// Work item stub to convert + /// Filled-in work item + protected abstract TWorkDescription ExpandWorkItem(bool usedMatchingQueue, TCategory category, TWorkStub stub); + + /// + /// called whenever a new worker queue is added + /// + /// key for the queue + protected virtual void NotifyQueueAddition(TCategory queue) + { + } + + /// + /// called whenever an existing worker queue becomes empty and is deleted + /// + /// key for the queue + protected virtual void NotifyQueueRemoval(TCategory queue) + { + } + #endregion + + #region constructor + /// + /// Create a new MatchingCoordinator + /// + public MatchingCoordinator() + { + this.priority = new SortedSet(new MatchQueueComparer()); + this.waitingWork = new Dictionary(); 
+ this.anyplaceWork = new MatchQueue(0); + // initialize the unique id for matchqueues to be created as work items are added + this.matchQueueId = 1; + } + #endregion + } + #endregion + + #region extension methods to build the dataflow + /// + /// Extension methods for the work generator + /// + public static class ExtensionMethods + { + /// + /// Add a work generator taking a stream of inputs and converting each input to a stream of outputs, by assigning inputs to workers + /// of a given type + /// + /// The type of input records to consume + /// Description of a work item + /// Description of a worker, used to match workers to items + /// The type of output records produced by workers + /// The time of input records + /// stream of input records + /// factory to make the work coordinator object + /// factory to make the worker objects + /// stream of output records + public static Stream GenerateWork( + this Stream input, + Func> coordinatorFactory, + Func> workerFactory) + where TTime : Time + { + return Generator.Generate( + input, coordinatorFactory, workerFactory); + } + } + #endregion +} diff --git a/ClusterSubmission/DependencyLister/DependencyLister.csproj b/Frameworks/WorkGenerator/WorkGenerator.csproj similarity index 53% rename from ClusterSubmission/DependencyLister/DependencyLister.csproj rename to Frameworks/WorkGenerator/WorkGenerator.csproj index 693ecfb..2278dd5 100644 --- a/ClusterSubmission/DependencyLister/DependencyLister.csproj +++ b/Frameworks/WorkGenerator/WorkGenerator.csproj @@ -1,19 +1,19 @@  - + Debug AnyCPU - {4B1A2CC2-1798-472C-954B-9C808B2C0748} + {EBA3D350-41EB-474C-AED9-9CFD1F809DE3} Library Properties - Microsoft.Research.Naiad.Cluster.DependencyLister - Microsoft.Research.Naiad.Cluster.DependencyLister + WorkGenerator + Microsoft.Research.Naiad.WorkGenerator v4.5 512 + - AnyCPU true full false @@ -21,18 +21,37 @@ DEBUG;TRACE prompt 4 + false - AnyCPU pdbonly true bin\Release\ TRACE prompt 4 + false + 
bin\Release\Microsoft.Research.Naiad.WorkGenerator.XML - - + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + false + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + false @@ -44,11 +63,20 @@ - + + Properties\SharedAssemblyInfo.cs + + - + + {a6221415-1283-4c04-8d2c-e25a857e1fe9} + Naiad + + + + + + + + - + \ No newline at end of file diff --git a/Naiad/Naiad.csproj b/Naiad/Naiad.csproj index f9462d5..f292de5 100644 --- a/Naiad/Naiad.csproj +++ b/Naiad/Naiad.csproj @@ -10,8 +10,9 @@ Properties Microsoft.Research.Naiad Microsoft.Research.Naiad - v4.0 - Client + v4.5 + + 512 SAK SAK @@ -65,6 +66,40 @@ + + true + bin\x64\Debug\ + TRACE;DEBUG;LOGGING_ON + true + bin\Release\Microsoft.Research.Naiad.xml + full + x64 + prompt + MinimumRecommendedRules.ruleset + false + false + + + true + bin\x64\Release\ + TRACE + true + bin\Release\Microsoft.Research.Naiad.xml + true + pdbonly + x64 + prompt + ExtendedCorrectnessRules.ruleset + + + bin\x64\Tracing\ + TRACING_ON + true + bin\Release\Microsoft.Research.Naiad.xml + true + x64 + MinimumRecommendedRules.ruleset + @@ -73,11 +108,13 @@ + + Properties\SharedAssemblyInfo.cs + - diff --git a/Naiad/NamespaceDocs.cs b/Naiad/NamespaceDocs.cs index 7fd76ed..a4026d0 100644 --- a/Naiad/NamespaceDocs.cs +++ b/Naiad/NamespaceDocs.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -75,7 +75,7 @@ class NamespaceDoc namespace Diagnostics { /// - /// The Diagnostics namespace provides classes that support , , and observing various events in the Naiad runtime. + /// The Diagnostics namespace provides classes that support , tracing, and observing various events in the Naiad runtime. 
/// class NamespaceDoc { diff --git a/Naiad/Properties/AssemblyInfo.cs b/Naiad/Properties/AssemblyInfo.cs index 512a584..88ee2ad 100644 --- a/Naiad/Properties/AssemblyInfo.cs +++ b/Naiad/Properties/AssemblyInfo.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -26,31 +26,7 @@ // set of attributes. Change these attribute values to modify the information // associated with an assembly. [assembly: AssemblyTitle("Microsoft.Research.Naiad")] -[assembly: AssemblyDescription("")] [assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("")] -[assembly: AssemblyProduct("Microsoft.Research.Naiad")] -[assembly: AssemblyCopyright("Copyright © Microsoft Corporation. All rights reserved.")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] // The following GUID is for the ID of the typelib if this project is exposed to COM [assembly: Guid("e2d5f5f0-161a-46ef-9c5b-129a307527e3")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("0.4.2")] -[assembly: AssemblyFileVersion("0.4.2")] diff --git a/Naiad/Runtime/Controlling/Controller.cs b/Naiad/Runtime/Controlling/Controller.cs index 5e648c4..72b4c94 100644 --- a/Naiad/Runtime/Controlling/Controller.cs +++ b/Naiad/Runtime/Controlling/Controller.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -483,7 +483,7 @@ public long BlockScheduler(AutoResetEvent selectiveEvent, long val) public void WakeUp() { - Tracing.Trace("{WakeUp"); + NaiadTracing.Trace.RegionStart(NaiadTracingRegion.Wakeup); if (this.useBroadcastWakeup) { this.wakeUpEvent.Advance(); @@ -493,7 +493,7 @@ public void WakeUp() foreach (Scheduler scheduler in this.schedulers) scheduler.Signal(); } - Tracing.Trace("}WakeUp"); + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.Wakeup); } public void Abort() @@ -524,7 +524,7 @@ public void Resume() internal void DrainAllQueuedMessages() { foreach (Scheduler scheduler in this.schedulers) - scheduler.DrainPostOffice(); + scheduler.AcceptWorkItemsFromOthers(); } #region Scheduler events @@ -823,6 +823,9 @@ public BaseController(Configuration config) Logging.Progress("Server GC = {0}", System.Runtime.GCSettings.IsServerGC); Logging.Progress("GC settings latencymode={0}", System.Runtime.GCSettings.LatencyMode); Logging.Progress("Using CLR {0}", System.Environment.Version); + + NaiadTracing.Trace.ProcessInfo(this.configuration.ProcessID, System.Environment.MachineName); + NaiadTracing.Trace.LockInfo(this.GlobalLock, "Controller lock"); if (this.NetworkChannel != null) this.NetworkChannel.StartMessageDelivery(); diff --git a/Naiad/Runtime/Controlling/InternalController.cs b/Naiad/Runtime/Controlling/InternalController.cs index edf2013..8ecc322 100644 --- a/Naiad/Runtime/Controlling/InternalController.cs +++ b/Naiad/Runtime/Controlling/InternalController.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Controlling/Peloponnese.cs b/Naiad/Runtime/Controlling/Peloponnese.cs index 21c4ba5..cd4d6cd 100644 --- a/Naiad/Runtime/Controlling/Peloponnese.cs +++ b/Naiad/Runtime/Controlling/Peloponnese.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/Naiad/Runtime/Networking/NaiadServer.cs b/Naiad/Runtime/Networking/NaiadServer.cs index 71e23b4..918978a 100644 --- a/Naiad/Runtime/Networking/NaiadServer.cs +++ b/Naiad/Runtime/Networking/NaiadServer.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Networking/Networking.cs b/Naiad/Runtime/Networking/Networking.cs index 3ad3326..0d1627e 100644 --- a/Naiad/Runtime/Networking/Networking.cs +++ b/Naiad/Runtime/Networking/Networking.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -29,6 +29,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.Remoting.Messaging; using System.Text; using System.Threading; using System.Net; @@ -36,6 +37,7 @@ using System.Net.Sockets; using System.Diagnostics; using System.IO; +using System.Threading.Tasks; using Microsoft.Research.Naiad.Utilities; using Microsoft.Research.Naiad.Scheduling; using Microsoft.Research.Naiad.DataStructures; @@ -81,7 +83,7 @@ internal interface NetworkChannel : IDisposable /// Registers the given mailbox to receive messages. /// /// The mailbox to which messages with the same channel and destination vertex ID should be sent. - void RegisterMailbox(UntypedMailbox mailbox); + void RegisterMailbox(Mailbox mailbox); /// /// Returns the size (in bytes) of a page of serialized data used for sending. 
@@ -148,7 +150,7 @@ private enum ReceiveResult public readonly int id; public int Id { get { return this.id; } } - private readonly List>> graphmailboxes; + private readonly List>> graphmailboxes; //private readonly AutoResetEvent sendEvent; //private readonly AutoResetEvent[] sendEvents; @@ -194,7 +196,7 @@ private class ConnectionState : IDisposable public readonly ConcurrentQueue HighPrioritySegmentQueue; //public readonly NaiadList InflightSegments; //public readonly NaiadList> InflightArraySegments; - public readonly RecvBufferSheaf RecvBufferSheaf; + public RecvBufferSheaf RecvBufferSheaf; public Thread RecvThread; public Thread SendThread; //public readonly CircularBuffer RecvBuffer; @@ -214,6 +216,14 @@ private class ConnectionState : IDisposable public long RecordsSent; public long RecordsRecv; + public readonly Dictionary DeferredDisposalSegments; + + public int NextSequenceNumber; + + public int SequenceNumberReceived; + public int SequenceNumberToAcknowledge; + public int SequenceNumberAcknowledged; + // Trying to be cache-friendly with separate arrays for send/recv threads internal long[] sendStatistics; internal long[] recvStatistics; @@ -223,6 +233,8 @@ private class ConnectionState : IDisposable public int ReceivedCheckpointMessages; public int LastCheckpointSequenceNumber; + public bool EverReceivedIncomingConnection; + public ConnectionState(int id, ConnectionStatus status, int recvBufferLength, BufferPool sendPool) { this.Id = id; @@ -249,6 +261,13 @@ public ConnectionState(int id, ConnectionStatus status, int recvBufferLength, Bu this.sendStatistics = new long[(int)RuntimeStatistic.NUM_STATISTICS]; this.recvStatistics = new long[(int)RuntimeStatistic.NUM_STATISTICS]; + this.DeferredDisposalSegments = new Dictionary(); + this.NextSequenceNumber = 0; + + this.SequenceNumberReceived = -1; + this.SequenceNumberAcknowledged = -1; + this.SequenceNumberToAcknowledge = -1; + this.ReceivedCheckpointMessages = 0; this.LastCheckpointSequenceNumber = -1; @@ 
-258,6 +277,8 @@ public ConnectionState(int id, ConnectionStatus status, int recvBufferLength, Bu this.CheckpointResumeEvent = new AutoResetEvent(false); this.sequenceNumber = 1; + + this.EverReceivedIncomingConnection = false; } public void Dispose() @@ -326,7 +347,7 @@ internal TcpNetworkChannel(int id, InternalController controller, Configuration this.localProcessID = this.Controller.Configuration.ProcessID; - this.graphmailboxes = new List>>(); + this.graphmailboxes = new List>>(); this.connections = new List(); @@ -409,7 +430,7 @@ internal TcpNetworkChannel(int id, InternalController controller, Configuration private void UdpReceiveThread(IPEndPoint multicastGroupAddress) { - Tracing.Trace("@UdpReceiveThread"); + NaiadTracing.Trace.ThreadName("UdpRecvThread[{0}]", multicastGroupAddress.ToString()); IPEndPoint from = multicastGroupAddress; MessageHeader header = default(MessageHeader); //int count = 0; @@ -420,9 +441,7 @@ private void UdpReceiveThread(IPEndPoint multicastGroupAddress) { byte[] bytes = this.udpClient.Receive(ref from); - Tracing.Trace("Recv"); - - MessageHeader.ReadHeaderFromBuffer(bytes, 0, ref header, this.HeaderSerializer); + MessageHeader.ReadHeaderFromBuffer(bytes, 0, ref header); //Console.Error.WriteLine("UdpReceiveThread: got {0} bytes from {1}. 
Sequence number = {2}, count = {3}", bytes.Length, from, header.SequenceNumber, count++); SerializedMessage message = new SerializedMessage(0, header, new RecvBuffer(bytes, MessageHeader.SizeOf, bytes.Length)); @@ -482,15 +501,30 @@ private void AddEndPointIncoming(int processId, Socket recvSocket) lock (this) { this.AllocateConnectionState(processId); + if (this.connections[processId].RecvSocket != null) { - Logging.Error("Error: already accepted a connection from process {0}", processId); - System.Environment.Exit(-1); + Logging.Error("WARNING: already accepted a connection from process {0}, so shutting down existing recvThread", processId); + + this.connections[processId].RecvSocket.Close(); + this.connections[processId].RecvThread.Join(); + + this.connections[processId].RecvSocket = null; + this.connections[processId].RecvThread = null; + + // + //System.Environment.Exit(-1); + } + + if (!this.connections[processId].EverReceivedIncomingConnection) + { + this.recvConnectionCountdown.Signal(); + this.connections[processId].EverReceivedIncomingConnection = true; } - this.recvConnectionCountdown.Signal(); this.connections[processId].RecvSocket = recvSocket; this.connections[processId].RecvThread = new Thread(() => this.PerProcessRecvThread(processId)); + this.connections[processId].RecvBufferSheaf = new RecvBufferSheaf(processId, (1 << 22) / RecvBufferPage.PAGE_SIZE, GlobalBufferPool.pool); #if RECV_HIGH_PRIORITY this.connections[processId].RecvThread.Priority = ThreadPriority.Highest; #endif @@ -520,40 +554,42 @@ internal void PeerConnect(Socket socket) this.AddEndPointIncoming(peerID, socket); } - public void RegisterMailbox(UntypedMailbox mailbox) + public void RegisterMailbox(Mailbox mailbox) { while (this.graphmailboxes.Count <= mailbox.GraphId) this.graphmailboxes.Add(null); if (this.graphmailboxes[mailbox.GraphId] == null) - this.graphmailboxes[mailbox.GraphId] = new List>(); + this.graphmailboxes[mailbox.GraphId] = new List>(); var mailboxes = 
this.graphmailboxes[mailbox.GraphId]; - while (mailboxes.Count <= mailbox.Id) + while (mailboxes.Count <= mailbox.ChannelId) mailboxes.Add(null); - if (mailboxes[mailbox.Id] == null) - mailboxes[mailbox.Id] = new List(); + if (mailboxes[mailbox.ChannelId] == null) + mailboxes[mailbox.ChannelId] = new List(); - while (mailboxes[mailbox.Id].Count <= mailbox.VertexId) - mailboxes[mailbox.Id].Add(null); - mailboxes[mailbox.Id][mailbox.VertexId] = mailbox; + while (mailboxes[mailbox.ChannelId].Count <= mailbox.VertexId) + mailboxes[mailbox.ChannelId].Add(null); + mailboxes[mailbox.ChannelId][mailbox.VertexId] = mailbox; //Logging.Info("Registered Mailbox {0} Vertex {1}", mailbox.Id, mailbox.VertexID); } public void AnnounceCheckpoint() { int seqno = this.GetSequenceNumber(-1); - SendBufferPage checkpointPage = SendBufferPage.CreateSpecialPage(MessageHeader.Checkpoint, seqno, this.Controller.SerializationFormat.GetSerializer()); + SendBufferPage checkpointPage = SendBufferPage.CreateSpecialPage(MessageHeader.Checkpoint, seqno); BufferSegment checkpointSegment = checkpointPage.Consume(); - for (int i = 0; i < this.connections.Count - 2; ++i) - checkpointSegment.Copy(); + // XXX: Hack due to new sequence numbers. 
+ + //for (int i = 0; i < this.connections.Count - 2; ++i) + // checkpointSegment.Copy(); for (int i = 0; i < this.connections.Count; ++i) { if (i != this.localProcessID) { Logging.Info("Sending checkpoint message to process {0}", i); - this.SendBufferSegment(checkpointPage.CurrentMessageHeader, i, checkpointSegment); + this.SendBufferSegment(checkpointPage.CurrentMessageHeader, i, checkpointSegment.DeepCopy()); } } } @@ -574,37 +610,24 @@ public void ResumeAfterCheckpoint() this.connections[i].CheckpointResumeEvent.Set(); } - private NaiadSerialization _headerSerializer; - private NaiadSerialization HeaderSerializer - { - get - { - if (this._headerSerializer == null) - this._headerSerializer = this.Controller.SerializationFormat.GetSerializer(); - - if (this._headerSerializer == null) - throw new Exception(); - - return this._headerSerializer; - } - } - private void AnnounceShutdown() { Logging.Progress("Announcing shutdown"); int seqno = this.GetSequenceNumber(-1); - SendBufferPage shutdownPage = SendBufferPage.CreateShutdownMessagePage(seqno, this.HeaderSerializer); + SendBufferPage shutdownPage = SendBufferPage.CreateShutdownMessagePage(seqno); BufferSegment shutdownSegment = shutdownPage.Consume(); - for (int i = 0; i < this.connections.Count - 2; ++i) - shutdownSegment.Copy(); + // XXX: Broadcast hack due to new sequence numbers. 
+ + //for (int i = 0; i < this.connections.Count - 2; ++i) + // shutdownSegment.Copy(); for (int i = 0; i < this.connections.Count; ++i) { if (i != this.localProcessID) { Logging.Progress("Sending shutdown message to process {0}", i); - this.SendBufferSegment(shutdownPage.CurrentMessageHeader, i, shutdownSegment); + this.SendBufferSegment(shutdownPage.CurrentMessageHeader, i, shutdownSegment.DeepCopy()); } } } @@ -624,18 +647,20 @@ private void WaitForShutdown() private void AnnounceStartup(int barrierId) { int seqno = this.GetSequenceNumber(-1); - SendBufferPage startupPage = SendBufferPage.CreateSpecialPage(MessageHeader.GenerateBarrierMessageHeader(barrierId), seqno, this.HeaderSerializer); + SendBufferPage startupPage = SendBufferPage.CreateSpecialPage(MessageHeader.GenerateBarrierMessageHeader(barrierId), seqno); BufferSegment startupSegment = startupPage.Consume(); - for (int i = 0; i < this.connections.Count - 2; ++i) - startupSegment.Copy(); + // XXX: Hack due to new sequence numbers. 
+ + //for (int i = 0; i < this.connections.Count - 2; ++i) + // startupSegment.Copy(); for (int i = 0; i < this.connections.Count; ++i) { if (i != this.localProcessID) { Logging.Info("Sending startup message to process {0}", i); - this.SendBufferSegment(startupPage.CurrentMessageHeader, i, startupSegment); + this.SendBufferSegment(startupPage.CurrentMessageHeader, i, startupSegment.DeepCopy()); } } } @@ -680,19 +705,6 @@ public void OnRecvBarrierMessageAndBlock(int id) public void SendBufferSegment(MessageHeader header, int destProcessID, BufferSegment segment, bool HighPriority=false, bool wakeUp=true) { - if (header.SequenceNumber < 0) // progress message - { - //NaiadTracing.Trace.ProgressSend(header); - //Tracing.Trace("$SendC {0} {1} {2} {3}", header.SequenceNumber, segment.Length, header.FromVertexID, header.DestVertexID); - //Console.Error.WriteLine("$SendC {0} {1} {2} {3}", header.SequenceNumber, segment.Length, header.FromVertexID, header.DestVertexID); - } - else - { - //NaiadTracing.Trace.DataSend(header); - //Tracing.Trace("$SendD {0} {1} {2} {3}", header.SequenceNumber, segment.Length, header.FromVertexID, header.DestVertexID); - //Console.Error.WriteLine("$SendD {0} {1} {2} {3}", header.SequenceNumber, segment.Length, header.FromVertexID, header.DestVertexID); - } - if (Controller.Configuration.DontUseHighPriorityQueue) HighPriority = false; @@ -712,18 +724,28 @@ public void SendBufferSegment(MessageHeader header, int destProcessID, BufferSeg private static SocketError SendAllBytes(Socket dest, ArraySegment segment) { - SocketError result; - Tracing.Trace("[Send"); - int bytesToSend = segment.Count; - int startOffset = segment.Offset; - do + SocketError result = SocketError.Success; + try { - int bytesSent = dest.Send(segment.Array, startOffset, bytesToSend, SocketFlags.None, out result); - startOffset += bytesSent; - bytesToSend -= bytesSent; - } while (result == SocketError.Success && bytesToSend != 0); - Tracing.Trace("]Send"); - return result; + + 
NaiadTracing.Trace.RegionStart(NaiadTracingRegion.Send); + int bytesToSend = segment.Count; + int startOffset = segment.Offset; + do + { + int bytesSent = dest.Send(segment.Array, startOffset, bytesToSend, SocketFlags.None, out result); + startOffset += bytesSent; + bytesToSend -= bytesSent; + } while (result == SocketError.Success && bytesToSend != 0); + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.Send); + return result; + } + catch (Exception e) + { + Logging.Error("WARNING: An exception was raised when sending: {0}", e); + NaiadTracing.Trace.SocketError(result); + return SocketError.Fault; + } } private static SocketError SendAllBytes(Socket dest, byte[] bytes) @@ -737,7 +759,11 @@ private void PerProcessSendThread(int destProcessID) //PinnedThread pin = new PinnedThread(0xC0UL); PinnedThread pin = new PinnedThread(destProcessID % 8); #endif - Tracing.Trace("@SendThread[{0:00}]", destProcessID); + NaiadTracing.Trace.ThreadName("SendThread[{0:00}]", destProcessID); + bool doneAtLeastOnce = false; + + try_connecting_again: + // Connect to the destination socket. 
while (true) { @@ -777,7 +803,11 @@ private void PerProcessSendThread(int destProcessID) this.connections[destProcessID].Status = ConnectionStatus.Idle; - this.sendConnectionCountdown.Signal(1); + if (!doneAtLeastOnce) + { + this.sendConnectionCountdown.Signal(1); + doneAtLeastOnce = true; + } this.startCommunicatingEvent.WaitOne(); Socket socket; @@ -811,7 +841,45 @@ private void PerProcessSendThread(int destProcessID) sw.Start(); + KeyValuePair[] resendSegments; + lock (this.connections[destProcessID].DeferredDisposalSegments) + { + resendSegments = this.connections[destProcessID].DeferredDisposalSegments.OrderBy(x => x.Key).ToArray(); + } + bool shuttingDown = false; + + for (int i = 0; i < resendSegments.Length; ++i) + { + shuttingDown |= (resendSegments[i].Value.Type == SerializedMessageType.Shutdown); + ArraySegment messageArraySegment = resendSegments[i].Value.ToArraySegment(); + + MessageHeader header = default(MessageHeader); + MessageHeader.ReadHeaderFromBuffer(messageArraySegment.Array, messageArraySegment.Offset, ref header); + var channelId = header.ChannelID & 0xFFFF; + var dest = header.DestVertexID; + // For tracing purposes, if broadcast then replace dest vertex id=-1 with actual dest process id. + // This assumes that broadcast is only being used by progress messages, where there is a single + // vertex on each machine located on worker 0. 
+ if (dest == -1) + { + dest = destProcessID; + } + NaiadTracing.Trace.MsgSend(channelId, header.SequenceNumber, header.Length, header.FromVertexID, dest); + + SocketError errorCode = SendAllBytes(socket, messageArraySegment); + if (errorCode != SocketError.Success) + { + NaiadTracing.Trace.SocketError(errorCode); + + Logging.Error("WARNING: Send thread got error from peer {0}: {1}", destProcessID, errorCode); + socket.Close(); + goto try_connecting_again; + + //this.HandleSocketError(destProcessID, errorCode); + } + } + while (true) { BufferSegment seg; @@ -822,20 +890,47 @@ private void PerProcessSendThread(int destProcessID) Debug.Assert(seg.Length > 0); length += seg.Length; - shuttingDown = (seg.Type == SerializedMessageType.Shutdown); + shuttingDown |= (seg.Type == SerializedMessageType.Shutdown); + + // Rewrite the message header to use contiguous per-connection sequence numbers. + ArraySegment messageArraySegment = seg.ToArraySegment(); + MessageHeader header = default(MessageHeader); + MessageHeader.ReadHeaderFromBuffer(messageArraySegment.Array, messageArraySegment.Offset, ref header); + header.SequenceNumber = this.connections[destProcessID].NextSequenceNumber++; + MessageHeader.WriteHeaderToBuffer(messageArraySegment.Array, messageArraySegment.Offset, header); + //Logging.Error("Sending message to {0}: sequence number {1}, type {2}", destProcessID, header.SequenceNumber, header.Type); + + this.DeferDisposalUntilAcknowledged(destProcessID, header.SequenceNumber, seg); + + var channelId = header.ChannelID & 0xFFFF; + var dest = header.DestVertexID; + // For tracing purposes, if broadcast then replace dest vertex id=-1 with actual dest process id. + // This assumes that broadcast is only being used by progress messages, where there is a single + // vertex on each machine located on worker 0. 
+ if (dest == -1) + { + dest = destProcessID; + } + NaiadTracing.Trace.MsgSend(channelId, header.SequenceNumber, header.Length, header.FromVertexID, dest); + SocketError errorCode = SendAllBytes(socket, seg.ToArraySegment()); if (errorCode != SocketError.Success) { - Tracing.Trace("*Socket Error {0}", errorCode); - this.HandleSocketError(destProcessID, errorCode); + NaiadTracing.Trace.SocketError(errorCode); + + Logging.Error("Send thread got error from peer {0}: {1}", destProcessID, errorCode); + socket.Close(); + goto try_connecting_again; + + //this.HandleSocketError(destProcessID, errorCode); } this.connections[destProcessID].ProgressSegmentsSent += 1; this.connections[destProcessID].sendStatistics[(int)RuntimeStatistic.TxHighPriorityMessages] += 1; this.connections[destProcessID].sendStatistics[(int)RuntimeStatistic.TxHighPriorityBytes] += seg.Length; - seg.Dispose(); + } while (this.connections[destProcessID].SegmentQueue.TryDequeue(out seg)) @@ -843,26 +938,74 @@ private void PerProcessSendThread(int destProcessID) Debug.Assert(seg.Length > 0); length += seg.Length; - shuttingDown = (seg.Type == SerializedMessageType.Shutdown); + shuttingDown |= (seg.Type == SerializedMessageType.Shutdown); + + // Rewrite the message header to use contiguous per-connection sequence numbers. 
+ ArraySegment messageArraySegment = seg.ToArraySegment(); + MessageHeader header = default(MessageHeader); + MessageHeader.ReadHeaderFromBuffer(messageArraySegment.Array, messageArraySegment.Offset, ref header); + header.SequenceNumber = this.connections[destProcessID].NextSequenceNumber++; + MessageHeader.WriteHeaderToBuffer(messageArraySegment.Array, messageArraySegment.Offset, header); + + //Logging.Error("Sending message to {0}: sequence number {1}, type {2}", destProcessID, header.SequenceNumber, header.Type); + + this.DeferDisposalUntilAcknowledged(destProcessID, header.SequenceNumber, seg); + + var channelId = header.ChannelID & 0xFFFF; + var dest = header.DestVertexID; + // For tracing purposes, if broadcast then replace dest vertex id=-1 with actual dest process id. + // This assumes that broadcast is only being used by progress messages, where there is a single + // vertex on each machine located on worker 0. + if (dest == -1) + { + dest = destProcessID; + } + NaiadTracing.Trace.MsgSend(channelId, header.SequenceNumber, header.Length, header.FromVertexID, dest); SocketError errorCode = SendAllBytes(socket, seg.ToArraySegment()); if (errorCode != SocketError.Success) { - Tracing.Trace("*Socket Error {0}", errorCode); - this.HandleSocketError(destProcessID, errorCode); + NaiadTracing.Trace.SocketError(errorCode); + + Logging.Error("WARNING: Send thread got error from peer {0}: {1}", destProcessID, errorCode); + socket.Close(); + goto try_connecting_again; + + //this.HandleSocketError(destProcessID, errorCode); } this.connections[destProcessID].DataSegmentsSent += 1; this.connections[destProcessID].sendStatistics[(int)RuntimeStatistic.TxNormalPriorityMessages] += 1; this.connections[destProcessID].sendStatistics[(int)RuntimeStatistic.TxNormalPriorityBytes] += seg.Length; - - seg.Dispose(); } if (shuttingDown) break; if (length == 0) { + int seqNumToAck = this.connections[destProcessID].SequenceNumberToAcknowledge; + if (seqNumToAck > 
this.connections[destProcessID].SequenceNumberAcknowledged) + { + MessageHeader ackHeader = new MessageHeader(-1, seqNumToAck, -1, -1, 0, SerializedMessageType.Ack); + byte[] ackBuffer = new byte[MessageHeader.SizeOf]; + MessageHeader.WriteHeaderToBuffer(ackBuffer, 0, ackHeader); + + SocketError errorCode = SendAllBytes(socket, ackBuffer); + if (errorCode != SocketError.Success) + { + NaiadTracing.Trace.SocketError(errorCode); + + Logging.Error("WARNING: Send thread got error from peer {0}: {1}", destProcessID, errorCode); + socket.Close(); + goto try_connecting_again; + + //this.HandleSocketError(destProcessID, errorCode); + } + + this.connections[destProcessID].SequenceNumberAcknowledged = seqNumToAck; + } + + if (this.useBroadcastWakeup) { this.wakeUpEvent.Await(this.connections[destProcessID].SendEvent, wakeupCount + 1); @@ -894,7 +1037,7 @@ private void PerProcessRecvThread(int srcProcessID) #if RECV_AFFINITY PinnedThread pin = new PinnedThread(srcProcessID % 8); #endif - Tracing.Trace("@RecvThread[{0:00}]", srcProcessID); + NaiadTracing.Trace.ThreadName("RecvThread[{0:00}]", srcProcessID); Logging.Info("Initializing per-process recv thread for {0}", srcProcessID); this.startCommunicatingEvent.WaitOne(); @@ -939,33 +1082,88 @@ private void PerProcessRecvThread(int srcProcessID) recvBytesIn += recvSegment.Count; - int bytesRecvd = socket.Receive(recvSegment.Array, recvSegment.Offset, recvSegment.Count, SocketFlags.None, out errorCode); - + int bytesRecvd = 0; + try + { + bytesRecvd = socket.Receive(recvSegment.Array, recvSegment.Offset, recvSegment.Count, + SocketFlags.None, out errorCode); + } + catch (Exception e) + { + Logging.Error("WARNING: Got exception while receiving from {0}: {1}", srcProcessID, e); + errorCode = SocketError.Fault; + } + + if (errorCode != SocketError.Success) + { + //Tracing.Trace("*Socket Error {0}", errorCode); + Logging.Error("WARNING: Receive thread got socket error from peer {0}: {1}", srcProcessID, errorCode); + socket.Close(); 
+ + lock (this) + { + this.connections[srcProcessID].RecvThread = null; + this.connections[srcProcessID].RecvSocket = null; + this.connections[srcProcessID].RecvBufferSheaf = null; + } + + return; + + //this.HandleSocketError(srcProcessID, errorCode); + } + // If the remote host shuts down the Socket connection with the Shutdown method, // and all available data has been received, the Receive method will complete // immediately and return zero bytes. if (bytesRecvd == 0) + { + Logging.Error("WARNING: Receive thread received no bytes from peer {0}", srcProcessID); + socket.Close(); + + lock (this) + { + this.connections[srcProcessID].RecvThread = null; + this.connections[srcProcessID].RecvSocket = null; + this.connections[srcProcessID].RecvBufferSheaf = null; + } + return; - recvBytesOut += bytesRecvd; - numRecvs++; - //Logging.Progress("Received {0} bytes from {1}", bytesRecvd, srcProcessID); - if (errorCode != SocketError.Success) - { - Tracing.Trace("*Socket Error {0}", errorCode); + //Debug.Assert(false); + //Logging.Info("Shutting down receive thread due to lack of data - believed to be impossible."); - this.HandleSocketError(srcProcessID, errorCode); + //this.HandleSocketError(srcProcessID, errorCode); + //return; } + this.connections[srcProcessID].RecvBufferSheaf.OnBytesProduced(bytesRecvd); - foreach (SerializedMessage message in this.connections[srcProcessID].RecvBufferSheaf.ConsumeMessages(this.HeaderSerializer)) + recvBytesOut += bytesRecvd; + numRecvs++; + + //Logging.Progress("Received {0} bytes from {1}", bytesRecvd, srcProcessID); + + foreach (SerializedMessage message in this.connections[srcProcessID].RecvBufferSheaf.ConsumeMessages()) { + if (message.Header.SequenceNumber != this.connections[srcProcessID].SequenceNumberReceived + 1 && message.Header.Type != SerializedMessageType.Ack) + { + Logging.Error("Dropping duplicated received message from {0}: sequence number {1}, type {2}", srcProcessID, message.Header.SequenceNumber, message.Header.Type); + 
message.Dispose(); + continue; + } + else + { + //Logging.Error("Receiving message from {0}: sequence number {1}, type {2}", srcProcessID, message.Header.SequenceNumber, message.Header.Type); + } + message.ConnectionSequenceNumber = nextConnectionSequenceNumber++; this.connections[srcProcessID].recvStatistics[(int)RuntimeStatistic.RxNetMessages] += 1; this.connections[srcProcessID].recvStatistics[(int)RuntimeStatistic.RxNetBytes] += message.Header.Length; + //Console.WriteLine("Received {1} message: {0}", message.Header.SequenceNumber, message.Type); + switch (message.Type) { case SerializedMessageType.Startup: @@ -997,12 +1195,23 @@ private void PerProcessRecvThread(int srcProcessID) bool success = this.AttemptDelivery(message, srcProcessID); Debug.Assert(success); break; + case SerializedMessageType.Ack: + this.HandleAcknowledgement(srcProcessID, message.Header.SequenceNumber); + break; default: Logging.Progress("Received BAD msg type {0} from process {1}! ", message.Type, srcProcessID); Debug.Assert(false); break; } + + if (message.Header.Type != SerializedMessageType.Ack) + { + this.connections[srcProcessID].SequenceNumberReceived = message.Header.SequenceNumber; + } } + + if (this.connections[srcProcessID].SequenceNumberReceived >= 0) + this.AcknowledgeMessage(srcProcessID, this.connections[srcProcessID].SequenceNumberReceived); } #if RECV_AFFINITY pin.Dispose(); @@ -1034,8 +1243,9 @@ private bool AttemptDelivery(SerializedMessage message, int peerID = -1) { if (message.Header.ChannelID < 0 || this.localProcessID < 0) // debug check throw new Exception("This shouldn't happen"); - + // Special-cased logic for the progress channel, where we know that each process uses its process ID as the vertex ID. 
+ NaiadTracing.Trace.MsgRecv(channelId, message.Header.SequenceNumber, message.Header.Length, message.Header.FromVertexID, this.localProcessID); try { this.graphmailboxes[graphId][channelId][this.localProcessID].DeliverSerializedMessage(message, new ReturnAddress(peerID, message.Header.FromVertexID)); @@ -1047,7 +1257,6 @@ private bool AttemptDelivery(SerializedMessage message, int peerID = -1) Console.Error.WriteLine("{0} mailboxes currently exist", "some");//this.mailboxes.Count); System.Environment.Exit(-1); } - return true; } else if (graphId >= this.graphmailboxes.Count || @@ -1070,6 +1279,7 @@ private bool AttemptDelivery(SerializedMessage message, int peerID = -1) } else { + NaiadTracing.Trace.MsgRecv(channelId, message.Header.SequenceNumber, message.Header.Length, message.Header.FromVertexID, message.Header.DestVertexID); this.graphmailboxes[graphId][channelId][message.Header.DestVertexID].DeliverSerializedMessage(message, new ReturnAddress(peerID, message.Header.FromVertexID)); return true; } @@ -1142,30 +1352,31 @@ public int BroadcastBufferSegment(MessageHeader header, BufferSegment segment) var nmsgs = 0; if (segment.Length > 0) { + if (this.Controller.Configuration.Broadcast == Configuration.BroadcastProtocol.UdpOnly || this.Controller.Configuration.Broadcast == Configuration.BroadcastProtocol.TcpUdp) { ArraySegment array = segment.ToArraySegment(); Debug.Assert(array.Offset == 0); - Tracing.Trace("{UdpBroadcast"); - this.udpClient.Send(array.Array, array.Count); - Tracing.Trace("}UdpBroadcast"); + NaiadTracing.Trace.RegionStart(NaiadTracingRegion.BroadcastUDP); + this.udpClient.Send(array.Array, array.Count); + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.BroadcastUDP); nmsgs++; } if (this.Controller.Configuration.Broadcast == Configuration.BroadcastProtocol.TcpOnly || this.Controller.Configuration.Broadcast == Configuration.BroadcastProtocol.TcpUdp) { - Tracing.Trace("{TcpBroadcast"); + 
NaiadTracing.Trace.RegionStart(NaiadTracingRegion.BroadcastTCP); for (int i = 0; i < this.connections.Count; ++i) if (i != this.localProcessID) { // Increment refcount for each destination process. - segment.Copy(); - this.SendBufferSegment(header, i, segment, true, !this.useBroadcastWakeup); + //segment.Copy(); + this.SendBufferSegment(header, i, segment.DeepCopy(), true, !this.useBroadcastWakeup); nmsgs++; } if (this.useBroadcastWakeup) this.wakeUpEvent.Advance(); - Tracing.Trace("}TcpBroadcast"); + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.BroadcastTCP); } } // Decrement refcount for the initial call to Consume(). @@ -1173,6 +1384,41 @@ public int BroadcastBufferSegment(MessageHeader header, BufferSegment segment) return nmsgs; } + + private Dictionary> deferredDisposalSegments = new Dictionary>(); + + private void DeferDisposalUntilAcknowledged(int destProcessId, int sequenceNumber, BufferSegment segment) + { + lock (this.connections[destProcessId].DeferredDisposalSegments) + { + this.connections[destProcessId].DeferredDisposalSegments.Add(sequenceNumber, segment); + } + } + + private void AcknowledgeMessage(int fromProcessId, int sequenceNumber) + { + this.connections[fromProcessId].SequenceNumberToAcknowledge = sequenceNumber; + this.connections[fromProcessId].SendEvent.Set(); + } + + private void HandleAcknowledgement(int fromProcessId, int sequenceNumber) + { + //Console.WriteLine("Handling ack for {0}", sequenceNumber); + lock (this.connections[fromProcessId].DeferredDisposalSegments) + { + var deferredSegments = this.connections[fromProcessId].DeferredDisposalSegments; + + foreach (var x in deferredSegments.ToArray()) + { + if (x.Key <= sequenceNumber) + { + deferredSegments.Remove(x.Key); + x.Value.Dispose(); + } + } + } + } + } } diff --git a/Naiad/Runtime/Progress/PointstampCountSet.cs b/Naiad/Runtime/Progress/PointstampCountSet.cs index 792ceb7..ad1e835 100644 --- a/Naiad/Runtime/Progress/PointstampCountSet.cs +++ 
b/Naiad/Runtime/Progress/PointstampCountSet.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Progress/ProgressTracker.cs b/Naiad/Runtime/Progress/ProgressTracker.cs index 527319e..1e4e0fb 100644 --- a/Naiad/Runtime/Progress/ProgressTracker.cs +++ b/Naiad/Runtime/Progress/ProgressTracker.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Progress/Update.cs b/Naiad/Runtime/Progress/Update.cs index a226ca1..e6ef525 100644 --- a/Naiad/Runtime/Progress/Update.cs +++ b/Naiad/Runtime/Progress/Update.cs @@ -1,5 +1,5 @@ -/* - * Naiad ver. 0.4 +/* + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Progress/UpdateAggregator.cs b/Naiad/Runtime/Progress/UpdateAggregator.cs index fabdf9f..5264bf8 100644 --- a/Naiad/Runtime/Progress/UpdateAggregator.cs +++ b/Naiad/Runtime/Progress/UpdateAggregator.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -57,9 +57,10 @@ internal override void PerformAction(Scheduler.WorkItem workItem) internal void OnRecv(Dictionary deltas) { - Tracing.Trace("(AggLock"); + NaiadTracing.Trace.LockAcquire(this.Lock); lock(this.Lock) { + NaiadTracing.Trace.LockHeld(this.Lock); foreach (var pair in deltas) { if (!BufferedUpdates.ContainsKey(pair.Key)) @@ -71,7 +72,7 @@ internal void OnRecv(Dictionary deltas) BufferedUpdates.Remove(pair.Key); } } - Tracing.Trace(")AggLock"); + NaiadTracing.Trace.LockRelease(this.Lock); ConsiderFlushingBufferedUpdates(); } @@ -83,9 +84,10 @@ internal void ConsiderFlushingBufferedUpdates() var mustFlushBuffer = false; // consult the buffered updates under a lock. 
- Tracing.Trace("(AggLock"); + NaiadTracing.Trace.LockAcquire(this.Lock); lock (this.Lock) { + NaiadTracing.Trace.LockHeld(this.Lock); if (this.BufferedUpdates.Count > 0) { var frontier = this.Stage.InternalComputation.ProgressTracker.GetInfoForWorker(0).PointstampCountSet.Frontier; @@ -104,7 +106,7 @@ internal void ConsiderFlushingBufferedUpdates() } } } - Tracing.Trace(")AggLock"); + NaiadTracing.Trace.LockRelease(this.Lock); if (mustFlushBuffer) { @@ -112,18 +114,21 @@ internal void ConsiderFlushingBufferedUpdates() Dictionary FreshBufferedUpdates = new Dictionary(); // we don't want to get stuck behind a centralizer -> consumer on the same process. - Tracing.Trace("(GlobalLock"); + NaiadTracing.Trace.LockAcquire(this.scheduler.Controller.GlobalLock); lock (this.scheduler.Controller.GlobalLock) { + NaiadTracing.Trace.LockHeld(this.scheduler.Controller.GlobalLock); + // get exclusive access and swap the update buffer. - Tracing.Trace("(AggLock"); + NaiadTracing.Trace.LockAcquire(this.Lock); lock (this.Lock) { + NaiadTracing.Trace.LockHeld(this.Lock); PrivateBufferedUpdates = this.BufferedUpdates; this.BufferedUpdates = FreshBufferedUpdates; } - Tracing.Trace(")AggLock"); - + NaiadTracing.Trace.LockRelease(this.Lock); + // update Notifications count to include shipped values. 
foreach (var pair in PrivateBufferedUpdates) { @@ -155,7 +160,7 @@ internal void ConsiderFlushingBufferedUpdates() PrivateBufferedUpdates.Clear(); this.Output.Flush(); } - Tracing.Trace(")GlobalLock"); + NaiadTracing.Trace.LockRelease(this.scheduler.Controller.GlobalLock); } } @@ -163,6 +168,7 @@ public ProgressUpdateAggregator(int index, Stage stage) : base(index, stage) { this.Output = new VertexOutputBuffer(this); + NaiadTracing.Trace.LockInfo(this.Lock, "Aggregator Lock"); } } -} \ No newline at end of file +} diff --git a/Naiad/Runtime/Progress/UpdateBuffer.cs b/Naiad/Runtime/Progress/UpdateBuffer.cs index 633a079..dd11401 100644 --- a/Naiad/Runtime/Progress/UpdateBuffer.cs +++ b/Naiad/Runtime/Progress/UpdateBuffer.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Progress/UpdateConsumer.cs b/Naiad/Runtime/Progress/UpdateConsumer.cs index e420050..89064c3 100644 --- a/Naiad/Runtime/Progress/UpdateConsumer.cs +++ b/Naiad/Runtime/Progress/UpdateConsumer.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -93,13 +93,14 @@ private class VertexInput : Dataflow.VertexInput internal void InjectElement(Pointstamp time, Int64 update) { // by directly modifying the PCS, we don't risk sending anything from centralizer. Used only for initializing inputs. 
- Tracing.Trace("(PCSLock"); + NaiadTracing.Trace.LockAcquire(this.PCS); Monitor.Enter(this.PCS); + NaiadTracing.Trace.LockHeld(this.PCS); var progressChanged = PCS.UpdatePointstampCount(time, update); Monitor.Exit(this.PCS); - Tracing.Trace(")PCSLock"); + NaiadTracing.Trace.LockRelease(this.PCS); } public readonly PointstampCountSet PCS; @@ -110,15 +111,16 @@ internal void InjectElement(Pointstamp time, Int64 update) public void ProcessCountChange(Pointstamp time, Int64 weight) { // the PCS should not be touched outside this lock, other than by capturing PCS.Frontier. - Tracing.Trace("(PCSLock"); + NaiadTracing.Trace.LockAcquire(this.PCS); Monitor.Enter(this.PCS); + NaiadTracing.Trace.LockHeld(this.PCS); var oldFrontier = PCS.Frontier; var frontierChanged = PCS.UpdatePointstampCount(time, weight); var newFrontier = PCS.Frontier; Monitor.Exit(this.PCS); - Tracing.Trace(")PCSLock"); + NaiadTracing.Trace.LockRelease(this.PCS); if (frontierChanged) { @@ -132,12 +134,13 @@ public void ProcessCountChange(Pointstamp time, Int64 weight) // no elements means done. if (newFrontier.Length == 0) { - Tracing.Trace("Frontier advanced to "); + //Tracing.Trace("Frontier advanced to "); + NaiadTracing.Trace.RefAlignFrontier(); this.FrontierEmpty.Set(); } else { - Tracing.Trace("Frontier advanced to " + string.Join(" ", newFrontier.Select(x => x.ToString()))); + NaiadTracing.Trace.AdvanceFrontier(newFrontier); } // Wake up schedulers to run shutdown actions for the graph. @@ -148,8 +151,9 @@ public void ProcessCountChange(Pointstamp time, Int64 weight) public void ProcessCountChange(Message updates) { // the PCS should not be touched outside this lock, other than by capturing PCS.Frontier. 
- Tracing.Trace("(PCSLock"); + NaiadTracing.Trace.LockAcquire(this.PCS); Monitor.Enter(this.PCS); + NaiadTracing.Trace.LockHeld(this.PCS); var oldFrontier = PCS.Frontier; @@ -160,7 +164,7 @@ public void ProcessCountChange(Message updates) var newFrontier = PCS.Frontier; Monitor.Exit(this.PCS); - Tracing.Trace(")PCSLock"); + NaiadTracing.Trace.LockRelease(this.PCS); if (frontierChanged) { @@ -174,12 +178,13 @@ public void ProcessCountChange(Message updates) // no elements means done. if (newFrontier.Length == 0) { - Tracing.Trace("Frontier advanced to "); + //Tracing.Trace("Frontier advanced to "); + NaiadTracing.Trace.RefAlignFrontier(); this.FrontierEmpty.Set(); } else { - Tracing.Trace("Frontier advanced to " + string.Join(" ", newFrontier.Select(x => x.ToString()))); + NaiadTracing.Trace.AdvanceFrontier(newFrontier); } // Wake up schedulers to run shutdown actions for the graph. @@ -250,13 +255,14 @@ public VertexInput(ProgressUpdateCentralizer op) internal void InjectElement(Pointstamp time, Int64 update) { // by directly modifying the PCS, we don't risk sending anything from the centralizer. Used only for initializing inputs. - Tracing.Trace("(PCSLock"); + NaiadTracing.Trace.LockAcquire(this.PCS); Monitor.Enter(this.PCS); + NaiadTracing.Trace.LockHeld(this.PCS); var frontierChanged = PCS.UpdatePointstampCount(time, update); Monitor.Exit(this.PCS); - Tracing.Trace(")PCSLock"); + NaiadTracing.Trace.LockRelease(this.PCS); } public readonly PointstampCountSet PCS; @@ -269,26 +275,29 @@ internal void InjectElement(Pointstamp time, Int64 update) public void ProcessCountChange(Message updates) { // the PCS should not be touched outside this lock, other than by capturing PCS.Frontier. 
- Tracing.Trace("(PCSLock"); + NaiadTracing.Trace.LockAcquire(this.PCS); Monitor.Enter(this.PCS); + NaiadTracing.Trace.LockHeld(this.PCS); var oldfrontier = PCS.Frontier; var frontierChanged = false; for (int i = 0; i < updates.length; i++) - frontierChanged = PCS.UpdatePointstampCount(updates.payload[i].Pointstamp, updates.payload[i].Delta) || frontierChanged; ; + frontierChanged = PCS.UpdatePointstampCount(updates.payload[i].Pointstamp, updates.payload[i].Delta) || frontierChanged; var newfrontier = PCS.Frontier; Monitor.Exit(this.PCS); - Tracing.Trace(")PCSLock"); + NaiadTracing.Trace.LockRelease(this.PCS); if (frontierChanged) { // get an exclusive lock, as this.Output.Send is not threadsafe. - Tracing.Trace("(GlobalLock"); + NaiadTracing.Trace.LockAcquire(this.scheduler.Controller.GlobalLock); lock (this.scheduler.Controller.GlobalLock) { + NaiadTracing.Trace.LockHeld(this.scheduler.Controller.GlobalLock); + var output = this.Output.GetBufferForTime(new Empty()); foreach (var pointstamp in newfrontier.Except(oldfrontier)) output.Send(new Update(pointstamp, +1)); @@ -298,7 +307,7 @@ public void ProcessCountChange(Message updates) this.Output.Flush(); } - Tracing.Trace(")GlobalLock"); + NaiadTracing.Trace.LockRelease(this.scheduler.Controller.GlobalLock); if (this.OnFrontierChanged != null) this.OnFrontierChanged(this, new FrontierChangedEventArgs(newfrontier)); @@ -320,6 +329,7 @@ internal ProgressUpdateCentralizer(int index, Stage stage, ProgressUpdate this.Output = new VertexOutputBuffer(this); this.PCS = new PointstampCountSet(this.Stage.InternalComputation.Reachability); + NaiadTracing.Trace.LockInfo(this.PCS, "PCS lock"); } internal override void PerformAction(Scheduler.WorkItem workItem) { throw new NotImplementedException(); } diff --git a/Naiad/Runtime/Progress/UpdateProducer.cs b/Naiad/Runtime/Progress/UpdateProducer.cs index aafacbd..dade3b9 100644 --- a/Naiad/Runtime/Progress/UpdateProducer.cs +++ b/Naiad/Runtime/Progress/UpdateProducer.cs @@ -1,5 
+1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -45,9 +45,10 @@ public override string ToString() private readonly Dictionary outstandingRecords = new Dictionary(); public void UpdateRecordCounts(Pointstamp time, Int64 delta) { - Tracing.Trace("(ProdLock"); + NaiadTracing.Trace.LockAcquire(this); lock (this) { + NaiadTracing.Trace.LockHeld(this); //if (this.Stage.InternalComputation.Controller.Configuration.Impersonation && !this.Stage.InternalComputation.Reachability.NoImpersonation.Contains(time.Location) && this.Stage.InternalComputation.Reachability.Impersonations[time.Location] != null) //{ // foreach (var newVersion in this.Stage.InternalComputation.Reachability.EnumerateImpersonations(time)) @@ -59,7 +60,7 @@ public void UpdateRecordCounts(Pointstamp time, Int64 delta) AddToOutstandingRecords(time, delta); } - Tracing.Trace(")ProdLock"); + NaiadTracing.Trace.LockRelease(this); } private void AddToOutstandingRecords(Pointstamp time, Int64 delta) @@ -74,15 +75,17 @@ private void AddToOutstandingRecords(Pointstamp time, Int64 delta) outstandingRecords.Remove(time); } } - + /// /// Lock the producer and transmit pointstamp counts to the appropriate consumer(s) /// public void Start() { - Tracing.Trace("(ProdLock"); + NaiadTracing.Trace.LockAcquire(this); lock (this) { + NaiadTracing.Trace.LockHeld(this); + // note: FOC may return without sending stuff due to re-entrancy. 
if (outstandingRecords.Count > 0) { @@ -90,7 +93,7 @@ public void Start() outstandingRecords.Clear(); } } - Tracing.Trace(")ProdLock"); + NaiadTracing.Trace.LockRelease(this); } public void Checkpoint(NaiadWriter writer) @@ -107,6 +110,7 @@ internal ProgressUpdateProducer(InternalComputation manager, ProgressUpdateAggre { this.LocalPCS = new PointstampCountSet(manager.Reachability); this.Aggregator = aggregator; + NaiadTracing.Trace.LockInfo(this, "Producer lock"); } } -} \ No newline at end of file +} diff --git a/Naiad/Runtime/Scheduling/EventCount.cs b/Naiad/Runtime/Scheduling/EventCount.cs index 2829a63..66e46e5 100644 --- a/Naiad/Runtime/Scheduling/EventCount.cs +++ b/Naiad/Runtime/Scheduling/EventCount.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -114,7 +114,7 @@ public void Await(AutoResetEvent selectiveEvent, long waitfor) } else { - Tracing.Trace("Await new waitblock"); + KernelLoggerTracing.PostKernelLoggerMarkEvent("Await new waitblock"); Console.Error.WriteLine("EventCount Await(): NEW WAITBLOCK"); this.current = new WaitBlock(); } @@ -216,9 +216,9 @@ public void Advance() // Unblock the waiters if (wb != null) { - Tracing.Trace("{SetEvent"); + NaiadTracing.Trace.RegionStart(NaiadTracingRegion.SetEvent); wb.ev.Set(); - Tracing.Trace("}SetEvent"); + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.SetEvent); } } diff --git a/Naiad/Runtime/Scheduling/Events.cs b/Naiad/Runtime/Scheduling/Events.cs index bd2a5b5..0e0da78 100644 --- a/Naiad/Runtime/Scheduling/Events.cs +++ b/Naiad/Runtime/Scheduling/Events.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/Scheduling/Placement.cs b/Naiad/Runtime/Scheduling/Placement.cs index 9eb45bb..9898b94 100644 --- a/Naiad/Runtime/Scheduling/Placement.cs +++ b/Naiad/Runtime/Scheduling/Placement.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 
0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -161,16 +161,24 @@ public override bool Equals(Placement that) /// /// Placement with one vertex /// - internal class SingleVertex : Placement + public class SingleVertex : Placement { private readonly VertexLocation location; + /// + /// Constructor + /// + public SingleVertex() + { + this.location = new VertexLocation(0, 0, 0); + } + /// /// Constructor /// /// process identifier for the vertex /// thread identifier for the vertex - public SingleVertex(int processId, int threadId) + internal SingleVertex(int processId, int threadId) { this.location = new VertexLocation(0, processId, threadId); } @@ -190,7 +198,7 @@ public SingleVertex(int processId, int threadId) /// /// Enumerator /// - /// + /// an enumeration of the Placement's locations public override IEnumerator GetEnumerator() { yield return this.location; @@ -199,8 +207,8 @@ public override IEnumerator GetEnumerator() /// /// Test equality with another SingleVertexPlacement /// - /// - /// + /// placement to compare to + /// true if the placements are equal public override bool Equals(Placement that) { SingleVertex other = that as SingleVertex; diff --git a/Naiad/Runtime/Scheduling/Reachability.cs b/Naiad/Runtime/Scheduling/Reachability.cs index 3bf5a2e..76fd5bd 100644 --- a/Naiad/Runtime/Scheduling/Reachability.cs +++ b/Naiad/Runtime/Scheduling/Reachability.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -460,4 +460,4 @@ public bool ProductOrderLessThan(Pointstamp a, Pointstamp b) return true; } } -} \ No newline at end of file +} diff --git a/Naiad/Runtime/Scheduling/Scheduler.cs b/Naiad/Runtime/Scheduling/Scheduler.cs index e9762da..b483a85 100644 --- a/Naiad/Runtime/Scheduling/Scheduler.cs +++ b/Naiad/Runtime/Scheduling/Scheduler.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 
0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -52,15 +52,6 @@ internal class ComputationState public Runtime.Progress.ProgressUpdateProducer Producer { -#if false - get - { - if (this.Manager.ProgressTracker == null) - return null; - else - return this.Manager.ProgressTracker.GetProducerForScheduler(this.index); - } -#else get { if (this.InternalComputation.ProgressTracker == null) @@ -73,7 +64,6 @@ public Runtime.Progress.ProgressUpdateProducer Producer return this.producer; } } -#endif } private Runtime.Progress.ProgressUpdateProducer producer; @@ -122,8 +112,8 @@ internal void RegisterGraph(InternalComputation internalComputation) while (!success); } - private readonly BufferPool sendPool; public BufferPool SendPool { get { return this.sendPool; } } + private readonly BufferPool sendPool; public struct WorkItem : IEquatable { @@ -196,6 +186,11 @@ internal void Schedule(WorkItem workItem) if (Logging.LogLevel <= LoggingLevel.Info) Logging.Info("Vertex {2}: Finishing @ {1}:\t{0}", workItem.Vertex, workItem.Requirement, this.Index); } + internal bool ProposeDrain(LocalMailbox mailbox) + { + return true; + } + internal void Register(Dataflow.Vertex vertex, InternalComputation manager) { for (int i = 0; i < this.computationStates.Count; i++) @@ -211,11 +206,11 @@ internal IList GetWorkItemsForVertex(Dataflow.Vertex vertex) protected System.Collections.Concurrent.ConcurrentQueue sharedQueue = new System.Collections.Concurrent.ConcurrentQueue(); - private void Enqueue(WorkItem item, bool local = true) + private void Enqueue(WorkItem item, bool fromThisScheduler = true) { this.Controller.Workers.NotifyVertexEnqueued(this, item); - if (local) + if (fromThisScheduler) { computationStates[item.Vertex.Stage.InternalComputation.Index].WorkItems.Add(item); } @@ -246,15 +241,6 @@ internal void Start() this.thread.Start(); } - internal void DrainPostOffice() - { - //throw new NotImplementedException(); - - // drain the shared queue. 
- var item = default(WorkItem); - while (sharedQueue.TryDequeue(out item)) - Enqueue(item); - } /// /// Starts the ThreadScheduler into an infinite scheduling loop. @@ -264,141 +250,160 @@ protected virtual void InternalStart() this.Controller.Workers.NotifyWorkerStarting(this); // the time of the most recent reachability computation. - var reachabilityTime = this.Controller.Stopwatch.ElapsedMilliseconds - this.Controller.Configuration.CompactionInterval; - - long wakeupCount = 0; + this.reachabilityTime = this.Controller.Stopwatch.ElapsedMilliseconds - this.Controller.Configuration.CompactionInterval; + // perform work until the scheduler is aborted for (int iteration = 0; !aborted; iteration++) { - #region related to pausing - if (this.pauseEvent != null) - { - Logging.Info("Starting to pause worker {0}", this.Index); - - CountdownEvent signalEvent = this.pauseEvent; - this.pauseEvent = null; - signalEvent.Signal(); - Logging.Info("Finished pausing worker {0}", this.Index); - - this.resumeEvent.WaitOne(); - Logging.Info("Resumed worker {0}", this.Index); - for (int i = 0; i < this.computationStates.Count; i++) - if (this.computationStates[i].InternalComputation != null) - this.computationStates[i].Producer.Start(); // In case any outstanding records were caught in the checkpoint. - } - #endregion + // test pause event. + this.ConsiderPausing(); - // drain the shared queue. - var item = default(WorkItem); - while (sharedQueue.TryDequeue(out item)) - Enqueue(item); + // accept work items from the shared queue. 
+ this.AcceptWorkItemsFromOthers(); - #region related to shutting down finished computations - // check for computations that have empty frontiers: these can be shutdown - for (int i = 0; i < this.computationStates.Count; i++) - { - if (this.computationStates[i].InternalComputation != null && this.computationStates[i].InternalComputation.CurrentState == InternalComputationState.Active && this.computationStates[i].InternalComputation.ProgressTracker.GetInfoForWorker(this.Index).PointstampCountSet.Frontier.Length == 0) - { - foreach (Dataflow.Vertex vertex in this.computationStates[i].Vertices) - vertex.ShutDown(); + // check for computations that have empty frontiers: these can be shutdown. + for (int computationIndex = 0; computationIndex < this.computationStates.Count; computationIndex++) + this.TestComputationsForShutdown(computationIndex); - this.computationStates[i].InternalComputation.SignalShutdown(); + // push any pending messages to recipients, so that work-to-do is as current as possible. + for (int computationIndex = 0; computationIndex < this.computationStates.Count; computationIndex++) + this.DrainMessagesForComputation(computationIndex); - this.computationStates[i] = new ComputationState(); - } - } - #endregion + // periodically assesses global reachability. + this.ConsiderAssesingGlobalReachability(); + + // deliver notifications. 
+ var ranAnything = false; + for (int computationIndex = 0; computationIndex < computationStates.Count; computationIndex++) + ranAnything = this.RunNotification(computationIndex) || ranAnything; + + // if nothing ran, consider sleeping until more work arrives + if (!ranAnything) + this.ConsiderSleeping(); + } + + this.Controller.Workers.NotifySchedulerTerminating(this); + } + + protected bool ComputationActive(int computationIndex) + { + return this.computationStates.Count > computationIndex && + this.computationStates[computationIndex].InternalComputation != null && + this.computationStates[computationIndex].InternalComputation.CurrentState == InternalComputationState.Active; + } + + internal void AcceptWorkItemsFromOthers() + { + // drain the shared queue. + var item = default(WorkItem); + while (sharedQueue.TryDequeue(out item)) + Enqueue(item); + } + + private void ConsiderPausing() + { + if (this.pauseEvent != null) + { + Logging.Info("Starting to pause worker {0}", this.Index); + + CountdownEvent signalEvent = this.pauseEvent; + this.pauseEvent = null; + signalEvent.Signal(); + Logging.Info("Finished pausing worker {0}", this.Index); - #region related to flushing messages for each computations - // push any pending messages to recipients, so that work-to-do is as current as possible + this.resumeEvent.WaitOne(); + Logging.Info("Resumed worker {0}", this.Index); for (int i = 0; i < this.computationStates.Count; i++) - { - if (this.computationStates[i].InternalComputation != null && this.computationStates[i].InternalComputation.CurrentState == InternalComputationState.Active) - { - Tracing.Trace("(Flush {0}", this.Index); - try - { - this.computationStates[i].PostOffice.DrainAllQueues(); - this.computationStates[i].Producer.Start(); // tell everyone about records produced and consumed. 
- } - catch (Exception e) - { - Logging.Error("Graph {0} failed on scheduler {1} with exception:\n{2}", i, this.Index, e); - this.computationStates[i].InternalComputation.Cancel(e); - } - Tracing.Trace(")Flush {0}", this.Index); - } - } - #endregion + if (this.computationStates[i].InternalComputation != null) + this.computationStates[i].Producer.Start(); // In case any outstanding records were caught in the checkpoint. + } + } + + private void TestComputationsForShutdown(int computationIndex) + { + if (this.ComputationActive(computationIndex) && this.computationStates[computationIndex].InternalComputation.ProgressTracker.GetInfoForWorker(this.Index).PointstampCountSet.Frontier.Length == 0) + { + foreach (Dataflow.Vertex vertex in this.computationStates[computationIndex].Vertices) + vertex.ShutDown(); + + this.computationStates[computationIndex].InternalComputation.SignalShutdown(); + + this.computationStates[computationIndex] = new ComputationState(); + } + } - #region related to assessing reachability of vertices based on the current frontier - // periodically assesses reachability of versions based on the frontier. alerts operator vertices to results, allowing them to compact state. 
- if (this.Controller.Configuration.CompactionInterval > 0 && this.Controller.Stopwatch.ElapsedMilliseconds - reachabilityTime > this.Controller.Configuration.CompactionInterval) + private void DrainMessagesForComputation(int computationIndex) + { + if (this.ComputationActive(computationIndex)) + { + NaiadTracing.Trace.RegionStart(NaiadTracingRegion.Flush); + try { - Tracing.Trace("(reachability {0}", this.Index); - for (int i = 0; i < this.computationStates.Count; i++) - { - if (this.computationStates[i].InternalComputation != null && this.computationStates[i].InternalComputation.CurrentState == InternalComputationState.Active) - { - var frontiers = this.computationStates[i].InternalComputation.ProgressTracker.GetInfoForWorker(0).PointstampCountSet.Frontier.Concat(this.computationStates[i].Producer.LocalPCS.Frontier).ToArray(); - this.computationStates[i].InternalComputation.Reachability.UpdateReachability(this.Controller, frontiers, this.computationStates[i].Vertices); - } - } - reachabilityTime = this.Controller.Stopwatch.ElapsedMilliseconds; - Tracing.Trace(")reachability {0}", this.Index); + this.computationStates[computationIndex].PostOffice.DrainAllMailboxes(); + this.computationStates[computationIndex].Producer.Start(); // tell everyone about records produced and consumed. 
} - #endregion - - var ranAnything = false; - for (int i = 0; i < computationStates.Count; i++) + catch (Exception e) { - if (computationStates[i].InternalComputation != null && this.computationStates[i].InternalComputation.CurrentState == InternalComputationState.Active) - { - try - { - var ranSomething = RunWorkItem(i); - ranAnything = ranSomething || ranAnything; - } - catch (Exception e) - { - Logging.Error("Graph {0} failed on scheduler {1} with exception:\n{2}", i, this.Index, e); - this.computationStates[i].InternalComputation.Cancel(e); - } - } + Logging.Error("Graph {0} failed on scheduler {1} with exception:\n{2}", computationIndex, this.Index, e); + this.computationStates[computationIndex].InternalComputation.Cancel(e); } + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.Flush); + } - // try and run some work. if we can't, go to sleep. if we sleep too long, complain. - if (!ranAnything) - { - this.Controller.Workers.NotifySchedulerSleeping(this); + } - if (this.Controller.Configuration.UseBroadcastWakeup) - { - wakeupCount = this.Controller.Workers.BlockScheduler(this.ev, wakeupCount + 1); - } - else - { - if (!ev.WaitOne(this.deadlockTimeout)) - { - Complain(); - while (!ev.WaitOne(1000)) ; - } - } + #region Related to global reachability computation + private void ConsiderAssesingGlobalReachability() + { + if (this.Controller.Configuration.CompactionInterval > 0 && this.Controller.Stopwatch.ElapsedMilliseconds - this.reachabilityTime > this.Controller.Configuration.CompactionInterval) + { + NaiadTracing.Trace.RegionStart(NaiadTracingRegion.Reachability); + for (int i = 0; i < this.computationStates.Count; i++) + this.AssessAndNotifyGlobalReachability(i); + + this.reachabilityTime = this.Controller.Stopwatch.ElapsedMilliseconds; + NaiadTracing.Trace.RegionStop(NaiadTracingRegion.Reachability); + } + } - this.Controller.Workers.NotifyWorkerWaking(this); + long reachabilityTime; + + private void AssessAndNotifyGlobalReachability(int computationIndex) + { 
+ if (this.ComputationActive(computationIndex)) + { + var frontiers = this.computationStates[computationIndex].InternalComputation.ProgressTracker.GetInfoForWorker(0).PointstampCountSet.Frontier.Concat(this.computationStates[computationIndex].Producer.LocalPCS.Frontier).ToArray(); + this.computationStates[computationIndex].InternalComputation.Reachability.UpdateReachability(this.Controller, frontiers, this.computationStates[computationIndex].Vertices); + } + } + #endregion + + private bool RunNotification(int computationIndex) + { + if (this.ComputationActive(computationIndex)) + { + try + { + return RunWorkItem(computationIndex); + } + catch (Exception e) + { + Logging.Error("Graph {0} failed on scheduler {1} with exception:\n{2}", computationIndex, this.Index, e); + this.computationStates[computationIndex].InternalComputation.Cancel(e); } } - this.Controller.Workers.NotifySchedulerTerminating(this); - + return false; } + protected bool RunWorkItem(int graphId) { var computation = this.computationStates[graphId].InternalComputation; var workItems = this.computationStates[graphId].WorkItems; var itemToRun = workItems.Count; + // determine which item to run for (int i = 0; i < workItems.Count; i++) { if (itemToRun == workItems.Count || computation.Reachability.CompareTo(workItems[itemToRun].Capability, workItems[i].Capability) > 0) @@ -427,6 +432,7 @@ protected bool RunWorkItem(int graphId) } } + // execute identified work item. 
if (itemToRun < workItems.Count) { var item = workItems[itemToRun]; @@ -435,11 +441,13 @@ protected bool RunWorkItem(int graphId) workItems.RemoveAt(workItems.Count - 1); this.Controller.Workers.NotifyVertexStarting(this, item); - Tracing.Trace("[Sched " + this.Index + " " + item.ToString()); - + NaiadTracing.Trace.StartSched(item); + //Tracing.Trace("[Sched " + this.Index + " " + item.ToString()); + Schedule(item); - Tracing.Trace("]Sched " + this.Index + " " + item.ToString()); + //Tracing.Trace("]Sched " + this.Index + " " + item.ToString()); + NaiadTracing.Trace.StopSched(item); this.Controller.Workers.NotifyVertexEnding(this, item); this.computationStates[graphId].Producer.Start(); // tell everyone about records produced and consumed. @@ -447,12 +455,52 @@ protected bool RunWorkItem(int graphId) return true; } else - return false; + return false; } + + private void ConsiderSleeping() + { + this.Controller.Workers.NotifySchedulerSleeping(this); + + if (this.Controller.Configuration.UseBroadcastWakeup) + { + wakeupCount = this.Controller.Workers.BlockScheduler(this.ev, wakeupCount + 1); + } + else + { + if (!ev.WaitOne(this.deadlockTimeout)) + { + Complain(); + while (!ev.WaitOne(1000)) ; + } + } + + this.Controller.Workers.NotifyWorkerWaking(this); + } + + long wakeupCount = 0; + private void Complain() { +#if true Console.Error.WriteLine(ComplainObject); +#else + // XXX : Currently races and can crash due to null data structures. 
+ for (int i = 0; i < computationStates.Count; i++) + { + var computationState = this.computationStates[i]; + if (computationState != null) + { + var internalComputation = computationState.InternalComputation; + if (internalComputation != null) + { + var frontier = internalComputation.ProgressTracker.GetInfoForWorker(this.Index).PointstampCountSet.Frontier; + Console.WriteLine("Computation[{0}].Frontier.Length = {1}", i, frontier.Length); + } + } + } +#endif } private static string ComplainObject = "Moan moan moan"; @@ -464,7 +512,7 @@ public void AllChannelsInitialized() public void Signal() { - ev.Set(); + ev.Set(); } public void Abort() @@ -564,7 +612,7 @@ protected override void InternalStart() int CPUIndex = this.Controller.Configuration.MultipleLocalProcesses ? this.Index + this.Controller.Configuration.ProcessID * this.Controller.Workers.Count : this.Index; using (var thrd = new PinnedThread(CPUIndex, true)) { - Tracing.Trace("@Scheduler[{0}]", this.Index); + NaiadTracing.Trace.ThreadName("Scheduler[{0}]", this.Index); Logging.Info("Starting scheduler {0} on CPU {1}, .NET thread {2} mapped to Windows thread {3}", this.Name, CPUIndex, thrd.runtimeThreadId, thrd.OSThreadId); //Console.Error.WriteLine("Starting scheduler {0}({4}) on CPU {1}, .NET thread {2} mapped to Windows thread {3}", this.Name, CPUIndex, thrd.runtimeThreadId, thrd.OSThreadId, this.Index); base.InternalStart(); diff --git a/Naiad/Runtime/Scheduling/Version.cs b/Naiad/Runtime/Scheduling/Version.cs index 3a222db..946ad97 100644 --- a/Naiad/Runtime/Scheduling/Version.cs +++ b/Naiad/Runtime/Scheduling/Version.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Runtime/SubgraphManager.cs b/Naiad/Runtime/SubgraphManager.cs index 8c25c58..694a59b 100644 --- a/Naiad/Runtime/SubgraphManager.cs +++ b/Naiad/Runtime/SubgraphManager.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 
0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * @@ -181,6 +181,7 @@ public interface Computation : IDisposable /// /// Blocks until all subscriptions have processed all inputs up to the supplied epoch. + /// If the computation has no subscriptions, no synchronization occurs. /// /// The epoch. /// @@ -317,8 +318,8 @@ internal interface InternalComputation void Activate(); void MaterializeAll(); // used by Controller.Restore(); perhaps can hide. - void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Expression> key, Channel.Flags flags) where T : Time; - void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Expression> key) where T : Time; + void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Action key, Channel.Flags flags) where T : Time; + void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Action key) where T : Time; void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort) where T : Time; Computation ExternalComputation { get; } @@ -349,7 +350,7 @@ public void Cancel(Exception e) if (this.Controller.NetworkChannel != null) { MessageHeader header = MessageHeader.GraphFailure(this.index); - SendBufferPage page = SendBufferPage.CreateSpecialPage(header, 0, this.SerializationFormat.GetSerializer()); + SendBufferPage page = SendBufferPage.CreateSpecialPage(header, 0); BufferSegment segment = page.Consume(); Logging.Error("Broadcasting graph failure message"); @@ -358,6 +359,7 @@ public void Cancel(Exception e) } this.ProgressTracker.Cancel(); + } } @@ -513,12 +515,12 @@ public int AllocatedGraphIdentifiers } - public void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Expression> key, Channel.Flags flags) + public void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Action key, Channel.Flags flags) where T : Time { stream.ForStage.Targets.Add(new Dataflow.Edge(stream, recvPort, key, flags)); } - public void 
Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Expression> key) + public void Connect(Dataflow.StageOutput stream, Dataflow.StageInput recvPort, Action key) where T : Time { this.Connect(stream, recvPort, key, Channel.Flags.None); @@ -615,7 +617,7 @@ public BaseComputation(InternalController controller, int index) this.defaultPlacement = this.controller.DefaultPlacement; this.index = index; - this.ShutdownCounter = new CountdownEvent(defaultPlacement.Where(x => x.ProcessId == controller.Configuration.ProcessID).Count()); + this.ShutdownCounter = new CountdownEvent(controller.Workers.Count); this.contextManager = new Microsoft.Research.Naiad.Dataflow.TimeContextManager(this); @@ -637,10 +639,13 @@ public void Sync(int epoch) { if (!input.IsCompleted && input.CurrentEpoch <= epoch) { - Logging.Error("Syncing at epoch ({0}) that is in the future of {1}", epoch, input); + Logging.Debug("Syncing at epoch ({0}) in the future of {1}.", epoch, input); } } + if (this.outputs.Count == 0) + Logging.Debug("Syncing a computation with no subscriptions; no synchronization performed."); + foreach (var subscription in this.Outputs) subscription.Sync(epoch); } diff --git a/Naiad/Time.cs b/Naiad/Time.cs index 162c23f..aab9b05 100644 --- a/Naiad/Time.cs +++ b/Naiad/Time.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. * diff --git a/Naiad/Tracing.cs b/Naiad/Tracing.cs index e50a893..eb9f7af 100644 --- a/Naiad/Tracing.cs +++ b/Naiad/Tracing.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* @@ -21,22 +21,68 @@ using System; using System.Collections.Generic; using System.Diagnostics; -//using System.Diagnostics.Tracing; +using System.Diagnostics.Tracing; using System.IO; using System.Linq; using System.Runtime.InteropServices; using System.Text; using System.Threading; +using Microsoft.Research.Naiad.Dataflow; using Microsoft.Research.Naiad.Dataflow.Channels; using Microsoft.Research.Naiad.Scheduling; +using Microsoft.Research.Naiad.Serialization; +using Microsoft.Research.Naiad.Runtime.Progress; namespace Microsoft.Research.Naiad.Diagnostics { + /// + /// Enumeration describing which aspect of a Naiad computation a tracing region corresponds to + /// + public enum NaiadTracingRegion + { + /// + /// Region corresponding to a flush + /// + Flush, + /// + /// Region corresponding to a send + /// + Send, + /// + /// Region corresponding to a TCP message broadcast + /// + BroadcastTCP, + /// + /// Region corresponding to a UDP message broadcast + /// + BroadcastUDP, + /// + /// Region corresponding to a reachability computation + /// + Reachability, + /// + /// Region corresponding to codegen + /// + Compile, + /// + /// Region corresponding to waking up dormant workers + /// + Wakeup, + /// + /// Region corresponding to setting events to wake up dormant workers + /// + SetEvent, + /// + /// Unclassifed region + /// + Unspecified + } + /// /// ETW provider for Naiad + /// GUID is 0ad7158e-b717-53ae-c71a-6f41ab15fe16 /// /// -#if false // Some stuff to remember about the EventSource class: // - Anything returning void will be considered an Event unless given the NonEvent attribute // - Events can only have primitive types as arguments @@ -45,6 +91,21 @@ internal class NaiadTracing : EventSource { public static NaiadTracing Trace = new NaiadTracing(); + public NaiadTracing() + : base(true) + { + Logging.Progress("Naiad provider guid = {0}", this.Guid); + Logging.Progress("Naiad provider enabled = {0}", this.IsEnabled()); + + 
this.DumpManifestToFile("naiadprovider.xml"); + } + + protected override void OnEventCommand(EventCommandEventArgs command) + { + //Logging.Progress("OnEventCommand: {0} {1}", command.Command, command.Arguments); + base.OnEventCommand(command); + } + /// /// Identifies the Naiad subsystem the event pertains to /// @@ -53,6 +114,8 @@ public class Tasks public const EventTask Channels = (EventTask)1; public const EventTask Scheduling = (EventTask)2; public const EventTask Control = (EventTask)3; + public const EventTask Locks = (EventTask)4; + public const EventTask Graph = (EventTask)5; } /// @@ -63,171 +126,372 @@ public class Keywords public const EventKeywords Debug = (EventKeywords)0x0001; public const EventKeywords Measurement = (EventKeywords)0x0002; public const EventKeywords Network = (EventKeywords)0x0004; - public const EventKeywords Viz = (EventKeywords)0x0008; + public const EventKeywords Scheduling = (EventKeywords)0x0008; + public const EventKeywords Viz = (EventKeywords)0x0010; + public const EventKeywords Locks = (EventKeywords)0x0020; + public const EventKeywords GraphMetaData = (EventKeywords)0x0040; } #region Channels /// - /// Generates an ETW event for data message send. + /// Generates an ETW event for message send. /// Does nothing if there is no listener for the Network or Viz keywords of the NaiadTracing provider. + /// Id of the channel the message is sent on + /// Sequence number in the message header + /// Length value in the message header + /// Source vertex id + /// Destination vertex id /// - /// Message header with fields correctly populated. 
- [NonEvent] - public void DataSend(MessageHeader msg) + [Conditional("TRACING_ON")] + [Event(104, Task = Tasks.Channels, Opcode = EventOpcode.Send, Keywords = Keywords.Network | Keywords.Viz)] + public void MsgSend(int channel, int seqno, int len, int src, int dst) { - if (Trace.IsEnabled(EventLevel.LogAlways, (Keywords.Network | Keywords.Viz))) - DataSend(msg.SequenceNumber, msg.Length, msg.FromVertexID, msg.DestVertexID); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Network | Keywords.Viz)) + { + WriteEvent(104, channel, seqno, len, src, dst); + } } - [Event(1, Task = Tasks.Channels, Opcode = EventOpcode.Send, Keywords = (Keywords.Network | Keywords.Viz))] - private void DataSend(int seqno, int len, int src, int dst) + /// + /// Generates an ETW event for message receive. + /// Does nothing if there is no listener for the Network or Viz keywords of the NaiadTracing provider. + /// Id of the channel the message is received on + /// Sequence number in the message header + /// Length value in the message header + /// Source vertex id + /// Destination vertex id + /// + [Conditional("TRACING_ON")] + [Event(105, Task = Tasks.Channels, Opcode = EventOpcode.Send, Keywords = Keywords.Network | Keywords.Viz)] + public void MsgRecv(int channel, int seqno, int len, int src, int dst) { - WriteEvent(1, seqno, len, src, dst); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Network | Keywords.Viz)) + { + WriteEvent(105, channel, seqno, len, src, dst); + } } + #endregion Channels + #region Scheduling /// - /// Generates an ETW event for data message receive. - /// Does nothing if there is no listener for the Network or Viz keywords of the NaiadTracing provider. + /// Generates an ETW event for the scheduling of a work item. + /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. /// - /// Message header with fields correctly populated. 
+ /// Item being scheduled [NonEvent] - public void DataRecv(MessageHeader msg) + [Conditional("TRACING_ON")] + internal void StartSched(Scheduler.WorkItem workitem) { - if (Trace.IsEnabled(EventLevel.LogAlways, (Keywords.Network | Keywords.Viz))) - DataRecv(msg.SequenceNumber, msg.Length, msg.FromVertexID, msg.DestVertexID); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Scheduling | Keywords.Viz)) + StartSched(workitem.Vertex.Stage.StageId); } - [Event(2, Task = Tasks.Channels, Opcode = EventOpcode.Receive, Keywords = (Keywords.Network | Keywords.Viz))] - private void DataRecv(int seqno, int len, int src, int dst) + [Event(200, Task = Tasks.Scheduling, Opcode = EventOpcode.Start, Keywords = Keywords.Scheduling | Keywords.Viz)] + private void StartSched(int stageid) { - WriteEvent(2, seqno, len, src, dst); + WriteEvent(200, stageid); } /// - /// Generates an ETW event for progress message send. - /// Does nothing if there is no listener for the Network or Viz keywords of the NaiadTracing provider. + /// Generates an ETW event for the end of a scheduling period of a work item. + /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. /// - /// Message header with fields correctly populated. 
+ /// Item being descheduled [NonEvent] - public void ProgressSend(MessageHeader msg) + [Conditional("TRACING_ON")] + internal void StopSched(Scheduler.WorkItem workitem) + { + //Console.WriteLine("]Sched {0} {1}", id, workitem.ToString()); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Scheduling | Keywords.Viz)) + StopSched(workitem.Vertex.Stage.StageId); + } + + [Event(201, Task = Tasks.Scheduling, Opcode = EventOpcode.Stop, Keywords = (Keywords.Scheduling | Keywords.Viz))] + private void StopSched(int stageid) { - if (Trace.IsEnabled(EventLevel.LogAlways, (Keywords.Network | Keywords.Viz))) - ProgressSend(msg.SequenceNumber, msg.Length, msg.FromVertexID, msg.DestVertexID); + WriteEvent(201, stageid); } - [Event(3, Task = Tasks.Channels, Opcode = EventOpcode.Send, Keywords = (Keywords.Network | Keywords.Viz))] - private void ProgressSend(int seqno, int len, int src, int dst) + #endregion Scheduling + + #region Control + [Event(300, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz)] + public void RefAlignFrontier() { - WriteEvent(3, seqno, len, src, dst); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) + WriteEvent(300); } /// - /// Generates an ETW event for progress message receive. - /// Does nothing if there is no listener for the Network or Viz keywords of the NaiadTracing provider. + /// Generates an ETW event for frontier advance. + /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. /// - /// Message header with fields correctly populated. 
+ /// The new PCS frontier [NonEvent] - public void ProgressRecv(MessageHeader msg) + [Conditional("TRACING_ON")] + public void AdvanceFrontier(Pointstamp[] frontier) { - if (Trace.IsEnabled(EventLevel.LogAlways, (Keywords.Network | Keywords.Viz))) - ProgressRecv(msg.SequenceNumber, msg.Length, msg.FromVertexID, msg.DestVertexID); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) + AdvanceFrontier(frontier.Select(x => x.ToString()).Aggregate((x, y) => x + " " + y)); + } + [Event(301, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz)] + private void AdvanceFrontier(string newFrontier) + { + WriteEvent(301, newFrontier); } - [Event(4, Task = Tasks.Channels, Opcode = EventOpcode.Receive, Keywords = (Keywords.Network | Keywords.Viz))] - private void ProgressRecv(int seqno, int len, int src, int dst) + [NonEvent] + [Conditional("TRACING_ON")] + public void SocketError(System.Net.Sockets.SocketError err) + { + if (Trace.IsEnabled(EventLevel.Error, Keywords.Viz | Keywords.Network | Keywords.Debug)) + { + SocketError(Enum.GetName(typeof(System.Net.Sockets.SocketError), err)); + } + } + [Event(302, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.Network | Keywords.Debug)] + private void SocketError(string msg) { - WriteEvent(4, seqno, len, src, dst); + WriteEvent(302, msg); } - #endregion Channels + [NonEvent] + [Conditional("TRACING_ON")] + public void RegionStart(NaiadTracingRegion region) + { + if (Trace.IsEnabled(EventLevel.Error, Keywords.Viz | Keywords.Network | Keywords.Debug)) + { + if (!regionInfoEventsPosted) + { + RegionInfo(); + } + RegionStart((int)region); + } + } + [Event(303, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.Debug)] + private void RegionStart(int id) + { + WriteEvent(303, id); + } + + [NonEvent] + [Conditional("TRACING_ON")] + public void RegionStop(NaiadTracingRegion region) + { + if (Trace.IsEnabled(EventLevel.Error, Keywords.Viz | 
Keywords.Network | Keywords.Debug)) + { + if (!regionInfoEventsPosted) + { + RegionInfo(); + } + RegionStop((int)region); + } + } + [Event(304, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.Debug)] + private void RegionStop(int id) + { + WriteEvent(304, id); + } + + + private bool regionInfoEventsPosted = false; + [Event(305, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.Debug)] + [Conditional("TRACING_ON")] + private void RegionInfo() + { + foreach (var val in Enum.GetValues(typeof(NaiadTracingRegion))) + { + WriteEvent(305, val, Enum.GetName(typeof(NaiadTracingRegion), val)); + } + regionInfoEventsPosted = true; + } + + + #endregion Control + + #region Graph + [Event(500, Task = Tasks.Graph, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.GraphMetaData)] + [Conditional("TRACING_ON")] + public void StageInfo(int id, string name) + { + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz | Keywords.GraphMetaData)) + WriteEvent(500, id, name); + } - #region Scheduling /// - /// Generates an ETW event for the scheduling of a work item. - /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. 
+ /// Posts an event describing the placement of a vertex /// - /// Scheduler id - /// Item being scheduled - [NonEvent] - internal void StartSched(int schedulerid, Scheduler.WorkItem workitem) + /// Stage id + /// Vertex id + /// Naiad process id + /// Worker thread id + [Event(501, Task = Tasks.Graph, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.GraphMetaData)] + [Conditional("TRACING_ON")] + public void VertexPlacement(int stageid, int vertexid, int proc, int worker) { - if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) - StartSched(schedulerid, workitem.Vertex.Stage.StageId, workitem.Vertex.Stage.Name, workitem.Requirement.Timestamp.ToString()); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz | Keywords.GraphMetaData)) + WriteEvent(501, stageid, vertexid, proc, worker); } - [Event(5, Task = Tasks.Scheduling, Opcode = EventOpcode.Start, Keywords = Keywords.Viz)] - internal void StartSched(int schedulerid, int stageid, string stagename, string pointstamp) + /// + /// Posts channel info metadata event + /// + /// Channel id + /// Source stage id + /// Destination stage id + /// True if an exchange channel (otherwise a pipeline channel) + /// True if a progress channel (otherwise a data channel) + [Event(502, Task = Tasks.Graph, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.GraphMetaData)] + [Conditional("TRACING_ON")] + public void ChannelInfo(int channel, int src, int dst, bool isExchange, bool isProgress) { - WriteEvent(5, schedulerid, stageid, stagename, pointstamp, ""); + if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz | Keywords.GraphMetaData)) + WriteEvent(502, channel, src, dst, isExchange, isProgress); } /// - /// Generates an ETW event for the end of a scheduling period of a work item. - /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. + /// Posts a friendly name for the calling thread. 
+ /// Note that this tracing event is not conditionally compiled because we want these events to appear + /// even when not tracing Naiad specifically. /// - /// Scheduler id - /// Item being descheduled + /// Name for this thread (usually to be displayed in a visualization of the trace) + /// Any formatting args [NonEvent] - internal void StopSched(int schedulerid, Scheduler.WorkItem workitem) + public void ThreadName(string name, params object[] args) { - //Console.WriteLine("]Sched {0} {1}", id, workitem.ToString()); - if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) - StopSched(schedulerid, workitem.Vertex.Stage.StageId, workitem.Vertex.Stage.Name, workitem.Requirement.Timestamp.ToString()); - } + StringBuilder sb = new StringBuilder(); + sb.AppendFormat(name, args); + ThreadName(sb.ToString()); + } + [Event(503, Task = Tasks.Graph, Opcode = EventOpcode.Info, Keywords = Keywords.Viz)] + private void ThreadName(string name) + { + WriteEvent(503, name); + } - [Event(6, Task = Tasks.Scheduling, Opcode = EventOpcode.Stop, Keywords = Keywords.Viz)] - internal void StopSched(int schedulerid, int stageid, string stagename, string pointstamp) + /// + /// Posts the Naiad process id and the name of the local machine. 
+ /// + /// Naiad process id + /// Local machine name + [Event(504, Task = Tasks.Graph, Opcode = EventOpcode.Info, Keywords = Keywords.Viz | Keywords.GraphMetaData)] + [Conditional("TRACING_ON")] + public void ProcessInfo(int id, string name) { - WriteEvent(6, schedulerid, stageid, stagename, pointstamp, ""); + WriteEvent(504, id, name); } + #endregion Graph - #endregion Scheduling + #region Locking - #region Metadata - [Event(7, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz)] - public void RefAlignFrontier() + [NonEvent] + [Conditional("TRACE_LOCKS")] + public void LockInfo(Object obj, string name) { - if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) - WriteEvent(7); + if (Trace.IsEnabled(EventLevel.Verbose, Keywords.Locks)) + LockInfo(obj.GetHashCode(), name); + } + [Event(400, Task = Tasks.Locks, Opcode = EventOpcode.Info, Keywords = Keywords.Locks, Level = EventLevel.Verbose)] + private void LockInfo(int id, string name) + { + WriteEvent(400, id, name); } - /// - /// Generates an ETW event for frontier advance. - /// Does nothing if there is no listener for the Viz keyword of the NaiadTracing provider. 
- /// - /// The new PCS frontier [NonEvent] - public void AdvanceFrontier(Pointstamp[] frontier) + [Conditional("TRACE_LOCKS")] + public void LockAcquire(Object obj) { - if (Trace.IsEnabled(EventLevel.LogAlways, Keywords.Viz)) - AdvanceFrontier(frontier.Select(x => x.ToString()).Aggregate((x, y) => x + " " + y)); + if (Trace.IsEnabled(EventLevel.Verbose, Keywords.Locks)) + LockAcquire(obj.GetHashCode()); + } + [Event(401, Task = Tasks.Locks, Opcode = EventOpcode.Info, Keywords = Keywords.Locks, Level = EventLevel.Verbose)] + private void LockAcquire(int id) + { + WriteEvent(401, id); } - [Event(8, Task = Tasks.Control, Opcode = EventOpcode.Info, Keywords = Keywords.Viz)] - private void AdvanceFrontier(string newFrontier) + [NonEvent] + [Conditional("TRACE_LOCKS")] + public void LockHeld(Object obj) + { + if (Trace.IsEnabled(EventLevel.Verbose, Keywords.Locks)) + LockHeld(obj.GetHashCode()); + } + [Event(402, Task = Tasks.Locks, Opcode = EventOpcode.Info, + Keywords = Keywords.Locks, Level = EventLevel.Verbose)] + private void LockHeld(int id) { - WriteEvent(8, newFrontier); + WriteEvent(402, id); } - #endregion Metadata + [NonEvent] + [Conditional("TRACE_LOCKS")] + public void LockRelease(Object obj) + { + if (Trace.IsEnabled(EventLevel.Verbose, Keywords.Locks)) + LockRelease(obj.GetHashCode()); + } + [Event(403, Task = Tasks.Locks, Opcode = EventOpcode.Info, Keywords = Keywords.Locks, Level = EventLevel.Verbose)] + private void LockRelease(int id) + { + WriteEvent(403, id); + } + #endregion #region Misc utilities /// - /// Write the XML manifest of the Naiad ETW provider to a file + /// Writes the XML manifest of the Naiad ETW provider to a file. + /// This method is not thread-safe, but will catch exceptions and do nothing. 
/// /// File to write to [NonEvent] public void DumpManifestToFile(string filename) { - using (var s = new StreamWriter(File.Open(filename, FileMode.Create))) + try { - s.Write(NaiadTracing.GenerateManifest(typeof(NaiadTracing), "Naiad.dll")); + using (var s = new StreamWriter(File.Open(filename, FileMode.Create))) + { + s.Write(NaiadTracing.GenerateManifest(typeof(NaiadTracing), "Naiad.dll")); + } } + catch (Exception e) + { + Logging.Error("Error dumping ETW provider manifest: {0}", e.Message); + } + } + + /// + /// DIY versions of WriteEvent to avoid the slow path. + /// See http://msdn.microsoft.com/en-us/library/system.diagnostics.tracing.eventsource.writeeventcore(v=vs.110).aspx. + /// + /// + /// + /// + /// + /// + /// + protected unsafe void WriteEvent(int eventId, int arg1, int arg2, int arg3, int arg4, int arg5) + { + EventSource.EventData* dataDesc = stackalloc EventSource.EventData[1]; + int* data = stackalloc int[6]; + data[0] = arg1; + data[1] = arg2; + data[2] = arg3; + data[3] = arg4; + data[4] = arg5; + + dataDesc[0].DataPointer = (IntPtr)data; + dataDesc[0].Size = 4*5; + WriteEventCore(eventId, 1, dataDesc); } + #endregion Misc utilities } -#endif - public class Tracing + + /// + /// This class contains methods that allow Mark events to be posted in the ETW Kernel Logger session. 
+ /// + public class KernelLoggerTracing { private static object _lock = new object(); private static bool _inited = false; @@ -384,7 +648,7 @@ unsafe public void Trace(string msg, params object[] args) } fixed (byte* ptr = buf) { - EtwSetMark(0, ptr, 4 + sb.Length); + EtwSetMark(0, ptr, Math.Min(4 + sb.Length, buf.Length)); } sb.Clear(); } @@ -395,13 +659,13 @@ unsafe public void Trace(string msg) _initEtwMarks(); } - for (int i = 0; i < msg.Length; i++) + for (int i = 0; i < msg.Length && i + 4 < buf.Length; i++) { - buf[i + 4] = (byte)msg[i]; + buf[i + 4] = (byte) msg[i]; } fixed (byte* ptr = buf) { - EtwSetMark(0, ptr, 4 + msg.Length); + EtwSetMark(0, ptr, Math.Min(4 + msg.Length, buf.Length)); } } } @@ -410,23 +674,25 @@ unsafe public void Trace(string msg) private static ThreadLocal tracingBuffer = new ThreadLocal(() => new TracingBuffer()); /// - /// Writes a freetext trace event using ETW + /// Formats and then writes a freetext Mark event into the ETW Kernel Logger trace session. + /// This event is expensive and should be used sparingly. /// /// The format string to be logged, as in String.Format /// Arguments to be formatted. [Conditional("TRACING_ON")] - public static void Trace(string msg, params object[] args) + public static void PostKernelLoggerMarkEvent(string msg, params object[] args) { var tb = tracingBuffer.Value; tb.Trace(msg, args); } /// - /// Records a tracing message. + /// Writes a freetext Mark event into the ETW Kernel Logger trace session. + /// This event is expensive and should be used sparingly. /// /// message [Conditional("TRACING_ON")] - public static void Trace(string msg) + public static void PostKernelLoggerMarkEvent(string msg) { var tb = tracingBuffer.Value; tb.Trace(msg); diff --git a/Naiad/Util/Win32.cs b/Naiad/Util/Win32.cs index ec1f910..c7ef1f5 100644 --- a/Naiad/Util/Win32.cs +++ b/Naiad/Util/Win32.cs @@ -1,5 +1,5 @@ /* - * Naiad ver. 0.4 + * Naiad ver. 0.5 * Copyright (c) Microsoft Corporation * All rights reserved. 
* diff --git a/NugetSample/App.config b/NugetSample/App.config index 71c3e81..16f0de3 100644 --- a/NugetSample/App.config +++ b/NugetSample/App.config @@ -7,20 +7,24 @@ - + - + - + + + + + \ No newline at end of file diff --git a/NugetSample/Microsoft.Research.Naiad.Sample.nuspec b/NugetSample/Microsoft.Research.Naiad.Sample.nuspec index 862e327..631cf3d 100644 --- a/NugetSample/Microsoft.Research.Naiad.Sample.nuspec +++ b/NugetSample/Microsoft.Research.Naiad.Sample.nuspec @@ -1,9 +1,9 @@ - - + + Microsoft.Research.Naiad.Sample Naiad - Sample graph analysis program - 0.4.2-beta + 0.5.0-beta naiadquestions@microsoft.com naiadquestions@microsoft.com,Microsoft http://www.apache.org/licenses/LICENSE-2.0.html @@ -19,15 +19,14 @@ - - - + + + - - + - + \ No newline at end of file diff --git a/NugetSample/NugetSample.csproj b/NugetSample/NugetSample.csproj index e2ae58c..51f8ac2 100644 --- a/NugetSample/NugetSample.csproj +++ b/NugetSample/NugetSample.csproj @@ -1,5 +1,5 @@  - + Debug @@ -11,6 +11,8 @@ NugetSample v4.5 512 + ..\ + true AnyCPU @@ -31,35 +33,55 @@ prompt 4 + + true + bin\x64\Debug\ + DEBUG;TRACE + full + x64 + prompt + MinimumRecommendedRules.ruleset + true + + + bin\x64\Release\ + TRACE + true + pdbonly + x64 + prompt + MinimumRecommendedRules.ruleset + true + - + False - ..\packages\Microsoft.Data.Edm.5.6.1\lib\net40\Microsoft.Data.Edm.dll + ..\packages\Microsoft.Data.Edm.5.6.2\lib\net40\Microsoft.Data.Edm.dll - + False - ..\packages\Microsoft.Data.OData.5.6.1\lib\net40\Microsoft.Data.OData.dll + ..\packages\Microsoft.Data.OData.5.6.2\lib\net40\Microsoft.Data.OData.dll - + False - ..\packages\Microsoft.Data.Services.Client.5.6.1\lib\net40\Microsoft.Data.Services.Client.dll + ..\packages\Microsoft.Data.Services.Client.5.6.2\lib\net40\Microsoft.Data.Services.Client.dll ..\packages\Microsoft.WindowsAzure.ConfigurationManager.2.0.3\lib\net40\Microsoft.WindowsAzure.Configuration.dll - + False - 
..\packages\WindowsAzure.Storage.3.1.0.1\lib\net40\Microsoft.WindowsAzure.Storage.dll + ..\packages\WindowsAzure.Storage.4.3.0\lib\net40\Microsoft.WindowsAzure.Storage.dll False - ..\packages\Newtonsoft.Json.6.0.2\lib\net45\Newtonsoft.Json.dll + ..\packages\Newtonsoft.Json.6.0.5\lib\net45\Newtonsoft.Json.dll - + False - ..\packages\System.Spatial.5.6.1\lib\net40\System.Spatial.dll + ..\packages\System.Spatial.5.6.2\lib\net40\System.Spatial.dll @@ -68,6 +90,9 @@ + + Properties\SharedAssemblyInfo.cs + @@ -91,6 +116,13 @@ + + + + This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. + + +