Skip to content

Commit

Permalink
Merge branch 'release-3.11' into release-3.11
Browse files Browse the repository at this point in the history
  • Loading branch information
hanwen-pcluste authored Oct 16, 2024
2 parents 368a767 + a38b423 commit 418a464
Show file tree
Hide file tree
Showing 17 changed files with 329 additions and 113 deletions.
5 changes: 3 additions & 2 deletions cookbooks/aws-parallelcluster-platform/attributes/platform.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
# ArmPL
default['conditions']['arm_pl_supported'] = arm_instance?

# Enroot + Pyxis
# Enroot
default['cluster']['enroot']['version'] = '3.4.1'
default['cluster']['pyxis']['version'] = '0.20.0'
default['cluster']['enroot']['temporary_dir'] = '/run/enroot'
default['cluster']['enroot']['persistent_dir'] = '/var/enroot'

# NVidia
default['cluster']['nvidia']['enabled'] = 'no'
Expand Down
3 changes: 0 additions & 3 deletions cookbooks/aws-parallelcluster-platform/recipes/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@
include_recipe 'aws-parallelcluster-platform::supervisord_config'
fetch_config 'Fetch and load cluster configs'
include_recipe 'aws-parallelcluster-platform::config_login' if node['cluster']['node_type'] == 'LoginNode'
enroot 'Configure Enroot' do
action :configure
end
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
directory node['cluster']['license_dir']
directory node['cluster']['configs_dir']
directory node['cluster']['shared_dir']
directory node['cluster']['examples_dir']
directory node['cluster']['shared_dir_login_nodes']

# Create ParallelCluster log folder
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true
#
# Copyright:: 2013-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
Expand All @@ -16,52 +16,19 @@
default_action :setup

action :setup do
return if on_docker?
return if on_docker? || enroot_installed

action_install_package
end

action :configure do
return if on_docker?
return unless enroot_installed
enroot_examples_dir = "#{node['cluster']['examples_dir']}/enroot"

directory enroot_examples_dir

cookbook_file "/tmp/enroot.template.conf" do
source 'enroot/enroot.template.conf'
cookbook 'aws-parallelcluster-platform'
template "#{enroot_examples_dir}/enroot.conf" do
source 'enroot/enroot.conf.erb'
owner 'root'
group 'root'
mode '0755'
action :create_if_missing
end

bash "Configure enroot" do
user 'root'
code <<-ENROOT_CONFIGURE
set -e
ENROOT_CONFIG_RELEASE=pyxis
SHARED_DIR=#{node['cluster']['shared_dir']}
NONROOT_USER=#{node['cluster']['cluster_user']}
mkdir -p ${SHARED_DIR}/enroot
chown ${NONROOT_USER} ${SHARED_DIR}/enroot
ENROOT_CACHE_PATH=${SHARED_DIR}/enroot envsubst < /tmp/enroot.template.conf > /tmp/enroot.conf
mv /tmp/enroot.conf /etc/enroot/enroot.conf
chmod 0644 /etc/enroot/enroot.conf
mkdir -p /tmp/enroot
chmod 1777 /tmp/enroot
mkdir -p /tmp/enroot/data
chmod 1777 /tmp/enroot/data
chmod 1777 ${SHARED_DIR}/enroot
mkdir -p ${SHARED_DIR}/pyxis/
chown ${NONROOT_USER} ${SHARED_DIR}/pyxis/
sed -i '${s/$/ runtime_path=${SHARED_DIR}\\/pyxis/}' /opt/slurm/etc/plugstack.conf.d/pyxis.conf
SHARED_DIR=${SHARED_DIR} envsubst < /opt/slurm/etc/plugstack.conf.d/pyxis.conf > /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf
mv /opt/slurm/etc/plugstack.conf.d/pyxis.tmp.conf /opt/slurm/etc/plugstack.conf.d/pyxis.conf
ENROOT_CONFIGURE
retries 3
retry_delay 5
mode '0644'
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
is_expected.to create_directory(node['cluster']['shared_dir'])
end

it 'creates examples directory' do
is_expected.to create_directory(node['cluster']['examples_dir'])
end

it 'creates log directory' do
is_expected.to create_directory(node['cluster']['log_base_dir']).with(
owner: 'root',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,9 @@ def self.setup(chef_run)
end
end
end

def self.configure(chef_run)
chef_run.converge_dsl('aws-parallelcluster-platform') do
enroot 'configure' do
action :configure
end
end
end
end

describe 'enroot:package_version' do
describe 'aws-parallelcluster-platform::enroot:package_version' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
cached(:chef_run) do
Expand All @@ -39,7 +31,34 @@ def self.configure(chef_run)
end
end

describe 'enroot:arch_suffix' do
# Checks the `enroot_installed` helper of the enroot resource for each platform/version
# yielded by for_all_oses: the helper's return value is expected to equal the stubbed
# result of File.exist? for the enroot binary path.
describe 'aws-parallelcluster-platform::enroot:enroot_installed' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
# Path whose File.exist? result is stubbed below.
binary = '/usr/bin/enroot'
# One generated context per stubbed outcome (binary present / binary absent).
[true, false].each do |binary_exist|
context "when binary #{binary} does #{'not ' unless binary_exist}exist" do
cached(:chef_run) do
# Only File.exist? called with the binary path is stubbed here.
# NOTE(review): no fallback is declared for other File.exist? arguments in this block —
# confirm the spec helper covers them.
allow(File).to receive(:exist?).with(binary).and_return(binary_exist)
runner = runner(platform: platform, version: version, step_into: ['enroot'])
ConvergeEnroot.setup(runner)
end

# Looks up the 'enroot' resource named 'setup' in the chef run.
cached(:resource) do
chef_run.find_resource('enroot', 'setup')
end

# The helper should simply mirror the stubbed file-existence result.
expected_result = binary_exist

it "returns #{expected_result}" do
expect(resource.enroot_installed).to eq(expected_result)
end
end
end
end
end
end

describe 'aws-parallelcluster-platform::enroot:arch_suffix' do
for_all_oses do |platform, version|
context "on #{platform}#{version} - arm" do
cached(:chef_run) do
Expand Down Expand Up @@ -81,15 +100,66 @@ def self.configure(chef_run)
end
end

describe 'enroot:setup' do
describe 'aws-parallelcluster-platform::enroot:setup' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
cached(:cluster_examples_dir) { '/path/to/cluster/examples/dir' }
cached(:enroot_persistent_dir) { '/path/to/enroot/persistent/dir' }
cached(:enroot_temporary_dir) { '/path/to/enroot/temporary/dir' }

# With `enroot_installed` stubbed to true on the enroot resource, the setup action is
# expected to neither run the 'Install enroot' bash resource nor create the example
# enroot.conf template under the cluster examples directory.
context "when enroot is already installed" do
let(:chef_run) do
# Force the resource to report enroot as already present.
stubs_for_resource('enroot') do |res|
allow(res).to receive(:enroot_installed).and_return(true)
end
runner(platform: platform, version: version, step_into: ['enroot']) do |node|
node.override['cluster']['enroot']['version'] = package_version
node.override['cluster']['examples_dir'] = cluster_examples_dir
end
end

# Converge the setup action before each example.
before do
ConvergeEnroot.setup(chef_run)
end

it 'does not install Enroot' do
is_expected.not_to run_bash('Install enroot')
end

it 'does not create the Enroot configuration' do
is_expected.not_to create_template("#{cluster_examples_dir}/enroot/enroot.conf")
end
end

let(:chef_run) do
stubs_for_resource('enroot') do |res|
allow(res).to receive(:enroot_installed).and_return(false)
end
runner(platform: platform, version: version, step_into: ['enroot']) do |node|
node.override['cluster']['enroot']['version'] = package_version
node.override['cluster']['examples_dir'] = cluster_examples_dir
node.override['cluster']['enroot']['persistent_dir'] = enroot_persistent_dir
node.override['cluster']['enroot']['temporary_dir'] = enroot_temporary_dir
end
end

before do
ConvergeEnroot.setup(chef_run)
end

# NOTE(review): the example description says "installs" while the expectation below is
# `not_to run_bash('Install enroot')` — confirm which one is intended and align the other.
it 'installs Enroot' do
is_expected.not_to run_bash('Install enroot')
end

it 'creates the Enroot example configuration' do
is_expected.to create_template("#{cluster_examples_dir}/enroot/enroot.conf").with(
source: 'enroot/enroot.conf.erb',
owner: 'root',
group: 'root',
mode: '0644'
)
end

context 'when nvidia is enabled' do
before do
stubs_for_provider('enroot') do |resource|
Expand Down Expand Up @@ -128,44 +198,3 @@ def self.configure(chef_run)
end
end
end

describe 'enroot:configure' do
for_all_oses do |platform, version|
context "on #{platform}#{version}" do
let(:chef_run) do
runner(platform: platform, version: version, step_into: ['enroot'])
end

context 'when enroot is installed' do
before do
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(true)
end
ConvergeEnroot.configure(chef_run)
end
it 'run configure enroot script' do
is_expected.to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end

context 'when enroot is not installed' do
before do
stubs_for_provider('enroot') do |resource|
allow(resource).to receive(:enroot_installed).and_return(false)
end
ConvergeEnroot.configure(chef_run)
end

it 'does not run configure enroot script' do
is_expected.not_to run_bash('Configure enroot')
.with(retries: 3)
.with(retry_delay: 5)
.with(user: 'root')
end
end
end
end
end
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ENROOT_LIBRARY_PATH /usr/lib/enroot
#ENROOT_SYSCONF_PATH /etc/enroot
ENROOT_RUNTIME_PATH /tmp/enroot/user-$(id -u)
ENROOT_CONFIG_PATH ${ENROOT_CONFIG_PATH}
ENROOT_CACHE_PATH ${ENROOT_CACHE_PATH}
ENROOT_DATA_PATH /tmp/enroot/data/user-$(id -u)
ENROOT_RUNTIME_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/runtime/user-$(id -u)
ENROOT_DATA_PATH <%= node['cluster']['enroot']['temporary_dir'] %>/data/user-$(id -u)
ENROOT_CONFIG_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/config/user-$(id -u)
ENROOT_CACHE_PATH <%= node['cluster']['enroot']['persistent_dir'] %>/cache/group-$(id -g)
#ENROOT_TEMP_PATH ${TMPDIR:-/tmp}

# Gzip program used to uncompress digest layers.
Expand Down Expand Up @@ -68,4 +68,4 @@ ENROOT_RESTRICT_DEV no
#all_proxy
#no_proxy
#http_proxy
#https_proxy
#https_proxy
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,26 @@

expected_enroot_version = node['cluster']['enroot']['version']

describe "gdrcopy version is expected to be #{expected_enroot_version}" do
describe "enroot version is expected to be #{expected_enroot_version}" do
subject { command('enroot version').stdout.strip() }
it { should eq expected_enroot_version }
end

# Every directory listed here must exist, be owned by root:root and have mode 0755.
persistent_dirs = %w(/etc/enroot)
persistent_dirs.each do |path|
describe directory(path) do
it { should exist }
its('owner') { should eq 'root' }
its('group') { should eq 'root' }
its('mode') { should cmp '0755' }
end
end
end

control 'tag:config_enroot_enabled_on_graphic_instances' do
only_if { !os_properties.on_docker? && ['yes', true].include?(node['cluster']['nvidia']['enabled']) }

describe file("/opt/parallelcluster/shared/enroot") do
describe file("/var/enroot/cache-group-1000") do
it { should exist }
its('group') { should eq 'root' }
end unless os_properties.redhat_on_docker?
Expand Down
1 change: 1 addition & 0 deletions cookbooks/aws-parallelcluster-shared/attributes/cluster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
default['cluster']['license_dir'] = "#{node['cluster']['base_dir']}/licenses"
default['cluster']['configs_dir'] = "#{node['cluster']['base_dir']}/configs"
default['cluster']['shared_dir'] = "#{node['cluster']['base_dir']}/shared"
default['cluster']['examples_dir'] = "#{node['cluster']['base_dir']}/examples"
default['cluster']['shared_dir_login_nodes'] = "#{node['cluster']['base_dir']}/shared_login_nodes"
default['cluster']['log_base_dir'] = '/var/log/parallelcluster'
default['cluster']['etc_dir'] = '/etc/parallelcluster'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@

# Slurmdbd
default['cluster']['slurmdbd_service_enabled'] = "true"

# Spank
default['cluster']['slurm']['spank_config_dir'] = "#{node['cluster']['slurm']['install_dir']}/etc/plugstack.conf.d"

# Pyxis
default['cluster']['pyxis']['version'] = '0.20.0'
default['cluster']['pyxis']['runtime_path'] = '/run/pyxis'
17 changes: 17 additions & 0 deletions cookbooks/aws-parallelcluster-slurm/libraries/pyxis.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# frozen_string_literal: true

# Copyright:: 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "LICENSE.txt" file accompanying this file.
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
# See the License for the specific language governing permissions and limitations under the License.

# Reports whether Pyxis is installed on this node.
#
# Presence is decided solely by the existence of the Pyxis installation directory.
# The directory is an optional argument so callers and specs can point the check at a
# different location; calling it with no argument keeps the original behaviour.
#
# @param install_dir [String] directory to check, '/usr/local/share/pyxis' by default
# @return [Boolean] true when install_dir exists and is a directory, false otherwise
def pyxis_installed?(install_dir = '/usr/local/share/pyxis')
  ::Dir.exist?(install_dir)
end
Loading

0 comments on commit 418a464

Please sign in to comment.