Skip to content

Training models on Google AI Platform with ML_DSL

Anna Safonova edited this page Jun 29, 2020 · 2 revisions

There are two ways to fit a model on Google AI Platform: using the API or Jupyter magic functions.

Using API

from com.griddynamics.dsl.ml.executors.executors import AIPlatformJobExecutor
from com.griddynamics.dsl.ml.jobs.ai_job import AIJobBuilder  # TODO(review): confirm module path
from com.griddynamics.dsl.ml.jobs.builder import JobBuilder
from com.griddynamics.dsl.ml.models.models import ModelBuilder  # TODO(review): confirm module path
from com.griddynamics.dsl.ml.sessions import SessionFactory
from com.griddynamics.dsl.ml.settings.arguments import Arguments
from com.griddynamics.dsl.ml.settings.description import Platform
from com.griddynamics.dsl.ml.settings.profiles import AIProfile

Define a Profile for the job:

# Profile describing the GCP project, GCS bucket, and AI Platform
# training settings used to submit the job.
profile = AIProfile(
    bucket='test_bucket',
    cluster='test_cluster',
    region='global',
    job_prefix='test_job',
    root_path='scripts',
    project='test_project',
    ai_region='us-central1',
    job_async=False,
    package_name='trainer',
    package_dst='packages',
    scale_tier='BASIC',
    runtime_version='1.14',
)

Set the Python script name:

script_name = 'train_script.py'

Define arguments for the training job:

# GCS location where the trained model artifacts are written.
output_path = 'gs://test_bucket/models'

# Command-line arguments forwarded to the training script.
args_dct = {
    '--train_path': 'gs://test_bucket/data',
    '--epochs': 20,
    '--output_path': output_path,
}
arguments = Arguments()
arguments.set_args(**args_dct)

# TrainingInput spec for the AI Platform job.
# NOTE: "pythonModule" must be a dotted module path, so the '.py'
# extension is stripped from the script name — the original example
# produced the invalid module name 'trainer.train_script.py'.
training_input = {
    "region": profile.ai_region,
    "scaleTier": profile.scale_tier,
    "jobDir": output_path,
    "pythonModule": '{}.{}'.format(profile.package_name,
                                   script_name.rsplit('.', 1)[0]),
    "runtimeVersion": profile.runtime_version,
}

Create a ModelBuilder instance:

# `job_name` was used but never defined in the original snippet — any
# unique AI Platform job id works; it is reused for the package path below.
job_name = '{}_1'.format(profile.job_prefix)

# Build the model description carrying the training arguments.
m_builder = ModelBuilder()
model = m_builder.name(job_name).train_arguments(arguments).build()

Define an AIJobBuilder instance:

# Source directory of the training package (`package_src` was never
# defined in the original snippet; the profile's root_path holds the scripts).
package_src = profile.root_path

# The fluent chain must be wrapped in parentheses — as originally written,
# the bare multi-line method chain is a Python SyntaxError.
ai_job_builder = AIJobBuilder()
ai_job = (ai_job_builder
          .model(model)
          .package_src(package_src)
          .package_dst('{}/{}'.format(profile.package_dst, job_name))
          .train_input(training_input)
          .name(job_name)
          .job_dir(output_path)
          .build())

Create a Session instance for submitting the job to AI Platform:

# Build the GCP session. As in the job-builder snippet, the multi-line
# call chain must be parenthesized — the original was a SyntaxError.
session = (SessionFactory(platform=Platform.GCP)
           .build_session(job_bucket=profile.bucket,
                          job_region=profile.region,
                          cluster=profile.cluster,
                          job_project_id=profile.project,
                          ml_region=profile.ai_region))

Create an Executor instance for submitting the training job to AI Platform:

# Submit the training job to AI Platform.
# NOTE(review): with job_async=False in the profile, submit_train_job()
# presumably blocks until the job completes — confirm against the
# AIPlatformJobExecutor implementation.
executor = AIPlatformJobExecutor(session, ai_job)
executor.submit_train_job()

Using Magic Functions

from com.griddynamics.dsl.ml.settings.profiles import AIProfile
from com.griddynamics.dsl.ml.settings.description import Platform

Define a Profile for the job:

# `Profile` is used below but only AIProfile was imported in the
# original snippet; import the base class so Profile.set resolves.
from com.griddynamics.dsl.ml.settings.profiles import Profile

# Profile describing the GCP project, GCS bucket, and AI Platform
# training settings used by the magic functions.
profile = AIProfile(
    bucket='test_bucket',
    cluster='test_cluster',
    region='global',
    job_prefix='test_job',
    root_path='scripts',
    project='test_project',
    ai_region='us-central1',
    job_async=False,
    package_name='trainer',
    package_dst='packages',
    scale_tier='BASIC',
    runtime_version='1.14',
)
# Register the profile under the name referenced by %py_train's -p flag.
Profile.set('AIProfile', profile)
platform = Platform.GCP

Open or load the task script using the magic functions %py_script, %py_script_open, or %py_load:

%%py_script_open --name train_script.py --path scripts -o output

Training using magic function %py_train:

%py_train -n train_script.py -s model -p AIProfile -pm $platform -o gs://test_bucket/models --train_path gs://test_bucket/data --epochs 20