Skip to content

Commit

Permalink
chore: squash plugin-nlu's branch commits
Browse files Browse the repository at this point in the history
  • Loading branch information
vanbasten17 committed Sep 13, 2019
1 parent c65fd7b commit 80b592b
Show file tree
Hide file tree
Showing 46 changed files with 1,018 additions and 1,877 deletions.
48 changes: 48 additions & 0 deletions packages/botonic-cli/src/commands/train.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { Command, flags } from '@oclif/command'
import { track, getGlobalNodeModulesPath } from '../utils'
import * as colors from 'colors'
const path = require('path')

/**
 * `botonic train` — trains the NLU models found under `src/nlu` using the
 * bot project's locally installed `@botonic/nlu` package.
 *
 * NOTE(review): the class is named `Run` (copy-pasted from the `run`
 * command). oclif resolves commands by file name, so this is harmless,
 * but renaming it to `Train` would be clearer.
 */
export default class Run extends Command {
  // Fixed: previous description ("Serve your bot in your localhost")
  // was copy-pasted from the `run` command.
  static description = 'Train your bot with Botonic NLU'

  static examples = [
    `$ botonic train
TRAINING MODEL FOR {LANGUAGE}...
`
  ]

  static flags = {
    // Restrict training to a single language, e.g. `--lang en`.
    lang: flags.string()
  }

  static args = []

  async run() {
    const { flags } = this.parse(Run)

    // @botonic/nlu is an optional, project-local dependency: resolve it
    // from the bot project's own node_modules, not the CLI's.
    const botonicNLUPath: string = path.join(
      process.cwd(),
      'node_modules',
      '@botonic',
      'nlu'
    )
    try {
      const { BotonicNLU, CONSTANTS } = await import(botonicNLUPath)
      // BotonicNLU reads its language selection from process.argv
      // (via parseLangFlag), so re-append the flag oclif consumed.
      process.argv.push(CONSTANTS.LANG_FLAG)
      if (flags.lang) {
        process.argv.push(flags.lang)
      }
      track('Trained with Botonic train')
      const botonicNLU = new BotonicNLU()
      const nluPath = path.join(process.cwd(), 'src', CONSTANTS.NLU_DIRNAME)
      await botonicNLU.train({ nluPath })
    } catch (e) {
      // NOTE(review): this reports "not installed" for ANY failure,
      // including genuine training errors — consider checking for
      // MODULE_NOT_FOUND and rethrowing otherwise.
      console.log(
        `You don't have @botonic/nlu installed.\nPlease, install it by typing the following command:`
          .red
      )
      console.log(` $ npm install @botonic/nlu`)
    }
  }
}
21 changes: 21 additions & 0 deletions packages/botonic-cli/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const fs = require('fs')
const os = require('os')
const path = require('path')
const Analytics = require('analytics-node')
import { exec } from 'child_process'

export var analytics: any

Expand Down Expand Up @@ -62,3 +63,23 @@ export function botonicPostInstall() {
export function sleep(ms: number) {
return new Promise(resolve => setTimeout(resolve, ms))
}

/**
 * Run a shell command and capture its output.
 *
 * Fixed: dropped the redundant `async` (the function already returns an
 * explicitly constructed Promise) and typed the signature so callers no
 * longer need an `any` cast.
 *
 * @param cmd command line passed to the default shell
 * @returns resolves with the process's stdout/stderr; rejects with the
 *          `exec` error if the command fails or exits non-zero.
 */
function sh(cmd: string): Promise<{ stdout: string; stderr: string }> {
  return new Promise((resolve, reject) => {
    exec(cmd, (err, stdout, stderr) => {
      if (err) {
        reject(err)
      } else {
        resolve({ stdout, stderr })
      }
    })
  })
}

/**
 * Resolve the global `node_modules` directory by asking npm.
 *
 * Strips the trailing newline that `npm root -g` prints, handling
 * Unix (\n), Windows (\r\n) and legacy Mac (\r) line endings.
 *
 * Fixed: replaced the `as any` escape hatch with a structural
 * `{ stdout: string }` narrowing of `sh`'s result.
 */
export async function getGlobalNodeModulesPath(): Promise<string> {
  const CROSS_PLATFORM_REGEX = /\r?\n|\r/g
  const { stdout } = (await sh('npm root -g')) as { stdout: string }
  return stdout.replace(CROSS_PLATFORM_REGEX, '')
}
11 changes: 11 additions & 0 deletions packages/botonic-nlu/.babelrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"presets": [
[
"@babel/preset-env",
{
"modules": "umd"
}
]
],
"plugins": ["@babel/plugin-transform-runtime"]
}
File renamed without changes.
33 changes: 33 additions & 0 deletions packages/botonic-nlu/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"name": "@botonic/nlu",
"version": "0.1.0",
"main": "lib/index",
"scripts": {
"build": "rm -rf lib && babel src -d lib",
"test": "jest"
},
"jest": {
"testEnvironment": "node"
},
"dependencies": {
"@babel/runtime": "^7.5.5",
"@tensorflow/tfjs": "^1.2.7",
"@tensorflow/tfjs-node": "^1.2.7",
"axios": "^0.19.0",
"colors": "^1.3.3",
"compromise": "^11.13.2",
"compromise-plugin": "0.0.9",
"franc": "^4.1.0",
"fs": "0.0.1-security",
"inquirer": "^6.3.1",
"sqlite-async": "^1.0.11"
},
"devDependencies": {
"@babel/cli": "^7.5.5",
"@babel/core": "^7.5.5",
"@babel/plugin-transform-runtime": "^7.5.5",
"@babel/preset-env": "^7.5.5",
"@types/jest": "^24.0.17",
"jest": "^24.8.0"
}
}
184 changes: 184 additions & 0 deletions packages/botonic-nlu/src/botonic-nlu.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import path from 'path'
import { readJSON, readDir } from './file-utils'
import { detectLang, preprocessData } from './preprocessing'
import { getEmbeddingMatrix } from './word-embeddings'
import * as tf from '@tensorflow/tfjs-node'
import { parseLangFlag, printPrettyConfig } from './utils'
import {
UTTERANCES_DIRNAME,
MODELS_DIRNAME,
NLU_DATA_FILENAME,
MODEL_FILENAME
} from './constants'
import { loadDevData, saveDevData } from './file-utils'
import { getPrediction, getIntent } from './prediction'
import { getEntities } from './ner'

// TODO: interactive command to try intents from terminal
// import inquirer from 'inquirer'
// import { interactiveMode } from './scripts/interactive-mode'
// async function askForInteractiveMode() {
// const questions = [
// {
// type: 'confirm',
// name: 'affirmative',
// message: `Do you want to switch into interactive mode?`
// }
// ]
// return inquirer.prompt(questions)
// }

/**
 * Botonic NLU engine: trains, loads and serves one intent-classification
 * model per language (LSTM over pretrained word embeddings, tfjs-node).
 */
export class BotonicNLU {
  constructor() {
    // Languages may be forced from the CLI via the --lang flag in
    // process.argv; train() later overwrites this with the languages
    // actually found in the dev data on disk.
    this.languages = parseLangFlag(process.argv)
    this.nluPath = ''
    this.utterancesPath = ''
    this.modelsPath = ''
    this.devData = {}
    this.models = {}
  }

  /**
   * Trains one model per language found under `nluPath` and saves each
   * model plus its metadata (vocabulary, intent dictionary, entities)
   * to `<nluPath>/models/<language>/`.
   *
   * @param {Object} opts
   * @param {string} opts.nluPath absolute path to the bot's `src/nlu` dir
   */
  async train({ nluPath }) {
    // TODO: Think about passing an arg for using models in memory
    this.nluPath = nluPath
    this.utterancesPath = path.join(nluPath, UTTERANCES_DIRNAME)
    this.modelsPath = path.join(nluPath, MODELS_DIRNAME)
    // loadDevData filters by this.languages when set; its keys determine
    // which languages actually get trained below.
    this.devData = loadDevData(this.nluPath, this.languages)
    this.languages = Object.keys(this.devData)
    for (let language of this.languages) {
      let devData = this.devData[language]
      let { devIntents, params, devEntities } = devData
      params = { ...params, language } // TODO: Think better this reassignment
      printPrettyConfig(params)
      let start = new Date()
      // Turn raw utterances into padded tensors plus the vocabulary used
      // to index them.
      let {
        tensorData,
        tensorLabels,
        vocabulary,
        vocabularyLength
      } = preprocessData(devIntents, params)
      // Pretrained word vectors for every token in the vocabulary;
      // fetched/cached by the word-embeddings module.
      let embeddingMatrix = await getEmbeddingMatrix({
        vocabulary,
        vocabularyLength,
        params
      })
      // One output unit per intent class.
      this.models[language] = embeddingLSTMModel({
        params,
        vocabularyLength,
        embeddingMatrix: tf.tensor(embeddingMatrix),
        outputDim: Object.keys(devIntents.intentsDict).length
      })
      this.models[language].summary()
      this.models[language].compile({
        optimizer: tf.train.adam(params.LEARNING_RATE),
        loss: 'categoricalCrossentropy',
        metrics: ['accuracy']
      })
      console.log('TRAINING...')

      // fit() resolves when training finishes; `history` is currently
      // unused but kept for future metrics reporting.
      const history = await this.models[language].fit(
        tensorData,
        tensorLabels,
        {
          epochs: params.EPOCHS,
          validationSplit: params.VALIDATION_SPLIT
        }
      )
      // Numeric coercion of Dates yields elapsed milliseconds.
      let end = new Date() - start
      console.log(`\nTOTAL TRAINING TIME: ${end}ms`)
      // Everything predict() will need at inference time, saved as JSON
      // next to the model.
      let nluData = {
        maxSeqLength: params.MAX_SEQ_LENGTH,
        vocabulary,
        intentsDict: devIntents.intentsDict,
        language,
        devEntities
      }
      await saveDevData({
        modelsPath: this.modelsPath,
        model: this.models[language],
        language,
        nluData
      })
    }
  }

  /**
   * Loads every previously trained model found under `modelsPath`
   * (one subdirectory per language).
   *
   * @returns {Promise<Object>} map of language -> { nluData, model },
   *          plus a `languages` array listing the loaded languages.
   */
  async loadModels({ modelsPath }) {
    let models = {}
    models.languages = readDir(modelsPath)
    for (let language of models.languages) {
      models[language] = {}
      models[language].nluData = readJSON(
        path.join(modelsPath, language, NLU_DATA_FILENAME)
      )
      models[language].model = await tf.loadLayersModel(
        `file://${modelsPath}/${language}/${MODEL_FILENAME}`
      )
    }
    return models
  }
  /**
   * Classifies a user input: detects its language, runs the matching
   * model and extracts entities.
   *
   * @param {Object} models structure returned by loadModels()
   * @param {string} input raw user utterance
   * @returns {{intent: *, entities: *}}
   */
  predict(models, input) {
    let language = detectLang(input, models.languages)
    let { model, nluData } = models[language]
    let prediction = getPrediction(input, model, nluData)
    let intent = getIntent(prediction, nluData.intentsDict, language)
    let entities = getEntities(input, nluData.devEntities)
    return { intent, entities }
  }
  // static async interactive({ modelsPath, languages }) {
  //   let wantsInteractiveMode = await askForInteractiveMode()
  //   if (wantsInteractiveMode.affirmative) {
  //     let modelsLanguages =
  //       parseLangFlag(process.argv) || languages || readDir(modelsPath)
  //     let nlus = {}
  //     for (let lang of modelsLanguages) {
  //       nlus[`${lang}`] = {}
  //       nlus[`${lang}`].nluData = readJSON(
  //         path.join(modelsPath, lang, NLU_DATA_FILENAME)
  //       )
  //       nlus[`${lang}`].model = await tf.loadLayersModel(
  //         `file://${modelsPath}/${lang}/${MODEL_FILENAME}`
  //       )
  //     }
  //     interactiveMode(nlus)
  //   }
  // }
}
/**
 * Builds the intent classifier: a sequential network made of a
 * pre-initialized word-embedding layer, an LSTM, and a dense softmax
 * head with one unit per intent class.
 *
 * @param {number} vocabularyLength rows of the embedding matrix
 * @param {*} embeddingMatrix tf tensor with the pretrained word vectors
 * @param {Object} params hyper-parameters (EMBEDDING_DIM, MAX_SEQ_LENGTH,
 *        TRAINABLE_EMBEDDINGS, UNITS, DROPOUT_REG)
 * @param {number} outputDim number of intent classes
 * @returns an uncompiled tf sequential model
 */
function embeddingLSTMModel({
  vocabularyLength,
  embeddingMatrix,
  params,
  outputDim
}) {
  const layers = [
    tf.layers.embedding({
      inputDim: vocabularyLength,
      outputDim: params.EMBEDDING_DIM,
      inputLength: params.MAX_SEQ_LENGTH,
      trainable: params.TRAINABLE_EMBEDDINGS,
      weights: [embeddingMatrix]
    }),
    // NOTE(review): a tf.layers.bidirectional wrapper around this LSTM
    // was tried and left commented out in a previous revision.
    tf.layers.lstm({
      units: params.UNITS,
      dropout: params.DROPOUT_REG,
      recurrentDropout: params.DROPOUT_REG
    }),
    tf.layers.dense({
      units: outputDim,
      activation: 'softmax'
    })
  ]
  const model = tf.sequential()
  layers.forEach(layer => model.add(layer))
  return model
}
53 changes: 53 additions & 0 deletions packages/botonic-nlu/src/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Execution: flag appended to process.argv by the CLI to select languages.
export const LANG_FLAG = '--lang'
// Filenames saved per language next to each trained model.
export const NLU_DATA_FILENAME = 'nlu-data.json'
export const MODEL_FILENAME = 'model.json'
// Dirnames under the bot's src/nlu directory.
export const MODELS_DIRNAME = 'models'
export const UTTERANCES_DIRNAME = 'utterances'
// Subpaths
export const NLU_DIRNAME = 'nlu'
export const NLU_CONFIG_FILENAME = 'nlu.config.json'
export const GLOBAL_CONFIG_DIRNAME = '.botonic'
export const WORD_EMBEDDINGS_DIRNAME = 'word-embeddings'

// General Config
export const UTTERANCES_EXTENSION = '.txt'
export const ASSETS_DIRNAME = 'assets'
// Placeholder token for out-of-vocabulary words.
export const UNKNOWN_TOKEN = '<UNK>'
// Table/column names of the sqlite word-embeddings database.
export const DB = {
  TABLE: 'embeddings',
  COLUMN: 'token'
}
// NOTE(review): name has a typo (triple "D") — kept as-is because
// consumers import it by this exact name.
export const WORD_EMBEDDDINGS_ENDPOINT =
  'https://s3-eu-west-1.amazonaws.com/word-embeddings.hubtype.com'

// Entities
// Matches markdown-style annotations: [text](EntityType).
export const ENTITIES_REGEX = /\[(.*?)\]\((.*?)\)/
export const GLOBAL_ENTITIES_REGEX = /\[(.*?)\]\((.*?)\)/g
// Entity types recognized out of the box (compromise NER tags).
export const DEFAULT_ENTITIES = [
  // Nouns
  'Organization',
  'Currency',
  'Unit',
  // Places
  'Country',
  'Region',
  'Place',
  'City',
  // Dates
  'WeekDay',
  'Date',
  'Holiday',
  'Month',
  'Duration',
  'Time',
  // People
  'FirstName',
  'LastName',
  'MaleName',
  'FemaleName',
  'Honorific',
  'Person'
]
Loading

0 comments on commit 80b592b

Please sign in to comment.