Skip to content

Commit

Permalink
chore: update schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
joanise committed Apr 16, 2024
1 parent f5a8186 commit bac8d0a
Show file tree
Hide file tree
Showing 6 changed files with 790 additions and 144 deletions.
141 changes: 110 additions & 31 deletions everyvoice/.schema/everyvoice-aligner-schema-0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,14 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"properties": {
"target_text_representation_level": {
"allOf": [
{
"$ref": "#/$defs/TargetTrainingTextRepresentationLevel"
}
],
"default": "characters"
},
"lstm_dim": {
"default": 512,
"description": "The number of dimensions in the LSTM layers.",
Expand Down Expand Up @@ -477,7 +485,7 @@
"save_dir": {
"default": "logs_and_checkpoints",
"description": "The directory to save your checkpoints and logs to.",
"format": "directory-path",
"format": "path",
"title": "Save Dir",
"type": "string"
},
Expand Down Expand Up @@ -523,7 +531,7 @@
"save_dir": {
"default": "preprocessed/YourDataSet",
"description": "The directory to save preprocessed files to.",
"format": "directory-path",
"format": "path",
"title": "Save Dir",
"type": "string"
},
Expand Down Expand Up @@ -561,53 +569,124 @@
"title": "PreprocessingConfig",
"type": "object"
},
"Punctuation": {
"properties": {
"exclamations": {
"default": [
"!",
"\u00a1"
],
"description": "Exclamation punctuation symbols used in your datasets. Replaces these symbols with <EXCL> internally.",
"items": {
"type": "string"
},
"title": "Exclamations",
"type": "array"
},
"question_symbols": {
"default": [
"?",
"\u00bf"
],
"description": "Question/interrogative punctuation symbols used in your datasets. Replaces these symbols with <QINT> internally.",
"items": {
"type": "string"
},
"title": "Question Symbols",
"type": "array"
},
"quotemarks": {
"default": [
"\"",
"'",
"\u201c",
"\u201d",
"\u00ab",
"\u00bb"
],
"description": "Quotemark punctuation symbols used in your datasets. Replaces these symbols with <QUOTE> internally.",
"items": {
"type": "string"
},
"title": "Quotemarks",
"type": "array"
},
"big_breaks": {
"default": [
".",
":",
";"
],
"description": "Punctuation symbols indicating a 'big break' used in your datasets. Replaces these symbols with <BB> internally.",
"items": {
"type": "string"
},
"title": "Big Breaks",
"type": "array"
},
"small_breaks": {
"default": [
",",
"-",
"\u2014"
],
"description": "Punctuation symbols indicating a 'small break' used in your datasets. Replaces these symbols with <SB> internally.",
"items": {
"type": "string"
},
"title": "Small Breaks",
"type": "array"
},
"ellipsis": {
"default": [
"\u2026"
],
"description": "Punctuation symbols indicating an ellipsis used in your datasets. Replaces these symbols with <EPS> internally.",
"items": {
"type": "string"
},
"title": "Ellipsis",
"type": "array"
}
},
"title": "Punctuation",
"type": "object"
},
"Symbols": {
"additionalProperties": true,
"properties": {
"silence": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
}
],
"default": [
"<SIL>"
],
"description": "The symbol(s) used to indicate silence.",
"title": "Silence"
},
"pad": {
"default": "_",
"description": "The symbol used to indicate padding. Batches are length-normalized by adding this padding character so that each utterance in the batch is the same length.",
"title": "Pad",
"type": "string"
"items": {
"type": "string"
},
"title": "Silence",
"type": "array"
},
"punctuation": {
"anyOf": [
{
"type": "string"
},
"allOf": [
{
"items": {
"type": "string"
},
"type": "array"
"$ref": "#/$defs/Punctuation"
}
],
"default": "-';:,.!?\u00a1\u00bf\u2014\u2026\"\u00ab\u00bb\u201c\u201d ",
"description": "A list of punctuation symbols.",
"title": "Punctuation"
"description": "EveryVoice will combine punctuation and normalize it into a set of five permissible types of punctuation to help tractable training."
}
},
"title": "Symbols",
"type": "object"
},
"TargetTrainingTextRepresentationLevel": {
"enum": [
"characters",
"phones",
"phonological_features"
],
"title": "TargetTrainingTextRepresentationLevel",
"type": "string"
},
"TextConfig": {
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
Expand Down
2 changes: 1 addition & 1 deletion everyvoice/.schema/everyvoice-shared-data-schema-0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
"save_dir": {
"default": "preprocessed/YourDataSet",
"description": "The directory to save preprocessed files to.",
"format": "directory-path",
"format": "path",
"title": "Save Dir",
"type": "string"
},
Expand Down
120 changes: 91 additions & 29 deletions everyvoice/.schema/everyvoice-shared-text-schema-0.1.json
Original file line number Diff line number Diff line change
@@ -1,47 +1,109 @@
{
"$defs": {
"Punctuation": {
"properties": {
"exclamations": {
"default": [
"!",
"\u00a1"
],
"description": "Exclamation punctuation symbols used in your datasets. Replaces these symbols with <EXCL> internally.",
"items": {
"type": "string"
},
"title": "Exclamations",
"type": "array"
},
"question_symbols": {
"default": [
"?",
"\u00bf"
],
"description": "Question/interrogative punctuation symbols used in your datasets. Replaces these symbols with <QINT> internally.",
"items": {
"type": "string"
},
"title": "Question Symbols",
"type": "array"
},
"quotemarks": {
"default": [
"\"",
"'",
"\u201c",
"\u201d",
"\u00ab",
"\u00bb"
],
"description": "Quotemark punctuation symbols used in your datasets. Replaces these symbols with <QUOTE> internally.",
"items": {
"type": "string"
},
"title": "Quotemarks",
"type": "array"
},
"big_breaks": {
"default": [
".",
":",
";"
],
"description": "Punctuation symbols indicating a 'big break' used in your datasets. Replaces these symbols with <BB> internally.",
"items": {
"type": "string"
},
"title": "Big Breaks",
"type": "array"
},
"small_breaks": {
"default": [
",",
"-",
"\u2014"
],
"description": "Punctuation symbols indicating a 'small break' used in your datasets. Replaces these symbols with <SB> internally.",
"items": {
"type": "string"
},
"title": "Small Breaks",
"type": "array"
},
"ellipsis": {
"default": [
"\u2026"
],
"description": "Punctuation symbols indicating an ellipsis used in your datasets. Replaces these symbols with <EPS> internally.",
"items": {
"type": "string"
},
"title": "Ellipsis",
"type": "array"
}
},
"title": "Punctuation",
"type": "object"
},
"Symbols": {
"additionalProperties": true,
"properties": {
"silence": {
"anyOf": [
{
"type": "string"
},
{
"items": {
"type": "string"
},
"type": "array"
}
],
"default": [
"<SIL>"
],
"description": "The symbol(s) used to indicate silence.",
"title": "Silence"
},
"pad": {
"default": "_",
"description": "The symbol used to indicate padding. Batches are length-normalized by adding this padding character so that each utterance in the batch is the same length.",
"title": "Pad",
"type": "string"
"items": {
"type": "string"
},
"title": "Silence",
"type": "array"
},
"punctuation": {
"anyOf": [
{
"type": "string"
},
"allOf": [
{
"items": {
"type": "string"
},
"type": "array"
"$ref": "#/$defs/Punctuation"
}
],
"default": "-';:,.!?\u00a1\u00bf\u2014\u2026\"\u00ab\u00bb\u201c\u201d ",
"description": "A list of punctuation symbols.",
"title": "Punctuation"
"description": "EveryVoice will combine punctuation and normalize it into a set of five permissible types of punctuation to help tractable training."
}
},
"title": "Symbols",
Expand Down
4 changes: 2 additions & 2 deletions everyvoice/.schema/everyvoice-spec-to-wav-schema-0.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,7 @@
"save_dir": {
"default": "logs_and_checkpoints",
"description": "The directory to save your checkpoints and logs to.",
"format": "directory-path",
"format": "path",
"title": "Save Dir",
"type": "string"
},
Expand Down Expand Up @@ -646,7 +646,7 @@
"save_dir": {
"default": "preprocessed/YourDataSet",
"description": "The directory to save preprocessed files to.",
"format": "directory-path",
"format": "path",
"title": "Save Dir",
"type": "string"
},
Expand Down
Loading

0 comments on commit bac8d0a

Please sign in to comment.