diff --git a/2.0/404.html b/2.0/404.html new file mode 100644 index 00000000..65901415 --- /dev/null +++ b/2.0/404.html @@ -0,0 +1,912 @@ + + + + + + + + + + + + + + + + + + YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ +

404 - Not found

+ +
+
+ + + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/assets/_mkdocstrings.css b/2.0/assets/_mkdocstrings.css new file mode 100644 index 00000000..e69de29b diff --git a/2.0/assets/images/favicon.png b/2.0/assets/images/favicon.png new file mode 100644 index 00000000..1cf13b9f Binary files /dev/null and b/2.0/assets/images/favicon.png differ diff --git a/2.0/assets/javascripts/bundle.a00a7c5e.min.js b/2.0/assets/javascripts/bundle.a00a7c5e.min.js new file mode 100644 index 00000000..88ee663c --- /dev/null +++ b/2.0/assets/javascripts/bundle.a00a7c5e.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var Hi=Object.create;var xr=Object.defineProperty;var Pi=Object.getOwnPropertyDescriptor;var $i=Object.getOwnPropertyNames,kt=Object.getOwnPropertySymbols,Ii=Object.getPrototypeOf,Er=Object.prototype.hasOwnProperty,an=Object.prototype.propertyIsEnumerable;var on=(e,t,r)=>t in e?xr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))Er.call(t,r)&&on(e,r,t[r]);if(kt)for(var r of kt(t))an.call(t,r)&&on(e,r,t[r]);return e};var sn=(e,t)=>{var r={};for(var n in e)Er.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&kt)for(var n of kt(e))t.indexOf(n)<0&&an.call(e,n)&&(r[n]=e[n]);return r};var Ht=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Fi=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of $i(t))!Er.call(e,o)&&o!==r&&xr(e,o,{get:()=>t[o],enumerable:!(n=Pi(t,o))||n.enumerable});return e};var yt=(e,t,r)=>(r=e!=null?Hi(Ii(e)):{},Fi(t||!e||!e.__esModule?xr(r,"default",{value:e,enumerable:!0}):r,e));var fn=Ht((wr,cn)=>{(function(e,t){typeof wr=="object"&&typeof cn!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(wr,function(){"use strict";function e(r){var n=!0,o=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function 
s(T){return!!(T&&T!==document&&T.nodeName!=="HTML"&&T.nodeName!=="BODY"&&"classList"in T&&"contains"in T.classList)}function f(T){var Ke=T.type,We=T.tagName;return!!(We==="INPUT"&&a[Ke]&&!T.readOnly||We==="TEXTAREA"&&!T.readOnly||T.isContentEditable)}function c(T){T.classList.contains("focus-visible")||(T.classList.add("focus-visible"),T.setAttribute("data-focus-visible-added",""))}function u(T){T.hasAttribute("data-focus-visible-added")&&(T.classList.remove("focus-visible"),T.removeAttribute("data-focus-visible-added"))}function p(T){T.metaKey||T.altKey||T.ctrlKey||(s(r.activeElement)&&c(r.activeElement),n=!0)}function m(T){n=!1}function d(T){s(T.target)&&(n||f(T.target))&&c(T.target)}function h(T){s(T.target)&&(T.target.classList.contains("focus-visible")||T.target.hasAttribute("data-focus-visible-added"))&&(o=!0,window.clearTimeout(i),i=window.setTimeout(function(){o=!1},100),u(T.target))}function v(T){document.visibilityState==="hidden"&&(o&&(n=!0),B())}function B(){document.addEventListener("mousemove",z),document.addEventListener("mousedown",z),document.addEventListener("mouseup",z),document.addEventListener("pointermove",z),document.addEventListener("pointerdown",z),document.addEventListener("pointerup",z),document.addEventListener("touchmove",z),document.addEventListener("touchstart",z),document.addEventListener("touchend",z)}function ne(){document.removeEventListener("mousemove",z),document.removeEventListener("mousedown",z),document.removeEventListener("mouseup",z),document.removeEventListener("pointermove",z),document.removeEventListener("pointerdown",z),document.removeEventListener("pointerup",z),document.removeEventListener("touchmove",z),document.removeEventListener("touchstart",z),document.removeEventListener("touchend",z)}function 
z(T){T.target.nodeName&&T.target.nodeName.toLowerCase()==="html"||(n=!1,ne())}document.addEventListener("keydown",p,!0),document.addEventListener("mousedown",m,!0),document.addEventListener("pointerdown",m,!0),document.addEventListener("touchstart",m,!0),document.addEventListener("visibilitychange",v,!0),B(),r.addEventListener("focus",d,!0),r.addEventListener("blur",h,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var un=Ht(Sr=>{(function(e){var t=function(){try{return!!Symbol.iterator}catch(c){return!1}},r=t(),n=function(c){var u={next:function(){var p=c.shift();return{done:p===void 0,value:p}}};return r&&(u[Symbol.iterator]=function(){return u}),u},o=function(c){return encodeURIComponent(c).replace(/%20/g,"+")},i=function(c){return decodeURIComponent(String(c).replace(/\+/g," "))},a=function(){var c=function(p){Object.defineProperty(this,"_entries",{writable:!0,value:{}});var m=typeof p;if(m!=="undefined")if(m==="string")p!==""&&this._fromString(p);else if(p instanceof c){var d=this;p.forEach(function(ne,z){d.append(z,ne)})}else if(p!==null&&m==="object")if(Object.prototype.toString.call(p)==="[object Array]")for(var h=0;hd[0]?1:0}),c._entries&&(c._entries={});for(var p=0;p1?i(d[1]):"")}})})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Sr);(function(e){var t=function(){try{var o=new e.URL("b","http://a");return o.pathname="c 
d",o.href==="http://a/c%20d"&&o.searchParams}catch(i){return!1}},r=function(){var o=e.URL,i=function(f,c){typeof f!="string"&&(f=String(f)),c&&typeof c!="string"&&(c=String(c));var u=document,p;if(c&&(e.location===void 0||c!==e.location.href)){c=c.toLowerCase(),u=document.implementation.createHTMLDocument(""),p=u.createElement("base"),p.href=c,u.head.appendChild(p);try{if(p.href.indexOf(c)!==0)throw new Error(p.href)}catch(T){throw new Error("URL unable to set base "+c+" due to "+T)}}var m=u.createElement("a");m.href=f,p&&(u.body.appendChild(m),m.href=m.href);var d=u.createElement("input");if(d.type="url",d.value=f,m.protocol===":"||!/:/.test(m.href)||!d.checkValidity()&&!c)throw new TypeError("Invalid URL");Object.defineProperty(this,"_anchorElement",{value:m});var h=new e.URLSearchParams(this.search),v=!0,B=!0,ne=this;["append","delete","set"].forEach(function(T){var Ke=h[T];h[T]=function(){Ke.apply(h,arguments),v&&(B=!1,ne.search=h.toString(),B=!0)}}),Object.defineProperty(this,"searchParams",{value:h,enumerable:!0});var z=void 0;Object.defineProperty(this,"_updateSearchParams",{enumerable:!1,configurable:!1,writable:!1,value:function(){this.search!==z&&(z=this.search,B&&(v=!1,this.searchParams._fromString(this.search),v=!0))}})},a=i.prototype,s=function(f){Object.defineProperty(a,f,{get:function(){return this._anchorElement[f]},set:function(c){this._anchorElement[f]=c},enumerable:!0})};["hash","host","hostname","port","protocol"].forEach(function(f){s(f)}),Object.defineProperty(a,"search",{get:function(){return this._anchorElement.search},set:function(f){this._anchorElement.search=f,this._updateSearchParams()},enumerable:!0}),Object.defineProperties(a,{toString:{get:function(){var f=this;return function(){return f.href}}},href:{get:function(){return this._anchorElement.href.replace(/\?$/,"")},set:function(f){this._anchorElement.href=f,this._updateSearchParams()},enumerable:!0},pathname:{get:function(){return 
this._anchorElement.pathname.replace(/(^\/?)/,"/")},set:function(f){this._anchorElement.pathname=f},enumerable:!0},origin:{get:function(){var f={"http:":80,"https:":443,"ftp:":21}[this._anchorElement.protocol],c=this._anchorElement.port!=f&&this._anchorElement.port!=="";return this._anchorElement.protocol+"//"+this._anchorElement.hostname+(c?":"+this._anchorElement.port:"")},enumerable:!0},password:{get:function(){return""},set:function(f){},enumerable:!0},username:{get:function(){return""},set:function(f){},enumerable:!0}}),i.createObjectURL=function(f){return o.createObjectURL.apply(o,arguments)},i.revokeObjectURL=function(f){return o.revokeObjectURL.apply(o,arguments)},e.URL=i};if(t()||r(),e.location!==void 0&&!("origin"in e.location)){var n=function(){return e.location.protocol+"//"+e.location.hostname+(e.location.port?":"+e.location.port:"")};try{Object.defineProperty(e.location,"origin",{get:n,enumerable:!0})}catch(o){setInterval(function(){e.location.origin=n()},100)}}})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Sr)});var Qr=Ht((Lt,Kr)=>{/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof Lt=="object"&&typeof Kr=="object"?Kr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Lt=="object"?Lt.ClipboardJS=r():t.ClipboardJS=r()})(Lt,function(){return function(){var e={686:function(n,o,i){"use strict";i.d(o,{default:function(){return ki}});var a=i(279),s=i.n(a),f=i(370),c=i.n(f),u=i(817),p=i.n(u);function m(j){try{return document.execCommand(j)}catch(O){return!1}}var d=function(O){var w=p()(O);return m("cut"),w},h=d;function v(j){var O=document.documentElement.getAttribute("dir")==="rtl",w=document.createElement("textarea");w.style.fontSize="12pt",w.style.border="0",w.style.padding="0",w.style.margin="0",w.style.position="absolute",w.style[O?"right":"left"]="-9999px";var k=window.pageYOffset||document.documentElement.scrollTop;return w.style.top="".concat(k,"px"),w.setAttribute("readonly",""),w.value=j,w}var B=function(O,w){var k=v(O);w.container.appendChild(k);var F=p()(k);return m("copy"),k.remove(),F},ne=function(O){var w=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},k="";return typeof O=="string"?k=B(O,w):O instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(O==null?void 0:O.type)?k=B(O.value,w):(k=p()(O),m("copy")),k},z=ne;function T(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?T=function(w){return typeof w}:T=function(w){return w&&typeof Symbol=="function"&&w.constructor===Symbol&&w!==Symbol.prototype?"symbol":typeof w},T(j)}var Ke=function(){var O=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},w=O.action,k=w===void 0?"copy":w,F=O.container,q=O.target,Le=O.text;if(k!=="copy"&&k!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(q!==void 0)if(q&&T(q)==="object"&&q.nodeType===1){if(k==="copy"&&q.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. 
Please use "readonly" instead of "disabled" attribute');if(k==="cut"&&(q.hasAttribute("readonly")||q.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Le)return z(Le,{container:F});if(q)return k==="cut"?h(q):z(q,{container:F})},We=Ke;function Ie(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Ie=function(w){return typeof w}:Ie=function(w){return w&&typeof Symbol=="function"&&w.constructor===Symbol&&w!==Symbol.prototype?"symbol":typeof w},Ie(j)}function Ti(j,O){if(!(j instanceof O))throw new TypeError("Cannot call a class as a function")}function nn(j,O){for(var w=0;w0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof F.action=="function"?F.action:this.defaultAction,this.target=typeof F.target=="function"?F.target:this.defaultTarget,this.text=typeof F.text=="function"?F.text:this.defaultText,this.container=Ie(F.container)==="object"?F.container:document.body}},{key:"listenClick",value:function(F){var q=this;this.listener=c()(F,"click",function(Le){return q.onClick(Le)})}},{key:"onClick",value:function(F){var q=F.delegateTarget||F.currentTarget,Le=this.action(q)||"copy",Rt=We({action:Le,container:this.container,target:this.target(q),text:this.text(q)});this.emit(Rt?"success":"error",{action:Le,text:Rt,trigger:q,clearSelection:function(){q&&q.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(F){return yr("action",F)}},{key:"defaultTarget",value:function(F){var q=yr("target",F);if(q)return document.querySelector(q)}},{key:"defaultText",value:function(F){return yr("text",F)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(F){var q=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return z(F,q)}},{key:"cut",value:function(F){return 
h(F)}},{key:"isSupported",value:function(){var F=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],q=typeof F=="string"?[F]:F,Le=!!document.queryCommandSupported;return q.forEach(function(Rt){Le=Le&&!!document.queryCommandSupported(Rt)}),Le}}]),w}(s()),ki=Ri},828:function(n){var o=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,f){for(;s&&s.nodeType!==o;){if(typeof s.matches=="function"&&s.matches(f))return s;s=s.parentNode}}n.exports=a},438:function(n,o,i){var a=i(828);function s(u,p,m,d,h){var v=c.apply(this,arguments);return u.addEventListener(m,v,h),{destroy:function(){u.removeEventListener(m,v,h)}}}function f(u,p,m,d,h){return typeof u.addEventListener=="function"?s.apply(null,arguments):typeof m=="function"?s.bind(null,document).apply(null,arguments):(typeof u=="string"&&(u=document.querySelectorAll(u)),Array.prototype.map.call(u,function(v){return s(v,p,m,d,h)}))}function c(u,p,m,d){return function(h){h.delegateTarget=a(h.target,p),h.delegateTarget&&d.call(u,h)}}n.exports=f},879:function(n,o){o.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},o.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||o.node(i[0]))},o.string=function(i){return typeof i=="string"||i instanceof String},o.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(n,o,i){var a=i(879),s=i(438);function f(m,d,h){if(!m&&!d&&!h)throw new Error("Missing required arguments");if(!a.string(d))throw new TypeError("Second argument must be a String");if(!a.fn(h))throw new TypeError("Third argument must be a Function");if(a.node(m))return c(m,d,h);if(a.nodeList(m))return u(m,d,h);if(a.string(m))return p(m,d,h);throw new 
TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(m,d,h){return m.addEventListener(d,h),{destroy:function(){m.removeEventListener(d,h)}}}function u(m,d,h){return Array.prototype.forEach.call(m,function(v){v.addEventListener(d,h)}),{destroy:function(){Array.prototype.forEach.call(m,function(v){v.removeEventListener(d,h)})}}}function p(m,d,h){return s(document.body,m,d,h)}n.exports=f},817:function(n){function o(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var f=window.getSelection(),c=document.createRange();c.selectNodeContents(i),f.removeAllRanges(),f.addRange(c),a=f.toString()}return a}n.exports=o},279:function(n){function o(){}o.prototype={on:function(i,a,s){var f=this.e||(this.e={});return(f[i]||(f[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var f=this;function c(){f.off(i,c),a.apply(s,arguments)}return c._=a,this.on(i,c,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),f=0,c=s.length;for(f;f{"use strict";/*! 
+ * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var is=/["'&<>]/;Jo.exports=as;function as(e){var t=""+e,r=is.exec(t);if(!r)return t;var n,o="",i=0,a=0;for(i=r.index;i0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[n++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function W(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var n=r.call(e),o,i=[],a;try{for(;(t===void 0||t-- >0)&&!(o=n.next()).done;)i.push(o.value)}catch(s){a={error:s}}finally{try{o&&!o.done&&(r=n.return)&&r.call(n)}finally{if(a)throw a.error}}return i}function D(e,t,r){if(r||arguments.length===2)for(var n=0,o=t.length,i;n1||s(m,d)})})}function s(m,d){try{f(n[m](d))}catch(h){p(i[0][3],h)}}function f(m){m.value instanceof Xe?Promise.resolve(m.value.v).then(c,u):p(i[0][2],m)}function c(m){s("next",m)}function u(m){s("throw",m)}function p(m,d){m(d),i.shift(),i.length&&s(i[0][0],i[0][1])}}function mn(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof xe=="function"?xe(e):e[Symbol.iterator](),r={},n("next"),n("throw"),n("return"),r[Symbol.asyncIterator]=function(){return this},r);function n(i){r[i]=e[i]&&function(a){return new Promise(function(s,f){a=e[i](a),o(s,f,a.done,a.value)})}}function o(i,a,s,f){Promise.resolve(f).then(function(c){i({value:c,done:s})},a)}}function A(e){return typeof e=="function"}function at(e){var t=function(n){Error.call(n),n.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var $t=at(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(n,o){return o+1+") "+n.toString()}).join(` + 
`):"",this.name="UnsubscriptionError",this.errors=r}});function De(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Fe=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,n,o,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=xe(a),f=s.next();!f.done;f=s.next()){var c=f.value;c.remove(this)}}catch(v){t={error:v}}finally{try{f&&!f.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var u=this.initialTeardown;if(A(u))try{u()}catch(v){i=v instanceof $t?v.errors:[v]}var p=this._finalizers;if(p){this._finalizers=null;try{for(var m=xe(p),d=m.next();!d.done;d=m.next()){var h=d.value;try{dn(h)}catch(v){i=i!=null?i:[],v instanceof $t?i=D(D([],W(i)),W(v.errors)):i.push(v)}}}catch(v){n={error:v}}finally{try{d&&!d.done&&(o=m.return)&&o.call(m)}finally{if(n)throw n.error}}}if(i)throw new $t(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)dn(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&De(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&De(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Or=Fe.EMPTY;function It(e){return e instanceof Fe||e&&"closed"in e&&A(e.remove)&&A(e.add)&&A(e.unsubscribe)}function dn(e){A(e)?e():e.unsubscribe()}var Ae={onUnhandledError:null,onStoppedNotification:null,Promise:void 
0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var st={setTimeout:function(e,t){for(var r=[],n=2;n0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var n=this,o=this,i=o.hasError,a=o.isStopped,s=o.observers;return i||a?Or:(this.currentObservers=null,s.push(r),new Fe(function(){n.currentObservers=null,De(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var n=this,o=n.hasError,i=n.thrownError,a=n.isStopped;o?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new U;return r.source=this,r},t.create=function(r,n){return new wn(r,n)},t}(U);var wn=function(e){ie(t,e);function t(r,n){var o=e.call(this)||this;return o.destination=r,o.source=n,o}return t.prototype.next=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.next)===null||o===void 0||o.call(n,r)},t.prototype.error=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.error)===null||o===void 0||o.call(n,r)},t.prototype.complete=function(){var r,n;(n=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||n===void 0||n.call(r)},t.prototype._subscribe=function(r){var n,o;return(o=(n=this.source)===null||n===void 0?void 0:n.subscribe(r))!==null&&o!==void 0?o:Or},t}(E);var Et={now:function(){return(Et.delegate||Date).now()},delegate:void 0};var wt=function(e){ie(t,e);function t(r,n,o){r===void 0&&(r=1/0),n===void 0&&(n=1/0),o===void 0&&(o=Et);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=n,i._timestampProvider=o,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=n===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,n),i}return t.prototype.next=function(r){var 
n=this,o=n.isStopped,i=n._buffer,a=n._infiniteTimeWindow,s=n._timestampProvider,f=n._windowTime;o||(i.push(r),!a&&i.push(s.now()+f)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var n=this._innerSubscribe(r),o=this,i=o._infiniteTimeWindow,a=o._buffer,s=a.slice(),f=0;f0?e.prototype.requestAsyncId.call(this,r,n,o):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,n,o){var i;if(o===void 0&&(o=0),o!=null?o>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,n,o);var a=r.actions;n!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==n&&(ut.cancelAnimationFrame(n),r._scheduled=void 0)},t}(Ut);var On=function(e){ie(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var n=this._scheduled;this._scheduled=void 0;var o=this.actions,i;r=r||o.shift();do if(i=r.execute(r.state,r.delay))break;while((r=o[0])&&r.id===n&&o.shift());if(this._active=!1,i){for(;(r=o[0])&&r.id===n&&o.shift();)r.unsubscribe();throw i}},t}(Wt);var we=new On(Tn);var R=new U(function(e){return e.complete()});function Dt(e){return e&&A(e.schedule)}function kr(e){return e[e.length-1]}function Qe(e){return A(kr(e))?e.pop():void 0}function Se(e){return Dt(kr(e))?e.pop():void 0}function Vt(e,t){return typeof kr(e)=="number"?e.pop():t}var pt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function zt(e){return A(e==null?void 0:e.then)}function Nt(e){return A(e[ft])}function qt(e){return Symbol.asyncIterator&&A(e==null?void 0:e[Symbol.asyncIterator])}function Kt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Ki(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Qt=Ki();function Yt(e){return A(e==null?void 0:e[Qt])}function Gt(e){return ln(this,arguments,function(){var r,n,o,i;return Pt(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,Xe(r.read())];case 3:return n=a.sent(),o=n.value,i=n.done,i?[4,Xe(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,Xe(o)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function Bt(e){return A(e==null?void 0:e.getReader)}function $(e){if(e instanceof U)return e;if(e!=null){if(Nt(e))return Qi(e);if(pt(e))return Yi(e);if(zt(e))return Gi(e);if(qt(e))return _n(e);if(Yt(e))return Bi(e);if(Bt(e))return Ji(e)}throw Kt(e)}function Qi(e){return new U(function(t){var r=e[ft]();if(A(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Yi(e){return new U(function(t){for(var r=0;r=2;return function(n){return n.pipe(e?_(function(o,i){return e(o,i,n)}):de,Oe(1),r?He(t):zn(function(){return new Xt}))}}function Nn(){for(var e=[],t=0;t=2,!0))}function fe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new E}:t,n=e.resetOnError,o=n===void 0?!0:n,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,f=s===void 0?!0:s;return function(c){var u,p,m,d=0,h=!1,v=!1,B=function(){p==null||p.unsubscribe(),p=void 0},ne=function(){B(),u=m=void 0,h=v=!1},z=function(){var T=u;ne(),T==null||T.unsubscribe()};return g(function(T,Ke){d++,!v&&!h&&B();var We=m=m!=null?m:r();Ke.add(function(){d--,d===0&&!v&&!h&&(p=jr(z,f))}),We.subscribe(Ke),!u&&d>0&&(u=new et({next:function(Ie){return 
We.next(Ie)},error:function(Ie){v=!0,B(),p=jr(ne,o,Ie),We.error(Ie)},complete:function(){h=!0,B(),p=jr(ne,a),We.complete()}}),$(T).subscribe(u))})(c)}}function jr(e,t){for(var r=[],n=2;ne.next(document)),e}function K(e,t=document){return Array.from(t.querySelectorAll(e))}function V(e,t=document){let r=se(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function se(e,t=document){return t.querySelector(e)||void 0}function _e(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}function tr(e){return L(b(document.body,"focusin"),b(document.body,"focusout")).pipe(ke(1),l(()=>{let t=_e();return typeof t!="undefined"?e.contains(t):!1}),N(e===_e()),G())}function Be(e){return{x:e.offsetLeft,y:e.offsetTop}}function Yn(e){return L(b(window,"load"),b(window,"resize")).pipe(Ce(0,we),l(()=>Be(e)),N(Be(e)))}function rr(e){return{x:e.scrollLeft,y:e.scrollTop}}function dt(e){return L(b(e,"scroll"),b(window,"resize")).pipe(Ce(0,we),l(()=>rr(e)),N(rr(e)))}var Bn=function(){if(typeof Map!="undefined")return Map;function e(t,r){var n=-1;return t.some(function(o,i){return o[0]===r?(n=i,!0):!1}),n}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(r){var n=e(this.__entries__,r),o=this.__entries__[n];return o&&o[1]},t.prototype.set=function(r,n){var o=e(this.__entries__,r);~o?this.__entries__[o][1]=n:this.__entries__.push([r,n])},t.prototype.delete=function(r){var n=this.__entries__,o=e(n,r);~o&&n.splice(o,1)},t.prototype.has=function(r){return!!~e(this.__entries__,r)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(r,n){n===void 0&&(n=null);for(var 
o=0,i=this.__entries__;o0},e.prototype.connect_=function(){!zr||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),xa?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!zr||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var r=t.propertyName,n=r===void 0?"":r,o=ya.some(function(i){return!!~n.indexOf(i)});o&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),Jn=function(e,t){for(var r=0,n=Object.keys(t);r0},e}(),Zn=typeof WeakMap!="undefined"?new WeakMap:new Bn,eo=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var r=Ea.getInstance(),n=new Ra(t,r,this);Zn.set(this,n)}return e}();["observe","unobserve","disconnect"].forEach(function(e){eo.prototype[e]=function(){var t;return(t=Zn.get(this))[e].apply(t,arguments)}});var ka=function(){return typeof nr.ResizeObserver!="undefined"?nr.ResizeObserver:eo}(),to=ka;var ro=new E,Ha=I(()=>H(new to(e=>{for(let t of e)ro.next(t)}))).pipe(x(e=>L(Te,H(e)).pipe(C(()=>e.disconnect()))),J(1));function he(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ge(e){return 
Ha.pipe(S(t=>t.observe(e)),x(t=>ro.pipe(_(({target:r})=>r===e),C(()=>t.unobserve(e)),l(()=>he(e)))),N(he(e)))}function bt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function ar(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var no=new E,Pa=I(()=>H(new IntersectionObserver(e=>{for(let t of e)no.next(t)},{threshold:0}))).pipe(x(e=>L(Te,H(e)).pipe(C(()=>e.disconnect()))),J(1));function sr(e){return Pa.pipe(S(t=>t.observe(e)),x(t=>no.pipe(_(({target:r})=>r===e),C(()=>t.unobserve(e)),l(({isIntersecting:r})=>r))))}function oo(e,t=16){return dt(e).pipe(l(({y:r})=>{let n=he(e),o=bt(e);return r>=o.height-n.height-t}),G())}var cr={drawer:V("[data-md-toggle=drawer]"),search:V("[data-md-toggle=search]")};function io(e){return cr[e].checked}function qe(e,t){cr[e].checked!==t&&cr[e].click()}function je(e){let t=cr[e];return b(t,"change").pipe(l(()=>t.checked),N(t.checked))}function $a(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Ia(){return L(b(window,"compositionstart").pipe(l(()=>!0)),b(window,"compositionend").pipe(l(()=>!1))).pipe(N(!1))}function ao(){let e=b(window,"keydown").pipe(_(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:io("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),_(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!$a(n,r)}return!0}),fe());return Ia().pipe(x(t=>t?R:e))}function Me(){return new URL(location.href)}function ot(e){location.href=e.href}function so(){return new E}function co(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)co(e,r)}function M(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof 
t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)co(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function fo(){return location.hash.substring(1)}function uo(e){let t=M("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Fa(){return b(window,"hashchange").pipe(l(fo),N(fo()),_(e=>e.length>0),J(1))}function po(){return Fa().pipe(l(e=>se(`[id="${e}"]`)),_(e=>typeof e!="undefined"))}function Nr(e){let t=matchMedia(e);return Zt(r=>t.addListener(()=>r(t.matches))).pipe(N(t.matches))}function lo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(N(e.matches))}function qr(e,t){return e.pipe(x(r=>r?t():R))}function ur(e,t={credentials:"same-origin"}){return pe(fetch(`${e}`,t)).pipe(ce(()=>R),x(r=>r.status!==200?Tt(()=>new Error(r.statusText)):H(r)))}function Ue(e,t){return ur(e,t).pipe(x(r=>r.json()),J(1))}function mo(e,t){let r=new DOMParser;return ur(e,t).pipe(x(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),J(1))}function pr(e){let t=M("script",{src:e});return I(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(x(()=>Tt(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),C(()=>document.head.removeChild(t)),Oe(1))))}function ho(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function bo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(ho),N(ho()))}function vo(){return{width:innerWidth,height:innerHeight}}function go(){return b(window,"resize",{passive:!0}).pipe(l(vo),N(vo()))}function yo(){return Q([bo(),go()]).pipe(l(([e,t])=>({offset:e,size:t})),J(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(X("size")),o=Q([n,r]).pipe(l(()=>Be(e)));return Q([r,t,o]).pipe(l(([{height:i},{offset:a,size:s},{x:f,y:c}])=>({offset:{x:a.x-f,y:a.y-c+i},size:s})))}(()=>{function 
e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(a=>{let s=document.createElement("script");s.src=i,s.onload=a,document.body.appendChild(s)})),Promise.resolve())}var r=class{constructor(n){this.url=n,this.onerror=null,this.onmessage=null,this.onmessageerror=null,this.m=a=>{a.source===this.w&&(a.stopImmediatePropagation(),this.dispatchEvent(new MessageEvent("message",{data:a.data})),this.onmessage&&this.onmessage(a))},this.e=(a,s,f,c,u)=>{if(s===this.url.toString()){let p=new ErrorEvent("error",{message:a,filename:s,lineno:f,colno:c,error:u});this.dispatchEvent(p),this.onerror&&this.onerror(p)}};let o=new EventTarget;this.addEventListener=o.addEventListener.bind(o),this.removeEventListener=o.removeEventListener.bind(o),this.dispatchEvent=o.dispatchEvent.bind(o);let i=document.createElement("iframe");i.width=i.height=i.frameBorder="0",document.body.appendChild(this.iframe=i),this.w.document.open(),this.w.document.write(` + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Installation

+ +

ydata-sdk is available through PyPI, allowing for easy installation and integration with data science programming environments (Google Colab, Jupyter Notebooks, Visual Studio Code, PyCharm) and the data science stack (pandas, numpy, scikit-learn).

+

Installing the package

+

Currently, the package supports Python versions 3.9 through 3.12, and can be installed on Windows, Linux, or macOS operating systems.

+

Prior to installing the package, we recommend creating a virtual or conda environment:

+
+
+
+
conda create -n synth-env python=3.12
+conda activate synth-env
+
+
+
+
+

The above command creates and activates a new environment called "synth-env" with Python version 3.12.X. In the new environment, you can then install ydata-sdk:

+
+
+
+
pip install ydata-sdk
+
+
+
+
+

+Installing ydata-synthetic +5min – Step-by-step installation guide

+

Using Google Colab

+

To install inside a Google Colab notebook, you can use the following:

+
!pip install ydata-sdk
+
+

Make sure your Google Colab is running Python versions >=3.9, <=3.12. Learn how to configure Python versions on Google Colab here.

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/index.html b/2.0/index.html new file mode 100644 index 00000000..04de77fe --- /dev/null +++ b/2.0/index.html @@ -0,0 +1,1074 @@ + + + + + + + + + + + + + + + + + + + + + + Overview - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Overview

+ +

+

YData Synthetic Logo

+

+ +

pypi +Pythonversion +downloads + + +Build Status +Code Coverage +GitHub stars +Discord

+

Overview

+

YData-Synthetic is an open-source package developed in 2020 with the primary goal of educating users about generative models for synthetic data generation. +Designed as a collection of models, it was intended for exploratory studies and educational purposes. +However, it was not optimized for the quality, performance, and scalability needs typically required by organizations.

+
+

We are now ydata-sdk!

+

Even though the journey was fun and we have learned a lot from the community, it is now time to upgrade ydata-synthetic.

+

Heading towards the future of synthetic data generation, we recommend that users transition to ydata-sdk, which provides a superior experience with enhanced performance, +precision, and ease of use, making it the preferred tool for synthetic data generation and a perfect introduction to Generative AI.

+
+

Supported Data Types

+
+
+
+

Tabular data does not have a temporal dependence, and can be structured and organized in a table-like format, where features are represented in columns, whereas observations correspond to the rows.

+

Additionally, tabular data usually comprises both numeric and categorical features. Numeric features are those that encode quantitative values, whereas categorical features represent qualitative measurements. Categorical features can be further divided into ordinal, binary or boolean, and nominal features.

+

Learn more about synthesizing tabular data in this article, or check the quickstart guide to get started with the synthesization of tabular datasets.

+
+
+

Time-series data exhibit a sequential, temporal dependency between records, and may present a wide range of patterns and trends, including seasonality (patterns that repeat at calendar periods -- days, weeks, months -- such as holiday sales, for instance) or periodicity (patterns that repeat over time).

+

Read more about generating time-series data in this article and check this quickstart guide to get started with time-series data synthesization.

+
+
+

Multi-Table data or databases exhibit referential behaviour between tables and a database schema, both of which are expected to be replicated and respected by the generated synthetic data. +Read more about database synthetic data generation in this article and check this quickstart guide for Multi-Table synthetic data generation. +Time-series data exhibit a sequential, temporal dependency between records, and may present a wide range of patterns and trends, including seasonality (patterns that repeat at calendar periods -- days, weeks, months -- such as holiday sales, for instance) or periodicity (patterns that repeat over time).

+
+
+
+

Validate the quality of your generated synthetic data

+

Validating the quality of synthetic data is essential to ensure its usefulness and privacy. YData Fabric provides tools for comprehensive synthetic data evaluation through:

+
    +
  1. +

    Profile Comparison Visualization: +Fabric delivers side-by-side visual comparisons of key data properties (e.g., distributions, correlations, and outliers) between synthetic and original datasets, allowing users to assess fidelity at a glance.

    +
  2. +
  3. +

    PDF Report with Metrics: +Fabric generates a PDF report that includes key metrics to evaluate:

    +
  4. +
  5. +

    Fidelity: How closely synthetic data matches the original.

    +
  6. +
  7. Utility: How well it performs in real-world tasks.
  8. +
  9. Privacy: Risk assessment of data leakage and re-identification.
  10. +
+

These tools ensure a thorough validation of synthetic data quality, making it reliable for real-world use.

+

Supported Generative AI Models

+

With the upcoming update of ydata-synthetic to ydata-sdk, users will now have access to a single API that automatically selects and optimizes +the best generative model for their data. This streamlined approach eliminates the need to choose between +various models manually, as the API intelligently identifies the optimal model based on the specific dataset and use case.

+

Instead of having to manually select from models such as:

+
    +
  • GAN
  • +
  • CGAN (Conditional GAN)
  • +
  • WGAN (Wasserstein GAN)
  • +
  • WGAN-GP (Wasserstein GAN with Gradient Penalty)
  • +
  • DRAGAN (Deep Regret Analytic GAN)
  • +
  • Cramer GAN (Cramer Distance Solution to Biased Wasserstein Gradients)
  • +
  • CWGAN-GP (Conditional Wasserstein GAN with Gradient Penalty)
  • +
  • CTGAN (Conditional Tabular GAN)
  • +
  • TimeGAN (specifically for time-series data)
  • +
  • DoppelGANger (specifically for time-series data)
  • +
+

The new API handles model selection automatically, optimizing for the best performance in fidelity, utility, and privacy. +This significantly simplifies the synthetic data generation process, ensuring that users get the highest quality output without +the need for manual intervention and tiring hyperparameter tuning.

+

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/integrations/gx_integration/index.html b/2.0/integrations/gx_integration/index.html new file mode 100644 index 00000000..dea47381 --- /dev/null +++ b/2.0/integrations/gx_integration/index.html @@ -0,0 +1,1103 @@ + + + + + + + + + + + + + + + + + + + + + + Great Expectations - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Great Expectations

+

Great Expectations is a Python-based open-source library for validating, documenting, and profiling your data. It helps you to maintain data quality and improve communication about data between teams. With Great Expectations, you can assert what you expect from the data you load and transform, and catch data issues quickly – Expectations are basically unit tests for your data.

+

About Great Expectations

+

Expectations are assertions about your data. In Great Expectations, those assertions are expressed in a declarative language in the form of simple, human-readable Python methods. For example, in order to assert that you want values in a column passenger_count in your dataset to be integers between 1 and 6, you can say:

+
expect_column_values_to_be_between(column="passenger_count", min_value=1, max_value=6)
+
+

Great Expectations then uses this statement to validate whether the column passenger_count in a given table is indeed between 1 and 6, and returns a success or failure result. The library currently provides several dozen highly expressive built-in Expectations, and allows you to write custom Expectations.

+

Great Expectations renders Expectations to clean, human-readable documentation called Data Docs. These HTML docs contain both your Expectation Suites as well as your data validation results each time validation is run – think of it as a continuously updated data quality report.

+

Validating your Synthetic Data with Great Expectations

+

Outdated + Note that this example won't work with the latest version of ydata-synthetic. Please check ydata-sdk and its synthetic data generation examples.

+

1. Install the required libraries:

+

We recommend you create a virtual environment and install ydata-synthetic and great-expectations by running the following command on your terminal.

+
pip install ydata-synthetic great-expectations
+
+

2. Generate your Synthetic Data:

+

In this example, we'll use CTGAN to synthesize samples from the Adult Census Income dataset:

+
from pmlb import fetch_data
+
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+
+# Load the 'adult' dataset through pmlb and declare which columns are numerical and which are categorical
+data = fetch_data('adult')
+num_cols = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']
+cat_cols = ['workclass','education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
+            'native-country', 'target']
+
+# Define the model and training parameters
+batch_size = 500
+epochs = 500+1
+learning_rate = 2e-4
+beta_1 = 0.5
+beta_2 = 0.9
+
+ctgan_args = ModelParameters(batch_size=batch_size,
+                             lr=learning_rate,
+                             betas=(beta_1, beta_2))
+
+train_args = TrainParameters(epochs=epochs)
+synth = RegularSynthesizer(modelname='ctgan', model_parameters=ctgan_args)
+synth.fit(data=data, train_arguments=train_args, num_cols=num_cols, cat_cols=cat_cols)
+
+# Sample from the trained synthesizer and save the synthetic data (assumes the 'data/' directory already exists)
+synth_data = synth.sample(1000)
+synth_data.to_csv('data/adult_synthetic.csv', index=False)
+
+

3. Create a Data Context and Connect to Data:

+

Import the great_expectations module, create a data context, and connect to your synthetic data:

+
import great_expectations as gx
+
+# Initialize the Great Expectations data context
+context = gx.get_context()
+
+# Read the synthetic CSV written in the previous step; the returned object is used as the validator below
+validator = context.sources.pandas_default.read_csv(
+    "data/adult_synthetic.csv"
+)
+
+

4. Create Expectations:

+

You can create Expectation Suites by writing out individual statements, such as the ones below, by using Profilers and Data Assistants or even Custom Profilers.

+
# Declare expectations on the synthetic data through the validator, then save them as an expectation suite
+validator.expect_column_values_to_not_be_null("age")
+validator.expect_column_values_to_be_between("workclass", auto=True)
+validator.save_expectation_suite()
+
+

5. Validate Data

+

To validate your data, define a checkpoint and examine the data to determine if it matches the defined Expectations:

+

# Define (or update) a named checkpoint bound to the validator; it is executed in the next step
+checkpoint = context.add_or_update_checkpoint(
+    name="synthetic_data_checkpoint",
+    validator=validator,
+)
+
+You can then run the checkpoint to get the validation results:

+
checkpoint_result = checkpoint.run()
+
+

And use the following code to view an HTML representation of the Validation results:

+
context.view_validation_result(checkpoint_result)
+
+

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/sitemap.xml b/2.0/sitemap.xml new file mode 100644 index 00000000..0f8724ef --- /dev/null +++ b/2.0/sitemap.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/2.0/sitemap.xml.gz b/2.0/sitemap.xml.gz new file mode 100644 index 00000000..89d11d9d Binary files /dev/null and b/2.0/sitemap.xml.gz differ diff --git a/2.0/stylesheets/extra.css b/2.0/stylesheets/extra.css new file mode 100644 index 00000000..6b7a5b7a --- /dev/null +++ b/2.0/stylesheets/extra.css @@ -0,0 +1,51 @@ +.md-typeset .md-annotation__index > ::before { + content: attr(data-md-annotation-id); +} +.md-typeset :focus-within > .md-annotation__index > ::before { + transform: none; +} + +.md-content { + --md-typeset-a-color: #002b9e; +} + +@media { + .md-button--ydata { + --md-primary-fg-color: #E32212; + --md-primary-bg-color: #E32212; + } +} + +:root { + /* Primary color shades */ + --md-primary-fg-color: #040404; + --md-primary-fg-color--light: #040404; + --md-primary-fg-color--dark: #040404; + --md-primary-bg-color: hsla(0, 0%, 100%, 1); + --md-primary-bg-color--light: hsla(0, 0%, 100%, 0.7); + --md-text-link-color: #E32212; + + /* Accent color shades */ + --md-accent-fg-color: #E32212; + --md-accent-fg-color--transparent: hsla(189, 100%, 37%, 0.1); + --md-accent-bg-color: hsla(0, 0%, 100%, 1); + --md-accent-bg-color--light: hsla(0, 0%, 100%, 0.7); + } + + :root > * { + /* Code block color shades */ + --md-code-bg-color: hsla(0, 0%, 96%, 1); + --md-code-fg-color: hsla(200, 18%, 26%, 1); + + /* Footer */ + --md-footer-bg-color: #040404; + --md-footer-bg-color--dark: hsla(0, 0%, 0%, 0.32); + --md-footer-fg-color: hsla(0, 0%, 100%, 1); + --md-footer-fg-color--light: hsla(0, 0%, 100%, 0.7); + --md-footer-fg-color--lighter: hsla(0, 0%, 100%, 0.3); + } + +.youtube { + color: #EE0F0F; +} + diff --git a/2.0/support/help-troubleshooting/index.html b/2.0/support/help-troubleshooting/index.html new file mode 100644 
index 00000000..04aef7f6 --- /dev/null +++ b/2.0/support/help-troubleshooting/index.html @@ -0,0 +1,995 @@ + + + + + + + + + + + + + + + + + + + + Help & Troubleshooting - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Help & Troubleshooting

+

Troubleshooting

+ +

Community help

+

If you are just starting, or you are dealing with something new, we are always eager to help!

+

Join us in the Data-Centric AI community Discord: we have a space reserved for all your questions about ydata-synthetic! Don't be shy 😳

+

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/faqs/index.html b/2.0/synthetic_data/faqs/index.html new file mode 100644 index 00000000..f4730527 --- /dev/null +++ b/2.0/synthetic_data/faqs/index.html @@ -0,0 +1,1066 @@ + + + + + + + + + + + + + + + + + + + + + + Frequently Asked Questions - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Frequently Asked Questions

+

How to get accurate data from my synthetic data generation processes?

+

Depending on your use case, the downstream application of your synthetic data, and the characteristics of your original data, you will need to adjust your synthetization process accordingly. That often involves performing thorough data preparation and fitting your generation models appropriately.

+
+

Tip

+

For a use-case oriented UI experience, try YData Fabric. From an interactive and complete data profiling to an efficient synthetization, your data preparation process will be seamlessly adjusted to your data characteristics.

+
+

What is the best way to evaluate the quality of my synthetic data?

+

The most appropriate metrics to evaluate the quality of your synthetic data are also dependent on the goal for which synthetic data will be used. Nevertheless, we may define three essential pillars for synthetic data quality: privacy, fidelity, and utility:

+
    +
  • +

    Privacy refers to the ability of synthetic data to withhold any personal, private, or sensitive information, avoiding connections being drawn to the original data and preventing data leakage;

    +
  • +
  • +

    Fidelity concerns the ability of the new data to preserve the properties of the original data (in other words, it refers to "how faithful, how precise" is the synthetic data in comparison to real data);

    +
  • +
  • +

    Finally, utility relates to the downstream application where the synthetic data will be used: if the synthetization process is successful, the same insights should be derived from the new data as from the original data.

    +
  • +
+

For each of these components, several specific statistical measures can be evaluated.

+
+

Abstract

+

To learn more about how to define specific trade-offs between privacy, fidelity, and utility, check out this white paper on Synthetic Data Quality Metrics.

+
+

How to generate synthetic data in Google Colab and Python Environments?

+

Most issues with installations are usually associated with unsupported Python versions or misalignment between python environments and package requirements.

+

Let’s see how you can get both right:

+

Python Versions

+

Note that ydata-sdk currently requires Python >=3.9, < 3.13 so if you're trying to run our code in Google Colab, then you need to update your Google Colab’s Python version accordingly. The same goes for your development environment.

+

Virtual Environments

+

A lot of troubleshooting arises due to misalignments between environments and package requirements. +Virtual Environments isolate your installations from the "global" environment so that you don't have to worry about conflicts.

+

Using conda, creating a new environment is as easy as running this on your shell:

+
conda create --name synth-env python==3.12 pip
+conda activate synth-env
+pip install ydata-sdk
+
+

Now you can open up your Python editor or Jupyter Lab and use the synth-env as your development environment, without having to worry about conflicting versions or packages between projects!

+

Does TimeGAN replicate my full sequence of data?

+

No. This is an unrealistic expectation because the TimeGAN architecture is not meant to replicate the long-term behavior of your data.

+

TimeGAN works with the concept of "windows": it learns to map the data distribution of short-term frames of time, within the time windows you provide. It also considers that those windows are independent of each other, so it cannot return a temporal pattern most people expect.

+

That's not supported by this architecture itself, but there are others that allow for both short-term and long-term synthesization, such as those available in YData Fabric.

+
+

Abstract

+

Learn more about how YData's Time-Series Synthetic Data Generation compares to TimeGAN in this dedicated post.

+
+

Additional Support

+

Couldn't find what you need? Reach out to our dedicated team for quick and syn-ple assistance!

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/index.html b/2.0/synthetic_data/index.html new file mode 100644 index 00000000..c8940cc3 --- /dev/null +++ b/2.0/synthetic_data/index.html @@ -0,0 +1,946 @@ + + + + + + + + + + + + + + + + + + + + + + Synthetic data generation - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthetic data generation

+

Synthetic data is data that has been created artificially through computer simulation or that algorithms can generate to +take the place of real-world data. The data can be used as an alternative or supplement to real-world data when real-world +data is not readily available. It can also be used as a Machine Learning performance booster.

+

The ydata-sdk package is a Python package developed by YData’s team that allows users to easily benefit from Generative AI +and generate synthetic data. The main goal of the package is to serve as a way for data +scientists to get familiar with synthetic data and its applications in real-world domains, as well as the potential of Generative AI.

+

The ydata-sdk package provides different methods for generating synthetic tabular and time-series data, as well as databases.

+

The package also aims to facilitate the exploration and understanding of synthetic data generation methods!

+

📄 Get started with synthetic data for tabular data

+

📈 Get started with synthetic data for time-series

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/multi_table/fabric_multitable/index.html b/2.0/synthetic_data/multi_table/fabric_multitable/index.html new file mode 100644 index 00000000..5dce8e15 --- /dev/null +++ b/2.0/synthetic_data/multi_table/fabric_multitable/index.html @@ -0,0 +1,1013 @@ + + + + + + + + + + + + + + + + + + + + + + Fabric Multi-Table ** - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Multiple tables synthetic data generation **

+
+

** YData's Enterprise feature

+

This feature is only available for users of YData Fabric.

+

Sign up for the Fabric community and +try synthetic data generation from multiple tables, or contact us for more information.

+
+

Multitable synthetic data enables the creation of large, diverse +datasets crucial for training robust machine learning models, algorithm testing, and addressing privacy concerns. It can be +crucial to enable proper data democratization within an organization.

+

Nevertheless, the process of generating a full database or even several tables that share relations, can be particularly +challenging due to the necessity of preserving referential integrity across diverse tables and scale. This involves maintaining +realistic relationships between entities to mirror real-world scenarios accurately while being able to process large volumes +of data.

+

YData Fabric offers a cutting-edge Synthetic data generation process that seamlessly integrates with your existing Relational databases. +By replicating the data's value and structure to a new target storage, Fabric delivers a wide range of benefits and use-cases. +These include reducing risk and improving compliance by substituting operational databases with synthetic databases for tests and development. It also enables QA teams to create comprehensive and more flexible testing scenarios.

+

Explore Fabric multi-table synthesis capabilities:

+

From what sources am I able to train a multi-table synthetic data generator?

+
    +
  • From a relational database
  • +
  • From the upload of multiple files
  • +
+ + + + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/cgan_example/index.html b/2.0/synthetic_data/single_table/cgan_example/index.html new file mode 100644 index 00000000..0099930e --- /dev/null +++ b/2.0/synthetic_data/single_table/cgan_example/index.html @@ -0,0 +1,1047 @@ + + + + + + + + + + + + + + + + + + + + + + CGAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate conditional synthetic data.

+
+

Using CGAN to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

CGAN is a deep learning model that combines GANs with conditional models to generate data samples based on specific conditions:

+ +

Here’s an example of how to synthesize tabular data with CGAN using the Credit Card dataset:

+
"""
+    CGAN architecture example file
+"""
+import pandas as pd
+from sklearn import cluster
+
+from ydata_synthetic.utils.cache import cache_file
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+
+#Fetch the Credit Card dataset (cache_file presumably downloads it once and returns a local path) and load it using the first column as index
+data_path = cache_file('creditcard.csv', 'https://datahub.io/machine-learning/creditcard/r/creditcard.csv')
+data = pd.read_csv(data_path, index_col=[0])
+
+#Data processing and analysis: every column except 'Class' is treated as numerical, none as categorical
+num_cols = list(data.columns[ data.columns != 'Class' ])
+cat_cols = []
+
+print('Dataset columns: {}'.format(num_cols))
+sorted_cols = ['V14', 'V4', 'V10', 'V17', 'V12', 'V26', 'Amount', 'V21', 'V8', 'V11', 'V7', 'V28', 'V19',
+                'V3', 'V22', 'V6', 'V20', 'V27', 'V16', 'V13', 'V25', 'V24', 'V18', 'V2', 'V1', 'V5', 'V15',
+                'V9', 'V23', 'Class']
+processed_data = data[ sorted_cols ].copy()
+
+#For the purpose of this example we will only synthesize the minority class (rows where Class == 1)
+train_data = processed_data.loc[processed_data['Class'] == 1].copy()
+
+#Cluster the minority-class records into 2 groups with KMeans; the cluster ids overwrite 'Class' below and act as the conditioning labels
+print("Dataset info: Number of records - {} Number of variables - {}".format(train_data.shape[0], train_data.shape[1]))
+algorithm = cluster.KMeans
+args, kwds = (), {'n_clusters':2, 'random_state':0}
+labels = algorithm(*args, **kwds).fit_predict(train_data[ num_cols ])
+
+fraud_w_classes = train_data.copy()
+fraud_w_classes['Class'] = labels
+
+#----------------------------
+#    GAN Training
+#----------------------------
+
+#Define the Conditional GAN and training parameters
+noise_dim = 32
+dim = 128
+batch_size = 128
+beta_1 = 0.5
+beta_2 = 0.9
+
+log_step = 100
+epochs = 2 + 1  # NOTE(review): only 3 epochs - presumably kept small so the example runs quickly
+learning_rate = 5e-4
+models_dir = '../cache'  # NOTE(review): not referenced anywhere else in this example
+
+#Bundle the model parameters and the training parameters
+gan_args = ModelParameters(batch_size=batch_size,
+                           lr=learning_rate,
+                           betas=(beta_1, beta_2),
+                           noise_dim=noise_dim,
+                           layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             cache_prefix='',
+                             sample_interval=log_step,
+                             label_dim=-1,
+                             labels=(0,1))
+
+#Bin 'Amount' into 5 equal-width intervals and keep only the integer bin codes
+fraud_w_classes['Amount'] = pd.cut(fraud_w_classes['Amount'], 5).cat.codes
+
+#Init the Conditional GAN (the label column itself is passed to fit() below through label_cols)
+synth = RegularSynthesizer(modelname='cgan', model_parameters=gan_args)
+
+#Training the Conditional GAN
+synth.fit(data=fraud_w_classes, label_cols=["Class"], train_arguments=train_args, num_cols=num_cols, cat_cols=cat_cols)
+
+#Saving the synthesizer
+synth.save('creditcard_cgan_model.pkl')
+
+#Loading the synthesizer
+synthesizer = RegularSynthesizer.load('creditcard_cgan_model.pkl')
+
+#Sampling from the synthesizer: the condition frame has 100 rows, all with Class == 1
+cond_array = pd.DataFrame(100*[1], columns=['Class'])
+# Synthesizer samples are returned in the original format (inverse_transform of internal processing already took place)
+sample = synthesizer.sample(cond_array)
+
+print(sample)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/cramer_gan_example/index.html b/2.0/synthetic_data/single_table/cramer_gan_example/index.html new file mode 100644 index 00000000..937dfc5f --- /dev/null +++ b/2.0/synthetic_data/single_table/cramer_gan_example/index.html @@ -0,0 +1,1036 @@ + + + + + + + + + + + + + + + + + + + + + + Cramer GAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using CRAMER GAN to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

CRAMER GAN is a variant of GAN that employs the Cramer distance as a measure of similarity between real and generated data distributions to improve training stability and enhance sample quality:

+ +

Here’s an example of how to synthesize tabular data with CRAMER GAN using the Credit Card dataset:

+
"""
+    CramerGAN python file example
+"""
+#Install ydata-synthetic lib
+# pip install ydata-synthetic
+import sklearn.cluster as cluster
+import numpy as np
+import pandas as pd
+
+from ydata_synthetic.utils.cache import cache_file
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+
+#Fetch the Credit Card dataset (cache_file presumably downloads it once and returns a local path) and load it using the first column as index
+data_path = cache_file('creditcard.csv', 'https://datahub.io/machine-learning/creditcard/r/creditcard.csv')
+data = pd.read_csv(data_path, index_col=[0])
+
+#Data processing and analysis: every column except 'Class' is numerical, 'Class' is categorical
+num_cols = list(data.columns[ data.columns != 'Class' ])
+cat_cols = ['Class']
+
+print('Dataset columns: {}'.format(num_cols))
+sorted_cols = ['V14', 'V4', 'V10', 'V17', 'V12', 'V26', 'Amount', 'V21', 'V8', 'V11', 'V7', 'V28', 'V19', 'V3', 'V22', 'V6', 'V20', 'V27', 'V16', 'V13', 'V25', 'V24', 'V18', 'V2', 'V1', 'V5', 'V15', 'V9', 'V23', 'Class']
+processed_data = data[ sorted_cols ].copy()
+
+#For the purpose of this example we will only synthesize the minority class (rows where Class == 1)
+train_data = processed_data.loc[processed_data['Class'] == 1].copy()
+
+#Cluster the minority class into 2 groups with KMeans to build a new class column. NOTE(review): fraud_w_classes is built below, but fit() is called with train_data, so these labels are not used for training here
+print("Dataset info: Number of records - {} Number of variables - {}".format(train_data.shape[0], train_data.shape[1]))
+algorithm = cluster.KMeans
+args, kwds = (), {'n_clusters':2, 'random_state':0}
+labels = algorithm(*args, **kwds).fit_predict(train_data[ num_cols ])
+
+print( pd.DataFrame( [ [np.sum(labels==i)] for i in np.unique(labels) ], columns=['count'], index=np.unique(labels) ) )
+
+fraud_w_classes = train_data.copy()
+fraud_w_classes['Class'] = labels
+
+# GAN training
+#Define the GAN and training parameters
+noise_dim = 32
+dim = 128
+batch_size = 128
+
+log_step = 100
+epochs = 500+1
+learning_rate = 5e-4
+beta_1 = 0.5
+beta_2 = 0.9
+models_dir = '../cache'  # NOTE(review): not referenced anywhere else in this example
+
+model_parameters = ModelParameters(batch_size=batch_size,
+                           lr=learning_rate,
+                           betas=(beta_1, beta_2),
+                           noise_dim=noise_dim,
+                           layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             sample_interval=log_step)
+
+#Training the CRAMERGAN model
+synth = RegularSynthesizer(modelname='cramer', model_parameters=model_parameters)
+synth.fit(data=train_data, train_arguments = train_args, num_cols = num_cols, cat_cols = cat_cols)
+
+#Saving the synthesizer to later generate new events
+synth.save(path='creditcard_cramergan_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synth = RegularSynthesizer.load(path='creditcard_cramergan_model.pkl')
+#Sampling the data
+#Note that the returned data is not inverse processed (NOTE(review): confirm against the current sample() behavior).
+data_sample = synth.sample(100000)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/ctgan_example/index.html b/2.0/synthetic_data/single_table/ctgan_example/index.html new file mode 100644 index 00000000..0d2e8cfb --- /dev/null +++ b/2.0/synthetic_data/single_table/ctgan_example/index.html @@ -0,0 +1,1074 @@ + + + + + + + + + + + + + + + + + + + + + + CTGAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using CTGAN to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

Additionally, real-world data usually comprises both numeric and categorical features. Numeric features are those that encode quantitative values, whereas categorical represent qualitative measurements.

+

CTGAN was specifically designed to deal with the challenges posed by tabular datasets, handling mixed (numeric and categorical) data:

+ +

Here’s an example of how to synthesize tabular data with CTGAN using the Adult Census Income dataset:

+
from pmlb import fetch_data
+
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+
+# Load data and define the data processor parameters
+data = fetch_data('adult')
+num_cols = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']
+cat_cols = ['workclass','education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
+            'native-country', 'target']
+
+# Defining the training parameters
+batch_size = 500
+epochs = 500+1
+learning_rate = 2e-4
+beta_1 = 0.5
+beta_2 = 0.9
+
+ctgan_args = ModelParameters(batch_size=batch_size,
+                             lr=learning_rate,
+                             betas=(beta_1, beta_2))
+
+train_args = TrainParameters(epochs=epochs)
+synth = RegularSynthesizer(modelname='ctgan', model_parameters=ctgan_args)
+synth.fit(data=data, train_arguments=train_args, num_cols=num_cols, cat_cols=cat_cols)
+
+synth.save('adult_ctgan_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synth = RegularSynthesizer.load('adult_ctgan_model.pkl')
+synth_data = synth.sample(1000)
+print(synth_data)
+
+

Best practices & results optimization

+
+

Generate the best synthetic data quality

+

If you are having a hard time ensuring that CTGAN delivers the synthetic data quality you need for your use case, +give YData Fabric Synthetic Data a try. +Fabric Synthetic Data generation is considered the best in terms of quality. +Read more about it in this benchmark.

+
+

CTGAN, as any other Machine Learning model, requires optimization at the level of the data preparation as well as +hyperparameter tuning. Here follows a list of best-practices and tips to improve your synthetic data quality:

+
    +
  • +

    Understand Your Data: +Thoroughly understand the characteristics and distribution of your original dataset before using CTGAN. +Identify important features, correlations, and patterns in the data. +Leverage ydata-profiling to automate the process of understanding your data.

    +
  • +
  • +

    Data Preprocessing: +Clean and preprocess your data to handle missing values, outliers, and other anomalies before training CTGAN. +Standardize or normalize numerical features to ensure consistent scales.

    +
  • +
  • +

    Feature Engineering: +Create additional meaningful features that could improve the quality of the synthetic data.

    +
  • +
  • +

    Optimize Model Parameters: +Experiment with CTGAN hyperparameters such as epochs, batch_size, and gen_dim to find the values that work best +for your specific dataset. +Fine-tune the learning rate for better convergence.

    +
  • +
  • +

    Conditional Generation: +Leverage the conditional generation capabilities of CTGAN by specifying conditions for certain features if applicable. +Adjust the conditioning mechanism to enhance the relevance of generated samples.

    +
  • +
  • +

    Handle Imbalanced Data: +If your original dataset is imbalanced, ensure that CTGAN captures the distribution of minority classes effectively. +Adjust sampling strategies if needed.

    +
  • +
  • +

    Use Larger Datasets: +Train CTGAN on larger datasets when possible to capture a more comprehensive representation of the underlying data distribution.

    +
  • +
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/cwgangp_example/index.html b/2.0/synthetic_data/single_table/cwgangp_example/index.html new file mode 100644 index 00000000..5f23d184 --- /dev/null +++ b/2.0/synthetic_data/single_table/cwgangp_example/index.html @@ -0,0 +1,1036 @@ + + + + + + + + + + + + + + + + + + + + + + CWGAN-GP - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate conditional synthetic data.

+
+

Using CWGAN-GP to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

CWGAN-GP is a variant of GAN that incorporates conditional information to generate data samples, while leveraging the Wasserstein distance to improve training stability and sample quality:

+ +

Here’s an example of how to synthesize tabular data with CWGAN-GP using the Credit Card dataset:

+
"""
+    CramerGAN python file example
+"""
+#Install ydata-synthetic lib
+# pip install ydata-synthetic
+import sklearn.cluster as cluster
+import numpy as np
+import pandas as pd
+
+from ydata_synthetic.utils.cache import cache_file
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+
+#Read the original data and have it preprocessed
+data_path = cache_file('creditcard.csv', 'https://datahub.io/machine-learning/creditcard/r/creditcard.csv')
+data = pd.read_csv(data_path, index_col=[0])
+
+#Data processing and analysis
+num_cols = list(data.columns[ data.columns != 'Class' ])
+cat_cols = ['Class']
+
+print('Dataset columns: {}'.format(num_cols))
+sorted_cols = ['V14', 'V4', 'V10', 'V17', 'V12', 'V26', 'Amount', 'V21', 'V8', 'V11', 'V7', 'V28', 'V19', 'V3', 'V22', 'V6', 'V20', 'V27', 'V16', 'V13', 'V25', 'V24', 'V18', 'V2', 'V1', 'V5', 'V15', 'V9', 'V23', 'Class']
+processed_data = data[ sorted_cols ].copy()
+
+#For the purpose of this example we will only synthesize the minority class
+train_data = processed_data.loc[processed_data['Class'] == 1].copy()
+
+#Create a new class column using KMeans - This will mainly be useful if we want to leverage conditional GAN
+print("Dataset info: Number of records - {} Number of variables - {}".format(train_data.shape[0], train_data.shape[1]))
+algorithm = cluster.KMeans
+args, kwds = (), {'n_clusters':2, 'random_state':0}
+labels = algorithm(*args, **kwds).fit_predict(train_data[ num_cols ])
+
+print( pd.DataFrame( [ [np.sum(labels==i)] for i in np.unique(labels) ], columns=['count'], index=np.unique(labels) ) )
+
+fraud_w_classes = train_data.copy()
+fraud_w_classes['Class'] = labels
+
+# GAN training
+#Define the GAN and training parameters
+noise_dim = 32
+dim = 128
+batch_size = 128
+
+log_step = 100
+epochs = 500+1
+learning_rate = 5e-4
+beta_1 = 0.5
+beta_2 = 0.9
+models_dir = '../cache'
+
+model_parameters = ModelParameters(batch_size=batch_size,
+                           lr=learning_rate,
+                           betas=(beta_1, beta_2),
+                           noise_dim=noise_dim,
+                           layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             sample_interval=log_step)
+
+#Training the CRAMERGAN model
+synth = RegularSynthesizer(modelname='cramer', model_parameters=model_parameters)
+synth.fit(data=train_data, train_arguments = train_args, num_cols = num_cols, cat_cols = cat_cols)
+
+#Saving the synthesizer to later generate new events
+synth.save(path='creditcard_cramergan_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synth = RegularSynthesizer.load(path='creditcard_cramergan_model.pkl')
+#Sampling the data
+#Note that the returned data is not inverse processed.
+data_sample = synth.sample(100000)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/dragan_example/index.html b/2.0/synthetic_data/single_table/dragan_example/index.html new file mode 100644 index 00000000..e37d4791 --- /dev/null +++ b/2.0/synthetic_data/single_table/dragan_example/index.html @@ -0,0 +1,1004 @@ + + + + + + + + + + + + + + + + + + + + + + DRAGAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using DRAGAN to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

DRAGAN is a GAN variant that uses a gradient penalty to improve training stability and mitigate mode collapse:

+ +

Here’s an example of how to synthesize tabular data with DRAGAN using the Adult Census Income dataset:

+
from pmlb import fetch_data
+
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+
+#Load data and define the data processor parameters
+data = fetch_data('adult')
+num_cols = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']
+cat_cols = ['workclass','education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
+            'native-country', 'target']
+
+# DRAGAN training
+#Defining the training parameters of DRAGAN
+noise_dim = 128
+dim = 128
+batch_size = 500
+
+log_step = 100
+epochs = 500+1
+learning_rate = 1e-5
+beta_1 = 0.5
+beta_2 = 0.9
+models_dir = '../cache'
+
+gan_args = ModelParameters(batch_size=batch_size,
+                           lr=learning_rate,
+                           betas=(beta_1, beta_2),
+                           noise_dim=noise_dim,
+                           layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             sample_interval=log_step)
+
+synth = RegularSynthesizer(modelname='dragan', model_parameters=gan_args, n_discriminator=3)
+synth.fit(data = data, train_arguments = train_args, num_cols = num_cols, cat_cols = cat_cols)
+
+synth.save('adult_dragan_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synthesizer = RegularSynthesizer.load('adult_dragan_model.pkl')
+synthesizer.sample(1000)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/gmm_example/index.html b/2.0/synthetic_data/single_table/gmm_example/index.html new file mode 100644 index 00000000..c4d467a4 --- /dev/null +++ b/2.0/synthetic_data/single_table/gmm_example/index.html @@ -0,0 +1,963 @@ + + + + + + + + + + + + + + + + + + + + + + GMM - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using GMMs to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like +format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

Gaussian Mixture Models (GMMs) are a type of probabilistic model. Probabilistic models can also be leveraged to generate +synthetic data. In particular, GMMs generate synthetic data by learning the original data distribution +while fitting it to a mixture of Gaussian distributions.

+ + + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/wgan_example/index.html b/2.0/synthetic_data/single_table/wgan_example/index.html new file mode 100644 index 00000000..a9335160 --- /dev/null +++ b/2.0/synthetic_data/single_table/wgan_example/index.html @@ -0,0 +1,1035 @@ + + + + + + + + + + + + + + + + + + + + + + WGAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using WGAN to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

WGAN is a variant of GAN that utilizes the Wasserstein distance to improve training stability and generate higher quality samples:

+ +

Here’s an example of how to synthesize tabular data with WGAN using the Credit Card dataset:

+
#Install ydata-synthetic lib
+# pip install ydata-synthetic
+import sklearn.cluster as cluster
+import pandas as pd
+import numpy as np
+
+from ydata_synthetic.utils.cache import cache_file
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+
+#Read the original data and have it preprocessed
+data_path = cache_file('creditcard.csv', 'https://datahub.io/machine-learning/creditcard/r/creditcard.csv')
+data = pd.read_csv(data_path, index_col=[0])
+
+#Data processing and analysis
+num_cols = list(data.columns[ data.columns != 'Class' ])
+cat_cols = ['Class']
+
+print('Dataset columns: {}'.format(num_cols))
+sorted_cols = ['V14', 'V4', 'V10', 'V17', 'V12', 'V26', 'Amount', 'V21', 'V8', 'V11', 'V7', 'V28', 'V19', 'V3', 'V22', 'V6', 'V20', 'V27', 'V16', 'V13', 'V25', 'V24', 'V18', 'V2', 'V1', 'V5', 'V15', 'V9', 'V23', 'Class']
+processed_data = data[ sorted_cols ].copy()
+
+#For the purpose of this example we will only synthesize the minority class
+train_data = processed_data.loc[processed_data['Class'] == 1].copy()
+
+print("Dataset info: Number of records - {} Number of variables - {}".format(train_data.shape[0], train_data.shape[1]))
+algorithm = cluster.KMeans
+args, kwds = (), {'n_clusters':2, 'random_state':0}
+labels = algorithm(*args, **kwds).fit_predict(train_data[ num_cols ])
+
+print( pd.DataFrame( [ [np.sum(labels==i)] for i in np.unique(labels) ], columns=['count'], index=np.unique(labels) ) )
+
+fraud_w_classes = train_data.copy()
+fraud_w_classes['Class'] = labels
+
+# GAN training
+#Define the GAN and training parameters
+noise_dim = 32
+dim = 128
+batch_size = 128
+
+log_step = 100
+epochs = 500+1
+learning_rate = 5e-4
+beta_1 = 0.5
+beta_2 = 0.9
+models_dir = '../cache'
+
+model_parameters = ModelParameters(batch_size=batch_size,
+                                   lr=learning_rate,
+                                   betas=(beta_1, beta_2),
+                                   noise_dim=noise_dim,
+                                   layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             sample_interval=log_step)
+
+test_size = 492 # number of fraud cases
+noise_dim = 32
+
+#Training the WGAN model
+synth = RegularSynthesizer(modelname='wgan', model_parameters=model_parameters, n_critic=10)
+synth.fit(data=train_data, train_arguments = train_args, num_cols = num_cols, cat_cols = cat_cols)
+
+#Saving the synthesizer to later generate new events
+synth.save(path='creditcard_wgan_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synth = RegularSynthesizer.load(path='creditcard_wgan_model.pkl')
+
+#Sampling the data
+data_sample = synth.sample(100000)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/single_table/wgangp_example/index.html b/2.0/synthetic_data/single_table/wgangp_example/index.html new file mode 100644 index 00000000..ef77f48b --- /dev/null +++ b/2.0/synthetic_data/single_table/wgangp_example/index.html @@ -0,0 +1,1003 @@ + + + + + + + + + + + + + + + + + + + + + + WGAN-GP - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize tabular data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic data.

+
+

Using WGAN-GP to generate tabular synthetic data:

+

Real-world domains are often described by tabular data i.e., data that can be structured and organized in a table-like format, where features/variables are represented in columns, whereas observations correspond to the rows.

+

WGAN-GP is a variant of GAN that incorporates a gradient penalty term to enhance training stability and improve the diversity of generated samples:

+ +

Here’s an example of how to synthesize tabular data with WGAN-GP using the Adult Census Income dataset:

+
from pmlb import fetch_data
+
+from ydata_synthetic.synthesizers.regular import RegularSynthesizer
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+
+#Load data and define the data processor parameters
+data = fetch_data('adult')
+num_cols = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']
+cat_cols = ['workclass','education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex',
+            'native-country', 'target']
+
+#Defining the training parameters
+noise_dim = 128
+dim = 128
+batch_size = 500
+
+log_step = 100
+epochs = 500+1
+learning_rate = [5e-4, 3e-3]
+beta_1 = 0.5
+beta_2 = 0.9
+models_dir = '../cache'
+
+gan_args = ModelParameters(batch_size=batch_size,
+                           lr=learning_rate,
+                           betas=(beta_1, beta_2),
+                           noise_dim=noise_dim,
+                           layers_dim=dim)
+
+train_args = TrainParameters(epochs=epochs,
+                             sample_interval=log_step)
+
+synth = RegularSynthesizer(modelname='wgangp', model_parameters=gan_args, n_critic=2)
+synth.fit(data, train_args, num_cols, cat_cols)
+
+synth.save('adult_wgangp_model.pkl')
+
+#########################################################
+#    Loading and sampling from a trained synthesizer    #
+#########################################################
+synth = RegularSynthesizer.load('adult_wgangp_model.pkl')
+synth_data = synth.sample(1000)
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/time_series/doppelganger_example/index.html b/2.0/synthetic_data/time_series/doppelganger_example/index.html new file mode 100644 index 00000000..c72cb141 --- /dev/null +++ b/2.0/synthetic_data/time_series/doppelganger_example/index.html @@ -0,0 +1,1025 @@ + + + + + + + + + + + + + + + + + + + + + + DoppelGANger - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize time-series data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic time-series data.

+
+

Using DoppelGANger to generate synthetic time-series data:

+

Although tabular data may be the most frequently discussed type of data, a great number of real-world domains — from traffic and daily trajectories to stock prices and energy consumption patterns — produce time-series data which introduces several aspects of complexity to synthetic data generation.

+

Time-series data is structured sequentially, with observations ordered chronologically based on their associated timestamps or time intervals. It explicitly incorporates the temporal aspect, allowing for the analysis of trends, seasonality, and other dependencies over time.

+

DoppelGANger is a model that uses a Generative Adversarial Network (GAN) framework to generate synthetic time series data by learning the underlying temporal dependencies and characteristics of the original data:

+ +

Here’s an example of how to synthesize time-series data with DoppelGANger using the Measuring Broadband America dataset:

+
"""
+    DoppelGANger architecture example file
+"""
+
+# Importing necessary libraries
+import pandas as pd
+from os import path
+import matplotlib.pyplot as plt
+from ydata_synthetic.synthesizers.timeseries import TimeSeriesSynthesizer
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+
+# Read the data
+mba_data = pd.read_csv("../../data/fcc_mba.csv")
+numerical_cols = ["traffic_byte_counter", "ping_loss_rate"]
+categorical_cols = [col for col in mba_data.columns if col not in numerical_cols]
+
+# Define model parameters
+model_args = ModelParameters(batch_size=100,
+                             lr=0.001,
+                             betas=(0.2, 0.9),
+                             latent_dim=20,
+                             gp_lambda=2,
+                             pac=1)
+
+train_args = TrainParameters(epochs=400, sequence_length=56,
+                             sample_length=8, rounds=1,
+                             measurement_cols=["traffic_byte_counter", "ping_loss_rate"])
+
+# Training the DoppelGANger synthesizer
+if path.exists('doppelganger_mba'):
+    model_dop_gan = TimeSeriesSynthesizer.load('doppelganger_mba')
+else:
+    model_dop_gan = TimeSeriesSynthesizer(modelname='doppelganger', model_parameters=model_args)
+    model_dop_gan.fit(mba_data, train_args, num_cols=numerical_cols, cat_cols=categorical_cols)
+    model_dop_gan.save('doppelganger_mba')
+
+# Generate synthetic data
+synth_data = model_dop_gan.sample(n_samples=600)
+synth_df = pd.concat(synth_data, axis=0)
+
+# Create a plot for each measurement column
+plt.figure(figsize=(10, 6))
+
+plt.subplot(2, 1, 1)
+plt.plot(mba_data['traffic_byte_counter'].reset_index(drop=True), label='Real Traffic')
+plt.plot(synth_df['traffic_byte_counter'].reset_index(drop=True), label='Synthetic Traffic', alpha=0.7)
+plt.xlabel('Index')
+plt.ylabel('Value')
+plt.title('Traffic Comparison')
+plt.legend()
+plt.grid(True)
+
+plt.subplot(2, 1, 2)
+plt.plot(mba_data['ping_loss_rate'].reset_index(drop=True), label='Real Ping')
+plt.plot(synth_df['ping_loss_rate'].reset_index(drop=True), label='Synthetic Ping', alpha=0.7)
+plt.xlabel('Index')
+plt.ylabel('Value')
+plt.title('Ping Comparison')
+plt.legend()
+plt.grid(True)
+
+plt.tight_layout()
+plt.show()
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/time_series/timegan_example/index.html b/2.0/synthetic_data/time_series/timegan_example/index.html new file mode 100644 index 00000000..c2ff6cbf --- /dev/null +++ b/2.0/synthetic_data/time_series/timegan_example/index.html @@ -0,0 +1,1063 @@ + + + + + + + + + + + + + + + + + + + + + + TimeGAN - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

Synthesize time-series data

+
+

Outdated

+

Note that this example won't work with the latest version of ydata-synthetic.

+

Please check ydata-sdk to see how to generate synthetic time-series data.

+
+

Why YData Fabric vs TimeGAN for time-series data

+

YData Fabric offers advanced capabilities for time-series synthetic data generation, surpassing TimeGAN in terms of flexibility, +scalability, and ease of use. With YData Fabric, users can generate high-quality synthetic time-series data while benefiting from built-in data profiling tools +that ensure the integrity and consistency of the data. Unlike TimeGAN, which is a single model for time-series, YData Fabric offers a solution that is suitable for different types of datasets and behaviours. +Additionally, YData Fabric is designed for scalability, enabling seamless handling of large, complex time-series datasets. Its guided UI makes it easy to adapt to different time-series scenarios, +from healthcare to financial data, making it a more comprehensive and flexible solution for time-series data generation.

+

For more on YData Fabric vs. synthetic data generation with TimeGAN, read this blog post.

+

Using TimeGAN to generate synthetic time-series data

+

Although tabular data may be the most frequently discussed type of data, a great number of real-world domains — from traffic and daily trajectories to stock prices and energy consumption patterns — produce time-series data which introduces several aspects of complexity to synthetic data generation.

+

Time-series data is structured sequentially, with observations ordered chronologically based on their associated timestamps or time intervals. It explicitly incorporates the temporal aspect, allowing for the analysis of trends, seasonality, and other dependencies over time.

+

TimeGAN is a model that uses a Generative Adversarial Network (GAN) framework to generate synthetic time series data by learning the underlying temporal dependencies and characteristics of the original data:

+ +

Here’s an example of how to synthesize time-series data with TimeGAN using the Yahoo Stock Price dataset:

+
"""
+    TimeGAN architecture example file
+"""
+
+# Importing necessary libraries
+from os import path
+from ydata_synthetic.synthesizers.timeseries import TimeSeriesSynthesizer
+from ydata_synthetic.preprocessing.timeseries import processed_stock
+from ydata_synthetic.synthesizers import ModelParameters, TrainParameters
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Define model parameters
+gan_args = ModelParameters(batch_size=128,
+                           lr=5e-4,
+                           noise_dim=32,
+                           layers_dim=128,
+                           latent_dim=24,
+                           gamma=1)
+
+train_args = TrainParameters(epochs=50000,
+                             sequence_length=24,
+                             number_sequences=6)
+
+# Read the data
+stock_data = pd.read_csv("../../data/stock_data.csv")
+cols = list(stock_data.columns)
+
+# Training the TimeGAN synthesizer
+if path.exists('synthesizer_stock.pkl'):
+    synth = TimeSeriesSynthesizer.load('synthesizer_stock.pkl')
+else:
+    synth = TimeSeriesSynthesizer(modelname='timegan', model_parameters=gan_args)
+    synth.fit(stock_data, train_args, num_cols=cols)
+    synth.save('synthesizer_stock.pkl')
+
+# Generating new synthetic samples
+stock_data_blocks = processed_stock(path='../../data/stock_data.csv', seq_len=24)
+synth_data = synth.sample(n_samples=len(stock_data_blocks))
+print(synth_data[0].shape)
+
+# Plotting some generated samples. Both Synthetic and Original data are still standardized with values between [0,1]
+fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 10))
+axes=axes.flatten()
+
+time = list(range(1,25))
+obs = np.random.randint(len(stock_data_blocks))
+
+for j, col in enumerate(cols):
+    df = pd.DataFrame({'Real': stock_data_blocks[obs][:, j],
+                   'Synthetic': synth_data[obs].iloc[:, j]})
+    df.plot(ax=axes[j],
+            title = col,
+            secondary_y='Synthetic data', style=['-', '--'])
+fig.tight_layout()
+
+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/2.0/synthetic_data/ydata_fabric_app/index.html b/2.0/synthetic_data/ydata_fabric_app/index.html new file mode 100644 index 00000000..1b2fbf50 --- /dev/null +++ b/2.0/synthetic_data/ydata_fabric_app/index.html @@ -0,0 +1,1015 @@ + + + + + + + + + + + + + + + + + + + + + + UI interface - YData Fabric - YData-Synthetic + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + +
+ + + + + + + +
+ +
+ + + + +
+
+ + + +
+
+
+ + + + + + + + +
+
+
+ + + + +
+
+ + + + + +

The UI guided experience for Synthetic Data generation

+

YData Fabric provides a robust, guided user interface (UI) specifically designed to streamline synthetic data generation. +This interface is tailored to support users at every level, ensuring that both novice users and experienced data scientists can efficiently generate +synthetic datasets while adhering to best practices.

+

Step-by-Step Workflow

+

The YData Fabric UI organizes the synthetic data generation process into a structured, step-by-step workflow. +Each stage of the process is clearly defined and supported by guidance within the interface, helping users navigate tasks like data profiling, +metadata and synthesizer configuration and synthetic data quality evaluation.

+
    +
  • Data Upload and Profiling: Users start by uploading their datasets directly into the platform. YData Fabric’s profiling tool automatically scans +the data, generating insights into key attributes such as data distributions, correlations, and missing values. +These insights are presented in an intuitive, visual format, ensuring users can quickly assess the quality and structure of their data.
  • +
  • Alerts for Data Issues: The UI will alert users to potential issues such as data imbalances, outliers, or incomplete fields that may affect the +quality of the synthetic data.
  • +
  • Synthetic Data Generation Model Configuration: Once the data is profiled, the UI supports metadata configuration (categorical, numerical, dates, etc.) and +anonymization integration.
  • +
  • Model Performance Insights: During the model training phase, YData Fabric monitors key performance indicators (KPIs) like fidelity, utility and privacy. +These KPIs, such as data fidelity and privacy scores, are displayed on the dashboard, allowing users to evaluate how closely the synthetic data aligns with the original dataset.
  • +
  • Customization and Advanced Controls: For more experienced users, YData Fabric provides customization options within the guided UI. +Users have access to advanced settings, such as conditional synthetic data generation or business rules.
  • +
  • Preserving Data Integrity: YData Fabric supports datasets that require strict adherence to structural patterns (e.g., time-series data, healthcare records or databases).
  • +
+

Getting started with YData Fabric (Community version)

+

YData Fabric’s Community Version offers users a free, accessible entry point to explore synthetic data generation. +To get started, users can sign up for the Community Version and access the guided UI directly. +Once registered, users are provided with a range of features, including data profiling, synthetic data generation, pipelines and access to YData’s proprietary models for data quality!

+ + + + + + +
+
+ + + + +
+ + + +
+ +
+ + +
+ +
+
+
+
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/latest/404.html b/latest/404.html index 7f3a25f4..ea8203d3 100644 --- a/latest/404.html +++ b/latest/404.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../1.5/404.html... + Redirecting to ../2.0/404.html... \ No newline at end of file diff --git a/latest/getting-started/installation/index.html b/latest/getting-started/installation/index.html index 03b9f2e8..a354f272 100644 --- a/latest/getting-started/installation/index.html +++ b/latest/getting-started/installation/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../1.5/getting-started/installation/... + Redirecting to ../../../2.0/getting-started/installation/... \ No newline at end of file diff --git a/latest/index.html b/latest/index.html index 8b9b5d39..0b2f8598 100644 --- a/latest/index.html +++ b/latest/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../1.5/... + Redirecting to ../2.0/... \ No newline at end of file diff --git a/latest/integrations/gx_integration/index.html b/latest/integrations/gx_integration/index.html index 4ad7aac4..21bb0e8d 100644 --- a/latest/integrations/gx_integration/index.html +++ b/latest/integrations/gx_integration/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../1.5/integrations/gx_integration/... + Redirecting to ../../../2.0/integrations/gx_integration/... \ No newline at end of file diff --git a/latest/support/help-troubleshooting/index.html b/latest/support/help-troubleshooting/index.html index 8cd19807..9f3e9719 100644 --- a/latest/support/help-troubleshooting/index.html +++ b/latest/support/help-troubleshooting/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../1.5/support/help-troubleshooting/... + Redirecting to ../../../2.0/support/help-troubleshooting/... 
\ No newline at end of file diff --git a/latest/synthetic_data/faqs/index.html b/latest/synthetic_data/faqs/index.html index d100a414..63608e64 100644 --- a/latest/synthetic_data/faqs/index.html +++ b/latest/synthetic_data/faqs/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../1.5/synthetic_data/faqs/... + Redirecting to ../../../2.0/synthetic_data/faqs/... \ No newline at end of file diff --git a/latest/synthetic_data/index.html b/latest/synthetic_data/index.html index 378a5499..ee3d8f43 100644 --- a/latest/synthetic_data/index.html +++ b/latest/synthetic_data/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../1.5/synthetic_data/... + Redirecting to ../../2.0/synthetic_data/... \ No newline at end of file diff --git a/latest/synthetic_data/multi_table/fabric_multitable/index.html b/latest/synthetic_data/multi_table/fabric_multitable/index.html index 66fe4054..81bcdfd1 100644 --- a/latest/synthetic_data/multi_table/fabric_multitable/index.html +++ b/latest/synthetic_data/multi_table/fabric_multitable/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/multi_table/fabric_multitable/... + Redirecting to ../../../../2.0/synthetic_data/multi_table/fabric_multitable/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/cgan_example/index.html b/latest/synthetic_data/single_table/cgan_example/index.html index 0157fb32..8bb1335c 100644 --- a/latest/synthetic_data/single_table/cgan_example/index.html +++ b/latest/synthetic_data/single_table/cgan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/cgan_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/cgan_example/... 
\ No newline at end of file diff --git a/latest/synthetic_data/single_table/cramer_gan_example/index.html b/latest/synthetic_data/single_table/cramer_gan_example/index.html index 9681555a..aeabd4e2 100644 --- a/latest/synthetic_data/single_table/cramer_gan_example/index.html +++ b/latest/synthetic_data/single_table/cramer_gan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/cramer_gan_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/cramer_gan_example/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/ctgan_example/index.html b/latest/synthetic_data/single_table/ctgan_example/index.html index 79124ff6..2326a2f1 100644 --- a/latest/synthetic_data/single_table/ctgan_example/index.html +++ b/latest/synthetic_data/single_table/ctgan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/ctgan_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/ctgan_example/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/cwgangp_example/index.html b/latest/synthetic_data/single_table/cwgangp_example/index.html index 89aa6443..92f74688 100644 --- a/latest/synthetic_data/single_table/cwgangp_example/index.html +++ b/latest/synthetic_data/single_table/cwgangp_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/cwgangp_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/cwgangp_example/... 
\ No newline at end of file diff --git a/latest/synthetic_data/single_table/dragan_example/index.html b/latest/synthetic_data/single_table/dragan_example/index.html index 4e6c8393..f00c2775 100644 --- a/latest/synthetic_data/single_table/dragan_example/index.html +++ b/latest/synthetic_data/single_table/dragan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/dragan_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/dragan_example/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/gmm_example/index.html b/latest/synthetic_data/single_table/gmm_example/index.html index cc4c4689..4c0f0bab 100644 --- a/latest/synthetic_data/single_table/gmm_example/index.html +++ b/latest/synthetic_data/single_table/gmm_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/gmm_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/gmm_example/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/wgan_example/index.html b/latest/synthetic_data/single_table/wgan_example/index.html index 9b680e89..26321071 100644 --- a/latest/synthetic_data/single_table/wgan_example/index.html +++ b/latest/synthetic_data/single_table/wgan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/wgan_example/... + Redirecting to ../../../../2.0/synthetic_data/single_table/wgan_example/... \ No newline at end of file diff --git a/latest/synthetic_data/single_table/wgangp_example/index.html b/latest/synthetic_data/single_table/wgangp_example/index.html index 29c2ae64..082a8307 100644 --- a/latest/synthetic_data/single_table/wgangp_example/index.html +++ b/latest/synthetic_data/single_table/wgangp_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/single_table/wgangp_example/... 
+ Redirecting to ../../../../2.0/synthetic_data/single_table/wgangp_example/... \ No newline at end of file diff --git a/latest/synthetic_data/time_series/doppelganger_example/index.html b/latest/synthetic_data/time_series/doppelganger_example/index.html index bdb7a0ce..226773d9 100644 --- a/latest/synthetic_data/time_series/doppelganger_example/index.html +++ b/latest/synthetic_data/time_series/doppelganger_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/time_series/doppelganger_example/... + Redirecting to ../../../../2.0/synthetic_data/time_series/doppelganger_example/... \ No newline at end of file diff --git a/latest/synthetic_data/time_series/timegan_example/index.html b/latest/synthetic_data/time_series/timegan_example/index.html index 6d4d19ee..63bd63cb 100644 --- a/latest/synthetic_data/time_series/timegan_example/index.html +++ b/latest/synthetic_data/time_series/timegan_example/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../../1.5/synthetic_data/time_series/timegan_example/... + Redirecting to ../../../../2.0/synthetic_data/time_series/timegan_example/... \ No newline at end of file diff --git a/latest/synthetic_data/ydata_fabric_app/index.html b/latest/synthetic_data/ydata_fabric_app/index.html index c347e1cd..83aa7cff 100644 --- a/latest/synthetic_data/ydata_fabric_app/index.html +++ b/latest/synthetic_data/ydata_fabric_app/index.html @@ -4,13 +4,13 @@ Redirecting - Redirecting to ../../../1.5/synthetic_data/ydata_fabric_app/... + Redirecting to ../../../2.0/synthetic_data/ydata_fabric_app/... 
\ No newline at end of file diff --git a/versions.json b/versions.json index 42e7ab24..f038a9db 100644 --- a/versions.json +++ b/versions.json @@ -1 +1 @@ -[{"version": "1.5", "title": "1.5", "aliases": ["latest"]}, {"version": "1.4", "title": "1.4", "aliases": []}, {"version": "1.3", "title": "1.3", "aliases": []}, {"version": "1.2", "title": "1.2", "aliases": []}, {"version": "1.0", "title": "1.0", "aliases": []}] \ No newline at end of file +[{"version": "2.0", "title": "2.0", "aliases": ["latest"]}, {"version": "1.5", "title": "1.5", "aliases": []}, {"version": "1.4", "title": "1.4", "aliases": []}, {"version": "1.3", "title": "1.3", "aliases": []}, {"version": "1.2", "title": "1.2", "aliases": []}, {"version": "1.0", "title": "1.0", "aliases": []}] \ No newline at end of file