Style2 #54

Open
wants to merge 14 commits into base: master
Binary file added .DS_Store
Binary file not shown.
4 changes: 3 additions & 1 deletion .gitignore
@@ -5,7 +5,9 @@
__pycache__/
.idea/
*.pdf
# *.png
*.png
*.eps
*.txt
*.jpg
results*/

43 changes: 43 additions & 0 deletions AllSubjectInfo.json
@@ -0,0 +1,43 @@
{
  "Shared":{
    "RootPath":"/scratch/akg404-share/ECoGData_Mar_11_20/FunctionalMapping/",
    "ORG_WAVE_FS": 24414.1,
    "DOWN_WAVE_FS": 16000,
    "ORG_ECOG_FS": 3051.7625,
    "ORG_ECOG_FS_NY": 512,
    "ORG_TF_FS": 125,
    "AUDITORY" : ["cSTG","mSTG"],
    "BROCA" : ["parstriangularis","parsopercularis"],
    "MOTO" : ["precentral"],
    "SENSORY" : ["postcentral"]
  },
  "Subj":{
    "NY717":{
      "Density":"HB",
      "Task":["AudN","SenComp","VisRead","PicN","AudRep"]
    },
    "NY742":{
      "Density":"HB",
      "Task":["AudN","SenComp","VisRead","PicN","AudRep"]
    },
    "NY749":{
      "Density":"HB",
      "Task":["AudN","SenComp","VisRead","PicN","AudRep"]
    },
    "HD06":{
      "Density":"HD",
      "Task":["AudName","AudRep"],
      "EventRange":-100
    },
    "HD01":{
      "Density":"HD",
      "Task":["AudName","AudRep"],
      "BadSamples":[[1,2,3],[1,3]]
    }
  },
  "BadSamples":{
    "HD01":{
      "AudRep":[1,2,3]
    }
  }
}
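AllSubjectInfo.json bundles shared recording parameters (sampling rates, ROI labels, data root) with per-subject task lists and bad-sample indices. A minimal sketch of how such a file might be consumed, assuming the layout above; the helper name and merge order are illustrative and not taken from ECoGDataSet.py (whose diff is not rendered below):

```python
import json

def load_subject_info(path="AllSubjectInfo.json", subject="NY742"):
    # Hypothetical helper: merge the "Shared" defaults with one subject's entry.
    with open(path) as f:
        info = json.load(f)
    cfg = dict(info["Shared"])           # sampling rates, ROI lists, root path
    cfg.update(info["Subj"][subject])    # per-subject density, task list, etc.
    # Per-task bad samples live in the top-level "BadSamples" section.
    cfg["BadSamples"] = info.get("BadSamples", {}).get(subject, {})
    return cfg

# Example: load_subject_info(subject="NY717")["Task"]
# -> ["AudN", "SenComp", "VisRead", "PicN", "AudRep"]
```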
976 changes: 976 additions & 0 deletions ECoGDataSet.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
@@ -38,7 +38,7 @@
> **Adversarial Latent Autoencoders**<br>
> Stanislav Pidhorskyi, Donald Adjeroh, Gianfranco Doretto<br>
>
> **Abstract:** *Autoencoder networks are unsupervised approaches aiming at combining generative and representational properties by learning simultaneously an encoder-generator map. Although studied extensively, the issues of whether they have the same generative power of GANs, or learn disentangled representations, have not been fully addressed. We introduce an autoencoder that tackles these issues jointly, which we call Adversarial Latent Autoencoder (ALAE). It is a general architecture that can leverage recent improvements on GAN training procedures. We designed two autoencoders: one based on a MLP encoder, and another based on a StyleGAN generator, which we call StyleALAE. We verify the disentanglement properties of both architectures. We show that StyleALAE can not only generate 1024x1024 face images with comparable quality of StyleGAN, but at the same resolution can also produce face reconstructions and manipulations based on real images. This makes ALAE the first autoencoder able to compare with, and go beyond the capabilities of a generator-only type of architecture.*
> **Abstract:** *Autoencoder networks are unsupervised approaches aiming at combining generative and representational properties by learning simultaneously an encoder-generator map. Although studied extensively, the issues of whether they have the same generative power of GANs, or learn disentangled representations, have not been fully addressed. We introduce an autoencoder that tackles these issues jointly, which we call Adversarial Latent Autoencoder (ALAE). It is a general architecture that can leverage recent improvements on GAN training procedures. We designed two autoencoders: one based on a MLP encoder, and another based on a StyleGAN generator, which we call StyleALAE. We verify the disentanglement properties of both architectures. We show that StyleALAE can not only generate 1024x1024 face images with comparable quality of StyleGAN, but at the same resolution can also produce face reconstructions and manipulations based on real images. This makes ALAE the first autoencoder able to compare with, and go beyond, the capabilities of a generator-only type of architecture.*

## Citation
* Stanislav Pidhorskyi, Donald A. Adjeroh, and Gianfranco Doretto. Adversarial Latent Autoencoders. In *Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR)*, 2020. [to appear]
7 changes: 4 additions & 3 deletions checkpointer.py
@@ -66,11 +66,12 @@ def save_data():

return save_data()

def load(self, ignore_last_checkpoint=False, file_name=None):
def load(self, ignore_last_checkpoint=False, ignore_auxiliary=False, file_name=None):
save_file = os.path.join(self.cfg.OUTPUT_DIR, "last_checkpoint")
try:
with open(save_file, "r") as last_checkpoint:
f = last_checkpoint.read().strip()
f = os.path.join(self.cfg.OUTPUT_DIR, f)
except IOError:
self.logger.info("No checkpoint found. Initializing model from scratch")
if file_name is None:
@@ -81,7 +82,6 @@ def load(self, ignore_last_checkpoint=False, file_name=None):
return {}
if file_name is not None:
f = file_name

self.logger.info("Loading checkpoint from {}".format(f))
checkpoint = torch.load(f, map_location=torch.device("cpu"))
for name, model in self.models.items():
@@ -98,7 +98,8 @@ def load(self, ignore_last_checkpoint=False, file_name=None):
else:
self.logger.warning("No state dict for model: %s" % name)
checkpoint.pop('models')
if "auxiliary" in checkpoint and self.auxiliary:

if "auxiliary" in checkpoint and self.auxiliary and not ignore_auxiliary:
self.logger.info("Loading auxiliary from {}".format(f))
for name, item in self.auxiliary.items():
try:
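This change threads an `ignore_auxiliary` flag through `Checkpointer.load` and resolves checkpoint names relative to `OUTPUT_DIR`. A minimal usage sketch, assuming an already-constructed `Checkpointer`; the checkpoint path is a placeholder, not taken from this PR:

```python
# Hypothetical call site: restore network weights from a specific checkpoint
# but skip the auxiliary state (optimizers, schedulers), e.g. when fine-tuning.
extra_state = checkpointer.load(
    ignore_auxiliary=True,
    file_name="training_artifacts/some_run/model_final.pth",  # placeholder path
)
```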
66 changes: 66 additions & 0 deletions configs/ecog.yaml
@@ -0,0 +1,66 @@
# Config for the ECoG model (adapted from the ALAE FFHQ 1024x1024 config)

NAME: ecog
DATASET:
PART_COUNT: 16
SIZE: 60000
FFHQ_SOURCE: /data/datasets/ffhq-dataset/tfrecords/ffhq/ffhq-r%02d.tfrecords
PATH: /data/datasets/ffhq-dataset_new/tfrecords/ffhq/splitted/ffhq-r%02d.tfrecords.%03d

FLIP_IMAGES: False

PART_COUNT_TEST: 2
PATH_TEST: /data/datasets/ffhq-dataset_new/tfrecords/ffhq-test/splitted/ffhq-r%02d.tfrecords.%03d

SAMPLES_PATH: ''
STYLE_MIX_PATH: style_mixing/test_images/set_ecog
SPEC_CHANS: 64
TEMPORAL_SAMPLES: 128
BCTS: True
MAX_RESOLUTION_LEVEL: 7
MODEL:
LATENT_SPACE_SIZE: 128
LAYER_COUNT: 6
MAX_CHANNEL_COUNT: 512
START_CHANNEL_COUNT: 16
DLATENT_AVG_BETA: 0.995
MAPPING_LAYERS: 8
TRUNCATIOM_CUTOFF: 5
CHANNELS: 1
UNIQ_WORDS: 50
#####TAKE OFF CHECKLIST!!!########
AVERAGE_W: False
TEMPORAL_W: False
RESIDUAL: True
W_CLASSIFIER: False
CYCLE: True
ATTENTIONAL_STYLE: False
#T 4 8 16 32 64 128
ATTENTION: [False, False, False, False, True, True]
HEADS: 1
# ATTENTION: []
OUTPUT_DIR: training_artifacts/vis
# OUTPUT_DIR: training_artifacts/ecog_residual_cycle_attention3264wStyleIN_specchan64_more_attentfeatures_heads4
#####################################

TRAIN:
W_WEIGHT: 1
CYCLE_WEIGHT: 1
BASE_LEARNING_RATE: 0.002
EPOCHS_PER_LOD: 16
LEARNING_DECAY_RATE: 0.1
LEARNING_DECAY_STEPS: [96]
TRAIN_EPOCHS: 112
# 4 8 16 32 64 128 256
LOD_2_BATCH_8GPU: [512, 256, 128, 64, 32, 32] # If GPU memory ~16GB reduce last number from 32 to 24
LOD_2_BATCH_4GPU: [64, 64, 64, 64, 32, 16]
LOD_2_BATCH_2GPU: [64, 64, 64, 64, 32, 8]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 16]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 32]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 16]
# LOD_2_BATCH_1GPU: [128, 128, 128, 128, 64, 32]
# LOD_2_BATCH_1GPU: [512, 256, 256, 128, 64, 16]
LOD_2_BATCH_1GPU: [64, 64, 64, 64, 32, 16]

LEARNING_RATES: [0.0015, 0.0015, 0.0015, 0.002, 0.003, 0.003]
# LEARNING_RATES: [0.0015, 0.0015, 0.0005, 0.0003, 0.0003, 0.0002]
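The per-LOD lists under TRAIN map each resolution level (the commented row 4, 8, ..., 128) to a batch size and learning rate for a given GPU count. A minimal sketch of how such a schedule could be indexed; the function and variable names are illustrative, not this repo's API:

```python
def schedule_for_lod(train_cfg: dict, lod: int, gpu_count: int):
    """Illustrative lookup of batch size and learning rate for one resolution level."""
    key = {8: "LOD_2_BATCH_8GPU", 4: "LOD_2_BATCH_4GPU",
           2: "LOD_2_BATCH_2GPU", 1: "LOD_2_BATCH_1GPU"}[gpu_count]
    per_gpu_batch = train_cfg[key][lod]      # e.g. lod 5 (128x128), 1 GPU -> 16
    lr = train_cfg["LEARNING_RATES"][lod]    # e.g. lod 5 -> 0.003
    return per_gpu_batch, lr
```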
132 changes: 132 additions & 0 deletions configs/ecog_style2.yaml
@@ -0,0 +1,132 @@
# Config for the ECoG style2 model (adapted from the ALAE FFHQ 1024x1024 config)

NAME: ecog
DATASET:
PART_COUNT: 16
SIZE: 60000
FFHQ_SOURCE: /data/datasets/ffhq-dataset/tfrecords/ffhq/ffhq-r%02d.tfrecords
PATH: /data/datasets/ffhq-dataset_new/tfrecords/ffhq/splitted/ffhq-r%02d.tfrecords.%03d

FLIP_IMAGES: False

PART_COUNT_TEST: 4
PATH_TEST: /data/datasets/ffhq-dataset_new/tfrecords/ffhq-test/splitted/ffhq-r%02d.tfrecords.%03d

SAMPLES_PATH: ''
STYLE_MIX_PATH: style_mixing/test_images/set_ecog
SPEC_CHANS: 64
TEMPORAL_SAMPLES: 128
BCTS: True
MAX_RESOLUTION_LEVEL: 7
SUBJECT: ['NY742']
MODEL:
#####TAKE OFF CHECKLIST!!!########
N_FORMANTS: 6
N_FORMANTS_NOISE: 1
N_FORMANTS_ECOG: 6
WAVE_BASED : True
DO_MEL_GUIDE : False
BGNOISE_FROMDATA: True
N_FFT : 256
NOISE_DB : -50 #-50
MAX_DB : 22.5 # probably 28 is better
NOISE_DB_AMP : -25
MAX_DB_AMP : 14
POWER_SYNTH: True

LESS_TEMPORAL_FEATURE: True
LATENT_SPACE_SIZE: 128
LAYER_COUNT: 6
MAX_CHANNEL_COUNT: 512
START_CHANNEL_COUNT: 16
DLATENT_AVG_BETA: 0.995
MAPPING_LAYERS: 8
TRUNCATIOM_CUTOFF: 5
CHANNELS: 1
UNIQ_WORDS: 50
#MAPPING_FROM_ECOG: "ECoGMappingBottleneck" #ECoGMappingBottlenecklstm1, ECoGMappingBottlenecklstm2
#MAPPING_FROM_ECOG: "ECoGMappingBottlenecklstm1"
#MAPPING_FROM_ECOG: "ECoGMappingBottlenecklstm"
MAPPING_FROM_ECOG: "ECoGMappingBottlenecklstm_pure"
ONEDCONFIRST: True
RNN_TYPE: 'LSTM'
RNN_LAYERS: 4
RNN_COMPUTE_DB_LOUDNESS: True
BIDIRECTION: True
# MAPPING_FROM_ECOG: "ECoGMappingTransformer"
ECOG: False # will be overridden if FINETUNE
SUPLOSS_ON_ECOGF: False # will be overridden to FIX_GEN if FINETUNE; spectrogram supervision loss applies only to the ECoG encoder
W_SUP: False
GAN: True
GENERATOR: "GeneratorFormant"
ENCODER: "EncoderFormant"
AVERAGE_W: True
TEMPORAL_W: True
GLOBAL_W: True
TEMPORAL_GLOBAL_CAT: True
RESIDUAL: True
W_CLASSIFIER: False
CYCLE: False
ATTENTIONAL_STYLE: True
#T 4 8 16 32 64 128
ATTENTION: [False, False, False, False, False, False]
HEADS: 1
APPLY_PPL: False
APPLY_PPL_D: False
PPL_WEIGHT: 100
PPL_GLOBAL_WEIGHT: 0
PPLD_WEIGHT: 1
PPLD_GLOBAL_WEIGHT: 0
COMMON_Z: True
TRANSFORMER:
HIDDEN_DIM : 256
DIM_FEEDFORWARD : 256
ENCODER_ONLY : False
ATTENTIONAL_MASK : False
N_HEADS : 4
NON_LOCAL: True
# ATTENTION: []
#OUTPUT_DIR: output/ecog_10241800_lstm1 #training_artifacts/debug
#OUTPUT_DIR: output/ecog_10241800_lstm2
#OUTPUT_DIR: output/ecog_11011800_conv #after change loudness encoder
#OUTPUT_DIR: output/ecog_11011800_lstm1 #after change loudness encoder
OUTPUT_DIR: output/ecog_11021800_lstm1 #after change loudness encoder
# OUTPUT_DIR: training_artifacts/loudnesscomp_han5_ampamploss
# OUTPUT_DIR: training_artifacts/loudnesscomp_han5_ampsynth_masknormed
# OUTPUT_DIR: training_artifacts/debug_f1f2linearmel
# OUTPUT_DIR: training_artifacts/ecog_finetune_3ecogformants_han5_specsup_guidance_hamonicformantsemph
# OUTPUT_DIR: training_artifacts/ecog_finetune_3ecogformants_han5_specsup_guidance_hamonicnoiseformantsemphmore
# OUTPUT_DIR: training_artifacts/formantsythv2_wavebased_NY742_constraintonFB_Bconstrainrefined_absfreq_4formants_1noiseformants_bgnoise_noisemapping_freqconv_duomask
# OUTPUT_DIR: training_artifacts/ecog_residual_latent128_temporal_lesstemporalfeature_noprogressive_HBw_ppl_ppld_localreg_ecogf_w_spec_sup
# OUTPUT_DIR: training_artifacts/ecog_residual_latent128_temporal_lesstemporalfeature_ppl_ppld
# OUTPUT_DIR: training_artifacts/ecog_residual_cycle_attention3264wStyleIN_specchan64_more_attentfeatures_heads4
FINETUNE:
FINETUNE: True
FIX_GEN: True
ENCODER_GUIDE: True
SPECSUP: True
#####################################

TRAIN:
PROGRESSIVE: False
W_WEIGHT: 1
CYCLE_WEIGHT: 1
BASE_LEARNING_RATE: 0.002
EPOCHS_PER_LOD: 16
LEARNING_DECAY_RATE: 0.1
LEARNING_DECAY_STEPS: [96]
TRAIN_EPOCHS: 60
# 4 8 16 32 64 128 256
LOD_2_BATCH_8GPU: [512, 256, 128, 64, 32, 32] # If GPU memory ~16GB reduce last number from 32 to 24
LOD_2_BATCH_4GPU: [64, 64, 64, 64, 32, 16]
LOD_2_BATCH_2GPU: [64, 64, 64, 64, 32, 16]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 16]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 32]
# LOD_2_BATCH_1GPU: [512, 256, 128, 64, 32, 16]
# LOD_2_BATCH_1GPU: [128, 128, 128, 128, 64, 32]
# LOD_2_BATCH_1GPU: [512, 256, 256, 128, 64, 16]
LOD_2_BATCH_1GPU: [64, 64, 64, 64, 32, 16]
BATCH_SIZE : 32
# BATCH_SIZE : 2
LEARNING_RATES: [0.0015, 0.0015, 0.0015, 0.002, 0.003, 0.003]
# LEARNING_RATES: [0.0015, 0.0015, 0.0005, 0.0003, 0.0003, 0.0002]
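With FINETUNE: True and FIX_GEN: True above, the intent appears to be training the ECoG-to-latent mapping against a frozen, pretrained speech decoder. A sketch of one way that freezing could look in PyTorch; the attribute name (model.generator) and optimizer choice are assumptions, not taken from this PR:

```python
import torch

def build_finetune_optimizer(model, cfg):
    # Hypothetical: freeze the pretrained generator/decoder when FIX_GEN is set,
    # so only the ECoG mapping network receives gradient updates.
    if cfg.FINETUNE.FINETUNE and cfg.FINETUNE.FIX_GEN:
        for p in model.generator.parameters():   # attribute name is illustrative
            p.requires_grad = False
    trainable = [p for p in model.parameters() if p.requires_grad]
    return torch.optim.Adam(trainable, lr=cfg.TRAIN.BASE_LEARNING_RATE)
```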