Support torchrun for emu2 & emu2_chat and fix bug #52

Merged · 18 commits · Jan 17, 2024
vlmeval/utils/dataset_config.py (1 addition, 1 deletion)

@@ -49,7 +49,7 @@
     'ScienceQA_VAL': '96320d05e142e585e7204e72affd29f3',
     'ScienceQA_TEST': 'e42e9e00f9c59a80d8a5db35bc32b71f',
     'HallusionBench': '0c23ac0dc9ef46832d7a24504f2a0c7c',
-    "DocVQA_VAL": '3744f5df4aaf2781c85fe7677ae0a411',
+    "DocVQA_VAL": 'c911fdc5f4974513c112cc83a25c99d9',
     "AI2D": "53db8397adbe73e9cc0b4861227004d4",
     "LLaVABench": "d382a093f749a697820d3dadd61c8428"
 }
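
This hunk updates the MD5 checksum recorded for the DocVQA_VAL dataset file. As a minimal sketch, assuming the values above are plain MD5 digests of the downloaded TSV files (the real validation helper in vlmeval and the local file name here are assumptions):

    import hashlib

    def file_md5(path, chunk_size=1 << 20):
        # hash the file in chunks so large TSVs do not load fully into memory
        h = hashlib.md5()
        with open(path, 'rb') as f:
            while block := f.read(chunk_size):
                h.update(block)
        return h.hexdigest()

    # 'DocVQA_VAL.tsv' is a hypothetical local file name
    assert file_md5('DocVQA_VAL.tsv') == 'c911fdc5f4974513c112cc83a25c99d9'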
vlmeval/vlm/emu.py (13 additions, 3 deletions)

@@ -28,6 +28,16 @@ def __init__(self,
     from transformers import AutoModelForCausalLM, AutoTokenizer
     from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model

+    local_rank, world_size = get_rank_and_world_size()
+
+    device_num = torch.cuda.device_count()
+    assert world_size * 2 <= device_num, 'The number of GPUs must be at least 2 * world_size'
+
+    device_1 = local_rank
+    device_2 = local_rank + world_size
+    torch.cuda.set_device(device_1)
+    torch.cuda.set_device(device_2)
Review comment (Member):
Add a check or assertion to make sure this arrangement is viable; for example, verify that 2 * world_size GPUs are available.
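
For context, the two-GPUs-per-rank arrangement relies on get_rank_and_world_size() picking up the process layout that torchrun exports. A minimal sketch of what that helper presumably does (the exact environment variables and defaults are assumptions; vlmeval's actual implementation may differ):

    import os

    def get_rank_and_world_size():
        # torchrun sets LOCAL_RANK and WORLD_SIZE for every worker it spawns
        local_rank = int(os.environ.get('LOCAL_RANK', 0))
        world_size = int(os.environ.get('WORLD_SIZE', 1))
        return local_rank, world_size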


tokenizer = AutoTokenizer.from_pretrained(model_path) # "BAAI/Emu2-Chat"
self.tokenizer = tokenizer
with init_empty_weights():
@@ -37,9 +47,9 @@
         low_cpu_mem_usage=True,
         trust_remote_code=True)

-    device_map = infer_auto_device_map(model, max_memory={0: '38GiB', 1: '38GiB'}, no_split_module_classes=['Block', 'LlamaDecoderLayer'])
+    device_map = infer_auto_device_map(model, max_memory={device_1: '38GiB', device_2: '38GiB'}, no_split_module_classes=['Block', 'LlamaDecoderLayer'])
     # input and output logits should be on the same device
-    device_map["model.decoder.lm.lm_head"] = 0
+    device_map["model.decoder.lm.lm_head"] = device_1

     model = dispatch_model(
         model,
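
To see why the assert above requires 2 * world_size GPUs: each worker shards its model replica across the GPU pair (local_rank, local_rank + world_size), so the pairs are disjoint across workers. A self-contained illustration of that arithmetic for a 4-GPU node:

    # Worked example, assuming world_size = 2 on a node with 4 GPUs:
    #   rank 0 -> GPUs 0 and 2
    #   rank 1 -> GPUs 1 and 3
    world_size = 2
    for local_rank in range(world_size):
        device_1 = local_rank               # first GPU of this worker's pair
        device_2 = local_rank + world_size  # second GPU, offset by world_size
        print(f'rank {local_rank}: GPUs {device_1} and {device_2}')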
@@ -79,4 +89,4 @@ def interleave_generate(self, ti_list, dataset=None):
 def generate(self, image_path, prompt, dataset=None):
     tl_list = [image_path, prompt]
     output = self.interleave_generate(tl_list, dataset)
-    return output
\ No newline at end of file
+    return output
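
Here generate() is a thin wrapper that packs the image path and prompt into one interleaved list and delegates to interleave_generate(). A runnable stub showing that control flow (the stub class is hypothetical; the real wrapper lives in vlmeval/vlm/emu.py):

    # _EmuStub only mirrors the delegation pattern in the diff above
    class _EmuStub:
        def interleave_generate(self, ti_list, dataset=None):
            # the real method feeds an interleaved image/text list to Emu2;
            # here we just echo the inputs
            return ' | '.join(map(str, ti_list))

        def generate(self, image_path, prompt, dataset=None):
            tl_list = [image_path, prompt]  # pack image and prompt together
            return self.interleave_generate(tl_list, dataset)

    print(_EmuStub().generate('example.jpg', 'Describe the image.'))

With this PR, each torchrun worker owns a disjoint GPU pair, so a 4-GPU node would be launched with two workers, e.g. torchrun --nproc_per_node=2 run.py ... (the entry-point flags are an assumption about the repo's launcher).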