
Commit

Merge pull request huggingface#7 from huggingface/develop
Develop
thomwolf authored Nov 7, 2018
2 parents 304dd13 + 84ab91b commit c8fe787
Showing 3 changed files with 20 additions and 34 deletions. In the hunks below, removed lines are prefixed with "-" and added lines with "+".
2 changes: 1 addition & 1 deletion modeling.py
@@ -467,6 +467,6 @@ def forward(self, input_ids, token_type_ids, attention_mask, start_positions=Non
start_loss = loss_fct(start_logits, start_positions)
end_loss = loss_fct(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2
- return total_loss, (start_logits, end_logits)
+ return total_loss
else:
return start_logits, end_logits
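
The change above makes `BertForQuestionAnswering.forward` return just the scalar loss when gold start/end positions are supplied, instead of a (loss, logits) tuple. A minimal, self-contained sketch of the span loss it computes; tensor shapes and data here are made up for illustration:

```python
import torch
from torch.nn import CrossEntropyLoss

batch_size, seq_len = 4, 16
start_logits = torch.randn(batch_size, seq_len)              # per-token start scores
end_logits = torch.randn(batch_size, seq_len)                # per-token end scores
start_positions = torch.randint(0, seq_len, (batch_size,))   # gold start token index
end_positions = torch.randint(0, seq_len, (batch_size,))     # gold end token index

loss_fct = CrossEntropyLoss()
start_loss = loss_fct(start_logits, start_positions)
end_loss = loss_fct(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2   # the scalar that forward() now returns in training mode
print(total_loss.item())
```

Averaging the start and end cross-entropy keeps the loss on the same scale as a single classification loss.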
28 changes: 13 additions & 15 deletions run_classifier.py
@@ -458,7 +458,6 @@ def main():
raise ValueError("Task not found: %s" % (task_name))

processor = processors[task_name]()

label_list = processor.get_labels()

tokenizer = tokenization.FullTokenizer(
@@ -515,23 +514,21 @@ def main():
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

model.train()
- for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
+ for _ in trange(int(args.num_train_epochs), desc="Epoch"):
tr_loss = 0
nb_tr_examples, nb_tr_steps = 0, 0
- for step, (input_ids, input_mask, segment_ids, label_ids) in enumerate(tqdm(train_dataloader, desc="Iteration")):
- input_ids = input_ids.to(device)
- input_mask = input_mask.to(device)
- segment_ids = segment_ids.to(device)
- label_ids = label_ids.to(device)
-
- loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
+ for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
+ batch = tuple(t.to(device) for t in batch)
+ input_ids, input_mask, segment_ids, label_ids = batch
+ loss = model(input_ids, segment_ids, input_mask, label_ids)
if n_gpu > 1:
loss = loss.mean() # mean() to average on multi-gpu.
if args.gradient_accumulation_steps > 1:
loss = loss / args.gradient_accumulation_steps
+ loss.backward()
tr_loss += loss.item()
nb_tr_examples += input_ids.size(0)
nb_tr_steps += 1
- loss.backward()

if (step + 1) % args.gradient_accumulation_steps == 0:
optimizer.step() # We have accumulated enough gradients
model.zero_grad()
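
The rewritten loop moves the whole batch to the device in one expression and expects a scalar loss back from the model, then applies the usual gradient-accumulation pattern: scale the loss, call `backward()` every step, but only step the optimizer every `gradient_accumulation_steps` batches. A runnable sketch of that pattern with a toy model standing in for BERT; all names and data below are illustrative, not the script's real objects:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

model = torch.nn.Linear(8, 2)                      # toy stand-in for the classifier
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fct = torch.nn.CrossEntropyLoss()
dataset = TensorDataset(torch.randn(32, 8), torch.randint(0, 2, (32,)))
loader = DataLoader(dataset, batch_size=4)

gradient_accumulation_steps = 2
model.train()
for step, batch in enumerate(loader):
    features, label_ids = batch                    # .to(device) would go here in the real script
    loss = loss_fct(model(features), label_ids)
    if gradient_accumulation_steps > 1:
        # scale so the accumulated gradients match one large-batch update
        loss = loss / gradient_accumulation_steps
    loss.backward()                                # gradients accumulate in .grad across calls
    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        model.zero_grad()
```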
@@ -567,7 +564,8 @@ def main():
segment_ids = segment_ids.to(device)
label_ids = label_ids.to(device)

- tmp_eval_loss, logits = model(input_ids, segment_ids, input_mask, label_ids)
+ with torch.no_grad():
+ tmp_eval_loss, logits = model(input_ids, segment_ids, input_mask, label_ids)

logits = logits.detach().cpu().numpy()
label_ids = label_ids.to('cpu').numpy()
@@ -579,13 +577,13 @@ def main():
nb_eval_examples += input_ids.size(0)
nb_eval_steps += 1

- eval_loss = eval_loss / nb_eval_steps #len(eval_dataloader)
- eval_accuracy = eval_accuracy / nb_eval_examples #len(eval_dataloader)
+ eval_loss = eval_loss / nb_eval_steps
+ eval_accuracy = eval_accuracy / nb_eval_examples

result = {'eval_loss': eval_loss,
'eval_accuracy': eval_accuracy,
'global_step': global_step,
- 'loss': tr_loss/nb_tr_steps}#'loss': loss.item()}
+ 'loss': tr_loss/nb_tr_steps}

output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
with open(output_eval_file, "w") as writer:
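
The evaluation hunks above wrap the forward pass in `torch.no_grad()` so no autograd graph is kept, and they average the loss per batch but the accuracy per example. A small sketch of that bookkeeping with a stand-in model and random data; the `accuracy` helper here only mirrors the script's function of the same name:

```python
import numpy as np
import torch

def accuracy(out, labels):
    # raw count of correct predictions; normalised after the loop
    return np.sum(np.argmax(out, axis=1) == labels)

model = torch.nn.Linear(8, 2)                      # stand-in for the classifier
loss_fct = torch.nn.CrossEntropyLoss()
model.eval()

eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
for _ in range(5):                                 # pretend eval_dataloader batches
    input_batch = torch.randn(4, 8)
    label_ids = torch.randint(0, 2, (4,))
    with torch.no_grad():                          # no autograd graph is built during eval
        logits = model(input_batch)
        tmp_eval_loss = loss_fct(logits, label_ids)
    eval_loss += tmp_eval_loss.item()
    eval_accuracy += accuracy(logits.numpy(), label_ids.numpy())
    nb_eval_examples += input_batch.size(0)
    nb_eval_steps += 1

eval_loss = eval_loss / nb_eval_steps              # averaged per batch
eval_accuracy = eval_accuracy / nb_eval_examples   # averaged per example
print({"eval_loss": eval_loss, "eval_accuracy": eval_accuracy})
```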
24 changes: 6 additions & 18 deletions run_squad.py
@@ -743,7 +743,7 @@ def main():
type=int,
default=1,
help="Number of updates steps to accumualte before performing a backward/update pass.")

args = parser.parse_args()

if args.local_rank == -1 or args.no_cuda:
@@ -855,22 +855,15 @@ def main():
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

model.train()
- for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
+ for _ in trange(int(args.num_train_epochs), desc="Epoch"):
for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
+ batch = tuple(t.to(device) for t in batch)
input_ids, input_mask, segment_ids, start_positions, end_positions = batch
- input_ids = input_ids.to(device)
- input_mask = input_mask.to(device)
- segment_ids = segment_ids.to(device)
- start_positions = start_positions.to(device)
- end_positions = start_positions.to(device)

- start_positions = start_positions.view(-1, 1)
- end_positions = end_positions.view(-1, 1)

- loss, _ = model(input_ids, segment_ids, input_mask, start_positions, end_positions)
+ loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions)
if n_gpu > 1:
loss = loss.mean() # mean() to average on multi-gpu.

if args.gradient_accumulation_steps > 1:
loss = loss / args.gradient_accumulation_steps
loss.backward()
if (step + 1) % args.gradient_accumulation_steps == 0:
optimizer.step() # We have accumulated enough gradients
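
run_squad.py gets the same treatment as run_classifier.py: the per-tensor `.to(device)` calls, including the copy-paste slip `end_positions = start_positions.to(device)` in the removed block, are replaced by one tuple comprehension, and the model now returns only the loss. A standalone sketch of the batch-to-device idiom with made-up tensors:

```python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch = (
    torch.randint(0, 100, (2, 8)),          # input_ids (made-up vocab ids)
    torch.ones(2, 8, dtype=torch.long),     # input_mask
    torch.zeros(2, 8, dtype=torch.long),    # segment_ids
    torch.tensor([1, 3]),                   # start_positions
    torch.tensor([4, 6]),                   # end_positions
)
# one pass moves every tensor, so nothing can be skipped or duplicated by mistake
batch = tuple(t.to(device) for t in batch)
input_ids, input_mask, segment_ids, start_positions, end_positions = batch
```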
@@ -911,24 +904,19 @@ def main():
for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating"):
if len(all_results) % 1000 == 0:
logger.info("Processing example: %d" % (len(all_results)))

input_ids = input_ids.to(device)
input_mask = input_mask.to(device)
segment_ids = segment_ids.to(device)

+ with torch.no_grad():
batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)

for i, example_index in enumerate(example_indices):
start_logits = batch_start_logits[i].detach().cpu().tolist()
end_logits = batch_end_logits[i].detach().cpu().tolist()

eval_feature = eval_features[example_index.item()]
unique_id = int(eval_feature.unique_id)
all_results.append(RawResult(unique_id=unique_id,
start_logits=start_logits,
end_logits=end_logits))

output_prediction_file = os.path.join(args.output_dir, "predictions.json")
output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
write_predictions(eval_examples, eval_features, all_results,
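
During evaluation, the script runs the model under `torch.no_grad()` and stores each example's start/end logits in a `RawResult` record for the later `write_predictions` step. A sketch of that collection loop; the `RawResult` namedtuple and the unique-id scheme below are stand-ins assumed to mirror run_squad.py, and the logits are fabricated:

```python
import collections
import torch

# stand-in assumed to mirror the RawResult record used by run_squad.py
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])

batch_size, seq_len = 2, 16
all_results = []
with torch.no_grad():                                       # inference only, no gradients kept
    batch_start_logits = torch.randn(batch_size, seq_len)   # stand-in for model output
    batch_end_logits = torch.randn(batch_size, seq_len)

example_indices = torch.arange(batch_size)
for i, example_index in enumerate(example_indices):
    start_logits = batch_start_logits[i].detach().cpu().tolist()
    end_logits = batch_end_logits[i].detach().cpu().tolist()
    unique_id = 1000000000 + int(example_index.item())       # made-up id scheme
    all_results.append(RawResult(unique_id=unique_id,
                                 start_logits=start_logits,
                                 end_logits=end_logits))
print(len(all_results), all_results[0].unique_id)
```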
