Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove a redundant XMR test; Add weights_only argument for torch.load #303

Merged
merged 1 commit into the base branch on Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}

# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@v3

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
Expand All @@ -48,4 +48,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
2 changes: 1 addition & 1 deletion .github/workflows/pytest_aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
Ubuntu-Python-Unit-Test:
name: Ubuntu 22.04 Python3.10 Unit Tests

runs-on: ubuntu-latest
runs-on: ubuntu-22.04-arm

steps:
- uses: actions/checkout@v1
Expand Down
1 change: 1 addition & 0 deletions pecos/core/utils/file_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define __FILE_UTIL_H__

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <stdexcept>
#include <string>
Expand Down
6 changes: 3 additions & 3 deletions pecos/xmc/xtransformer/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def load(cls, load_dir):
# load text_model
text_model_dir = os.path.join(load_dir, "text_model")
if os.path.exists(text_model_dir):
text_model = torch.load(text_model_dir)
text_model = torch.load(text_model_dir, weights_only=False)
else:
text_model = None

Expand Down Expand Up @@ -1330,7 +1330,7 @@ def train(
saved_trn_pt = kwargs.get("saved_trn_pt", "")
if not prob.is_tokenized:
if saved_trn_pt and os.path.isfile(saved_trn_pt):
trn_tensors = torch.load(saved_trn_pt)
trn_tensors = torch.load(saved_trn_pt, weights_only=False)
LOGGER.info("trn tensors loaded_from {}".format(saved_trn_pt))
else:
trn_tensors = matcher.text_to_tensor(
Expand All @@ -1345,7 +1345,7 @@ def train(
if val_prob is not None and not val_prob.is_tokenized:
saved_val_pt = kwargs.get("saved_val_pt", "")
if saved_val_pt and os.path.isfile(saved_val_pt):
val_tensors = torch.load(saved_val_pt)
val_tensors = torch.load(saved_val_pt, weights_only=False)
LOGGER.info("val tensors loaded from {}".format(saved_val_pt))
else:
val_tensors = matcher.text_to_tensor(
Expand Down
2 changes: 1 addition & 1 deletion pecos/xmc/xtransformer/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def load(cls, load_dir, shard=0):
nr_shards = cls.get_data_stats(load_dir)["num_shards"]
if shard >= nr_shards:
raise ValueError(f"Loading shard#{shard} where there are only {nr_shards} available")
return torch.load(f"{load_dir}/{shard}")
return torch.load(f"{load_dir}/{shard}", weights_only=False)

@property
def has_ns(self):
Expand Down
55 changes: 0 additions & 55 deletions test/pecos/xmr/test_reranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,58 +50,3 @@ def test_numr_encoder():
out_feat.numpy(),
abs=0.0,
), f"Enc(inp_feat) != inp_feat, given Enc is identity"


def test_textnumr_encoder():
    """Verify TextNumrEncoder text-only embeddings against known reference means.

    For each pretrained encoder, the MLP head and scorer are forced to an
    identity mapping (zero bias, identity/ones weights) so the mean of the
    text embedding is deterministic and can be compared to a precomputed value.
    """
    import torch
    from transformers import set_seed
    from transformers import AutoConfig, AutoTokenizer
    from pecos.xmr.reranker.model import TextNumrEncoderConfig
    from pecos.xmr.reranker.model import TextNumrEncoder

    # (encoder name, expected mean of the text embedding) pairs
    cases = [
        ("prajjwal1/bert-tiny", 0.007879042997956276),
        ("sentence-transformers/all-MiniLM-L6-v2", 0.0035168465692549944),
        ("intfloat/multilingual-e5-small", -0.0047034271992743015),
    ]
    set_seed(1234)

    for enc_name, expected_mu in cases:
        # Disable dropout so the forward pass is deterministic.
        text_config = AutoConfig.from_pretrained(
            enc_name,
            hidden_dropout_prob=0.0,
        )
        textnumr_config = TextNumrEncoderConfig(
            text_config=text_config,
            numr_config=None,
            text_pooling_type="cls",
            head_actv_type="identity",
            head_dropout_prob=0.0,
            head_size_list=[1],
        )
        encoder = TextNumrEncoder(textnumr_config)

        # Force the first MLP layer to be an exact identity map.
        head_linear = encoder.head_layers.mlp_layers[0]
        head_linear.bias.data.fill_(0.0)
        head_linear.weight.data.fill_(0.0)
        head_linear.weight.data.fill_diagonal_(1.0)

        # Scorer: zero bias, all-ones weights.
        encoder.scorer.bias.data.fill_(0.0)
        encoder.scorer.weight.data.fill_(1.0)

        # obtained from bert-tiny tokenizer("I Like coffee")
        tokenizer = AutoTokenizer.from_pretrained(enc_name)
        input_dict = tokenizer("I Like coffee", return_tensors="pt")
        outputs = encoder(**input_dict)
        assert outputs.text_emb is not None
        assert outputs.numr_emb is None

        mu = torch.mean(outputs.text_emb).item()
        assert mu == approx(
            expected_mu,
            abs=1e-3,
        ), f"mu(text_emb)={mu} != {expected_mu}"
Loading