Merge pull request #4 from SoyGema/docs-branch

Structure with library installations

Showing 19 changed files with 327 additions and 1,109 deletions.
""" | ||
Script Name: Text Attack with Falcon Model Using Tomato Swap | ||
Author: Your Name | ||
Date: Current date | ||
Version: 1.0 | ||
License: MIT (or your chosen license) | ||
Description: | ||
This script performs adversarial attacks on a pre-trained Falcon-7b-instruct model using the TextAttack library. Specifically, it employs a 'Tomato Swap' attack. In this attack, a certain word from the input text is systematically replaced with the word 'tomato.' The objective is to mislead the model into making an incorrect classification while retaining the input's semantic meaning. | ||
The attack strategy uses the following components: | ||
- Goal Function: Untargeted Classification | ||
- Transformation: Custom 'Tomato Swap' | ||
- Constraints: No repeat modification, no stopword modification | ||
- Search Method: Greedy Search | ||
The Falcon model is assessed for its robustness against these adversarial examples using the SST-2 dataset, which is a binary classification dataset. | ||
Dependencies: | ||
- TextAttack | ||
- Transformers | ||
- Torch | ||
- tqdm | ||
Usage: | ||
Run this script with Python 3.x. Make sure you have installed all required libraries. | ||
To execute the script, run: | ||
python your_script_name.py | ||
Notes: | ||
- Make sure to have sufficient computational resources, as running attacks can be resource-intensive. | ||
""" | ||
from textattack import Attack, AttackArgs, Attacker
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.datasets import HuggingFaceDataset
from textattack.goal_functions import UntargetedClassification
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.search_methods import GreedySearch
from textattack.transformations import WordSwap

from beyond_the_nest.beyond_the_nest.utils.beyond_the_nest_models import (
    load_falcon_model_classification,
    load_tokenizer,
)
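
# `load_falcon_model_classification` and `load_tokenizer` are local helpers from
# the `beyond_the_nest` package. A rough equivalent built directly on the
# Transformers API might look like the following sketch (an assumption: the real
# helpers may add extra configuration such as dtype, device placement, or
# quantization):
#
#   from transformers import AutoModelForSequenceClassification, AutoTokenizer
#
#   def load_falcon_model_classification(model_name):
#       return AutoModelForSequenceClassification.from_pretrained(
#           model_name, num_labels=2, trust_remote_code=True
#       )
#
#   def load_tokenizer(model_name):
#       return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)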

# from tqdm import tqdm


class TomatoWordSwap(WordSwap):
    """Transforms an input by replacing any word with 'tomato'."""

    # We don't need a constructor, since our class doesn't require any parameters.

    def _get_replacement_words(self, word):
        """Returns 'tomato', no matter what `word` was originally.

        Returns a list with one item, since `_get_replacement_words` is intended
        to return a list of candidate replacement words.
        """
        return ["tomato"]


# Load the pre-trained Falcon model and its tokenizer via the project's helpers.
falcon = load_falcon_model_classification("tiiuae/falcon-7b-instruct")
# falcon2 = falcon.to_bettertransformer()
tokenizer = load_tokenizer("tiiuae/falcon-7b-instruct")
tokenizer.pad_token = tokenizer.eos_token

# If no EOS token was available to reuse as padding, add an explicit [PAD] token
# and resize the model's embeddings to cover the enlarged vocabulary.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    falcon.resize_token_embeddings(len(tokenizer))

model_wrapper = HuggingFaceModelWrapper(falcon, tokenizer)
goal_function = UntargetedClassification(model_wrapper)

### Unclear if this dataset is going to work with falcon
dataset = HuggingFaceDataset("sst2", None, "test")
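# Note (assumption): the SST-2 "test" split on the Hugging Face Hub ships with
# hidden labels (all set to -1), so the labeled "validation" split is often a
# safer choice for attack evaluation, e.g.:
#
#   dataset = HuggingFaceDataset("sst2", None, "validation")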

transformation = TomatoWordSwap()
constraints = [RepeatModification(), StopwordModification()]

# Assemble the attack from its four components: goal function, constraints,
# transformation, and search method.
search_method = GreedySearch()
attack = Attack(goal_function, constraints, transformation, search_method)

print(attack)

# Run the attack on the first 10 examples of the dataset.
attack_args = AttackArgs(num_examples=10)
attacker = Attacker(attack, dataset, attack_args)
attack_results = attacker.attack_dataset()

print(attack_results)
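
# `attack_dataset` returns a list of AttackResult objects, so results can also
# be walked through one at a time (a minimal sketch, assuming TextAttack's
# standard AttackResult string representation):
#
#   for result in attack_results:
#       print(result)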