bigscience-workshop · Muennighoff · Jul 19, 2022 · Jul 22, 2022 · Jul 22, 2022 · Jul 28, 2022
diff --git a/promptsource/templates.py b/promptsource/templates.py
@@ -27,7 +27,25 @@
 # These are users whose datasets should be included in the results returned by
 # filter_english_datasets (regardless of their metadata)
 
-INCLUDED_USERS = {"Zaid", "craffel", "GEM", "aps", "khalidalt", "shanya", "rbawden", "BigScienceBiasEval", "gsarti"}
+INCLUDED_USERS = {
+    "Zaid",
+    "craffel",
+    "GEM",
+    "aps",
+    "khalidalt",
+    "shanya",
+    "rbawden",
+    "BigScienceBiasEval",
+    "gsarti",
+    "Helsinki-NLP",
+    "Muennighoff",
+    "facebook",
+    "codeparrot",
+    "pasinit",
+    "Fraser",
+    "allenai",
+    "teven",
+}
 
 # These are the metrics with which templates can be tagged
 METRICS = {
@@ -360,12 +378,13 @@ def get_fixed_answer_choices_list(self):
         else:
             return None
 
-    def apply(self, example, truncate=True, highlight_variables=False) -> Tuple[str, List[str]]:
+    def apply(self, example, truncate=True, strip_connection=True, highlight_variables=False) -> Tuple[str, List[str]]:
         """
         Creates a prompt by applying this template to an example
 
         :param example: the dataset example to create a prompt for
         :param truncate: if True, example fields will be truncated to TEXT_VAR_LENGTH chars
+        :param strip_connection: if True, strip every part fully; if False, keep whitespace where input meets target
         :param highlight_variables: highlight the added variables
         :return: tuple of a string and a list of strings, for input and targets
         """
@@ -396,15 +415,27 @@ def apply(self, example, truncate=True, highlight_variables=False) -> Tuple[str,
 
         # Splits on the separator, and then replaces back any occurrences of the
         # separator in the original example
-        parts = [self._unescape_pipe(part).strip() for part in rendered_example.split("|||")]
+        if strip_connection:
+            parts = [self._unescape_pipe(part).strip() for part in rendered_example.split("|||")]
+        else:
+            parts = [self._unescape_pipe(part) for part in rendered_example.split("|||")]
         if parts == [""]:
             # Handles the case of blank results
             # Example: `tydiqa` where prompts are conditionned on the language and thus most of the time will return a blank result
             return parts
         if len(parts) < 2:
             raise ValueError("Prompt did not produce an input and at least one target.")
 
-        return parts[0], parts[1:]
+        if strip_connection:
+            return parts[0], parts[1:]
+
+        # Remove double whitespace at the input/target boundary. Use
+        # endswith/startswith instead of indexing [-1]/[0] so that an empty
+        # input or an empty target cannot raise IndexError.
+        if parts[0].endswith(" ") and all(p.startswith(" ") for p in parts[1:]):
+            parts[0] = parts[0][:-1]
+        # Leave the connection between input & target unstripped
+        return parts[0].lstrip(), [p.rstrip() for p in parts[1:]]
 
     pipe_protector = "3ed2dface8203c4c9dfb1a5dc58e41e0"
 

diff --git a/promptsource/templates/Fraser/python-state-changes/default/templates.yaml b/promptsource/templates/Fraser/python-state-changes/default/templates.yaml
@@ -0,0 +1,87 @@
+dataset: Fraser/python-state-changes
+subset: default
+templates:
+  2b358b1c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 2b358b1c-7514-488f-99ed-3ca5da70e103
+    jinja: 'Starting variables:
+
+      {{ start }}
+
+      Applied code:
+
+      {{code}}
+
+      Ending variables:
+
+      |||
+      {{ end }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: startend
+    reference: ''
+  1b218b2c-8514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 1b218b2c-8514-488f-99ed-3ca5da70e103
+    jinja: 'I applied "{{code}}" given "{{ start }}".
+
+      What are the new values of the variables now?
+
+      |||
+
+      {{ end }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: newval
+    reference: ''
+  5f318b2c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5f318b2c-7514-488f-99ed-3ca5da70e103
+    jinja: 'The final variables are:
+
+      {{ end }}
+
+      We know that the code "{{code}}" was applied.
+
+      What were the variables at the beginning?
+
+      |||
+      {{ start }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: varbeg
+    reference: ''
+  5b918b2c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b918b2c-7514-488f-99ed-3ca5da70e103
+    jinja: 'What code do I need to apply to get from start to end?
+
+      Start: {{ start }}
+
+      End: {{ end }}
+
+      Needed code: ||| {{ code }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: needcode
+    reference: ''
diff --git a/promptsource/templates/codeparrot/apps/all/templates.yaml b/promptsource/templates/codeparrot/apps/all/templates.yaml
@@ -0,0 +1,65 @@
+dataset: codeparrot/apps
+subset: all
+templates:
+  5b318b1c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b318b1c-7514-488f-99ed-3ca5da70e103
+    jinja: 'Solve in Python:
+
+      {{ question }}
+
+      |||
+
+      {{ solution }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: qsol
+    reference: ''
+  5b218b3c-8514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b218b3c-8514-488f-99ed-3ca5da70e103
+    jinja: '{{ question }}
+
+
+      Can you solve the above problem using Python?
+
+      |||
+
+      {{ solution }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: abovesol
+    reference: ''
+  5b318b2c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b318b2c-7514-488f-99ed-3ca5da70e103
+    jinja: 'I found an interesting problem on {{url}}:
+
+      {{ question }}
+
+
+      I tried it in Python, but could not do it. Can you solve it?
+
+
+      |||
+
+      {{ solution }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: foundsol
+    reference: ''
diff --git a/promptsource/templates/codeparrot/codecomplex/codeparrot--codecomplex/templates.yaml b/promptsource/templates/codeparrot/codecomplex/codeparrot--codecomplex/templates.yaml
@@ -0,0 +1,45 @@
+dataset: codeparrot/codecomplex
+subset: codeparrot--codecomplex
+templates:
+  5b108b1c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b108b1c-7514-488f-99ed-3ca5da70e103
+    jinja: '{{ code }}
+      What is the time complexity of the previous code?
+      |||
+      {{ complexity }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: whatcomplexity
+    reference: ''
+  1d85c898-70fe-4a51-be37-5111be357762: !Template
+    answer_choices: null
+    id: 1d85c898-70fe-4a51-be37-5111be357762
+    jinja: "Identify the time complexity of the following code as constant, linear, quadratic, cubic, log(n), nlog(n) or NP-hard. {{ code }} Complexity: |||{{ complexity }}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: false
+    name: identifycomplexity
+    reference: ''
+  5d85c898-70fe-4a51-be37-5111be357762: !Template
+    answer_choices: null
+    id: 5d85c898-70fe-4a51-be37-5111be357762
+    jinja: "{{ code }} Which one is the correct time complexity of the code snippet: constant, linear, quadratic, cubic, log(n), nlog(n) or NP-hard? |||{{ complexity }}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: true
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: false
+    name: whichcomplexity
+    reference: ''
diff --git a/promptsource/templates/codeparrot/github-jupyter-text-code-pairs/templates.yaml b/promptsource/templates/codeparrot/github-jupyter-text-code-pairs/templates.yaml
@@ -0,0 +1,77 @@
+dataset: codeparrot/github-jupyter-text-code-pairs
+subset:
+templates:
+  5b718b1c-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b718b1c-7514-488f-99ed-3ca5da70e103
+    jinja: '"{{ markdown }}"
+
+      Please write code following the instructions in jupyter notebook style.
+
+      |||
+
+      {{ code }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: code
+    reference: ''
+  5b218b2e-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b218b2e-7514-488f-99ed-3ca5da70e103
+    jinja: 'I am working on the file "{{ path }}".
+
+      The first task is:
+
+      {{ markdown }}
+
+      Can you write Python code for it?
+
+      |||
+
+      {{ code }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: taskcode
+    reference: ''
+  4d85c898-70fe-4a51-be37-5111be357762: !Template
+    answer_choices: null
+    id: 4d85c898-70fe-4a51-be37-5111be357762
+    jinja: "{{ markdown }}\n|||{{ code }}"
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: false
+    name: markdowncode
+    reference: ''
+  8d85c898-70fe-4a51-be37-5111be357762: !Template
+    answer_choices: null
+    id: 8d85c898-70fe-4a51-be37-5111be357762
+    jinja: '{{ code }}
+
+      Given the above code, generate some markdown instructions for it.
+
+      |||
+
+      {{ markdown }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: false
+    name: genmarkdown
+    reference: ''
diff --git a/promptsource/templates/codeparrot/xlcost-text-to-code/C++-program-level/templates.yaml b/promptsource/templates/codeparrot/xlcost-text-to-code/C++-program-level/templates.yaml
@@ -0,0 +1,39 @@
+dataset: codeparrot/xlcost-text-to-code
+subset: C++-program-level
+templates:
+  5f718b2d-7514-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5f718b2d-7514-488f-99ed-3ca5da70e103
+    jinja: '"{{ text }}"
+
+      Solution in C++:
+
+      |||
+      {{ code_clean }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: solcpp
+    reference: ''
+  5b218b2e-7525-488f-99ed-3ca5da70e103: !Template
+    answer_choices: null
+    id: 5b218b2e-7525-488f-99ed-3ca5da70e103
+    jinja: '"{{ text }}"
+
+      How can the above be solved in C++?
+
+      |||
+      {{ code_clean }}'
+    metadata: !TemplateMetadata
+      choices_in_prompt: false
+      languages:
+      - en
+      metrics:
+      - Other
+      original_task: true
+    name: abovecpp
+    reference: ''