Get url, optionally selecting CSS selector `sel`, and convert to clean markdown

|             | Type     | Default | Details |
|-------------|----------|---------|---------|
| url         |          |         | URL to read |
| sel         | NoneType | None    | Read only outerHTML of CSS selector `sel` |
| rm_comments | bool     | True    | Removes HTML comments |
| rm_details  | bool     | True    | Removes `<details>` tags |
| multi       | bool     | False   | Get all matches to `sel`, or just the first one |
| wrap_tag    | NoneType | None    | If `multi`, wrap each selection in this tag |

``` python
# Test single class selector
listings = read_html('https://www.answer.ai/', sel='.listing-description')
assert len(listings) < 500

# Test multi class selector
listings = read_html('https://www.answer.ai/', sel='.listing-description', multi=True)
assert len(listings) > 1000  # returns more than the single match, so multi is selecting

# Test multi wrap_tag
listings = read_html('https://www.answer.ai/', sel='.listing-description', multi=True, wrap_tag='document')
assert '<document>' in listings and '</document>' in listings
```

``` python
# Test tag CSS selectors
assert len(read_html('https://www.answer.ai/', sel='div.listing-description', multi=True)) > 1000
assert len(read_html('https://www.answer.ai/', sel='div', multi=True)) > 1000
```
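
You can also call `read_html` with no selector at all to convert a whole page; a minimal usage sketch (the URL is just an example):

``` python
md = read_html('https://www.answer.ai/')
print(md[:200])  # start of the page as clean markdown
```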
------------------------------------------------------------------------

source

### split_url

> split_url (url)

*Split url into base, path, and file name, normalising name to '/' if empty*

``` python
urls = ('https://claudette.answer.ai/path/index.html', 'https://claudette.answer.ai/',
        'https://claudette.answer.ai/index.html', 'https://llmstxt.org', 'https://llmstxt.org/')

[split_url(o) for o in urls]
```

    [('https://claudette.answer.ai', '/path', '/index.html'),
     ('https://claudette.answer.ai', '/', ''),
     ('https://claudette.answer.ai', '', '/index.html'),
     ('https://llmstxt.org', '/', ''),
     ('https://llmstxt.org', '/', '')]
# funccall source


Exported source

``` python
import inspect
from collections import abc
from fastcore.utils import *
from fastcore.docments import docments
```


## Function calling

Many LLMs do function calling (aka tool use) by taking advantage of JSON
schema.

We'll use [docments](https://fastcore.fast.ai/docments.html) to make
getting JSON schema from Python functions as ergonomic as possible. Each
parameter (and the return value) should have a type, and a docments
comment with the description of what it is. Here's an example:

``` python
def silly_sum(
    a:int, # First thing to sum
    b:int=1, # Second thing to sum
    c:list[int]=None, # A pointless argument
) -> int: # The sum of the inputs
    "Adds a + b."
    return a + b
```

This is what `docments` makes of that:

``` python
d = docments(silly_sum, full=True)
d
```

``` json
{ 'a': { 'anno': <class 'int'>,
         'default': <class 'inspect._empty'>,
         'docment': 'First thing to sum'},
  'b': {'anno': <class 'int'>, 'default': 1, 'docment': 'Second thing to sum'},
  'c': {'anno': list[int], 'default': None, 'docment': 'A pointless argument'},
  'return': { 'anno': <class 'int'>,
              'default': <class 'inspect._empty'>,
              'docment': 'The sum of the inputs'}}
```

Note that this is an
[AttrDict](https://fastcore.fast.ai/basics.html#attrdict) so we can
treat it like an object, *or* a dict:

``` python
d.a.docment, d['a']['anno']
```

    ('First thing to sum', int)


Exported source

``` python
def _types(t:type)->tuple[str,Optional[str]]:
    "Tuple of json schema type name and (if appropriate) array item name."
    if t is empty: raise TypeError('Missing type')
    tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"}
    tmap.update({k.__name__: v for k, v in tmap.items()})
    if getattr(t, '__origin__', None) in (list,tuple): return "array", tmap.get(t.__args__[0].__name__, "object")
    elif isinstance(t, str): return tmap.get(t, "object"), None
    else: return tmap.get(t.__name__, "object"), None
```


This internal function is needed to convert Python types into JSON
schema types.

``` python
_types(list[int]), _types(int), _types('int')
```

    (('array', 'integer'), ('integer', None), ('integer', None))

It will also convert custom types to the `object` type.

``` python
class Custom: a: int
_types(list[Custom]), _types(Custom)
```

    (('array', 'object'), ('object', None))


Exported source

``` python
def _param(name, info):
    "json schema parameter given `name` and `info` from docments full dict."
    paramt,itemt = _types(info.anno)
    pschema = dict(type=paramt, description=info.docment or "")
    if itemt: pschema["items"] = {"type": itemt}
    if info.default is not empty: pschema["default"] = info.default
    return pschema
```


This private function converts a key/value pair from the `docments`
structure into the `dict` that will be needed for the schema.

``` python
n,o = first(d.items())
print(n,'//', o)
_param(n, o)
```

    a // {'docment': 'First thing to sum', 'anno': <class 'int'>, 'default': <class 'inspect._empty'>}

    {'type': 'integer', 'description': 'First thing to sum'}

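To see how defaults flow through, here is the same helper applied to the `b` parameter (a small sketch; the `default` key matches what appears later in `silly_sum`'s full schema):

``` python
_param('b', d.b)  # b has a default of 1, so the schema gains a "default" key
```

    {'type': 'integer', 'description': 'Second thing to sum', 'default': 1}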
``` python
# Test primitive types
defs = {}
assert _handle_type(int, defs) == {'type': 'integer'}
assert _handle_type(str, defs) == {'type': 'string'}
assert _handle_type(bool, defs) == {'type': 'boolean'}
assert _handle_type(float, defs) == {'type': 'number'}

# Test custom class
class TestClass:
    def __init__(self, x: int, y: int): store_attr()

result = _handle_type(TestClass, defs)
assert result == {'$ref': '#/$defs/TestClass'}
assert 'TestClass' in defs
assert defs['TestClass']['type'] == 'object'
assert 'properties' in defs['TestClass']
```

``` python
# Test primitive types in containers
assert _handle_container(list, (int,), defs) == {'type': 'array', 'items': {'type': 'integer'}}
assert _handle_container(tuple, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}}
assert _handle_container(set, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}, 'uniqueItems': True}
assert _handle_container(dict, (str,bool), defs) == {'type': 'object', 'additionalProperties': {'type': 'boolean'}}

result = _handle_container(list, (TestClass,), defs)
assert result == {'type': 'array', 'items': {'$ref': '#/$defs/TestClass'}}
assert 'TestClass' in defs

# Test complex nested structure
ComplexType = dict[str, list[TestClass]]
result = _handle_container(dict, (str, list[TestClass]), defs)
assert result == {
    'type': 'object',
    'additionalProperties': {
        'type': 'array',
        'items': {'$ref': '#/$defs/TestClass'}
    }
}
```

``` python
# Test processing of a required integer property
props, req = {}, {}
class TestClass:
    "Test class"
    def __init__(
        self,
        x: int,             # First thing
        y: list[float],     # Second thing
        z: str = "default", # Third thing
    ): store_attr()

d = docments(TestClass, full=True)
_process_property('x', d.x, props, req, defs)
assert 'x' in props
assert props['x']['type'] == 'integer'
assert 'x' in req

# Test processing of a required list property
_process_property('y', d.y, props, req, defs)
assert 'y' in props
assert props['y']['type'] == 'array'
assert props['y']['items']['type'] == 'number'
assert 'y' in req

# Test processing of an optional string property with default
_process_property('z', d.z, props, req, defs)
assert 'z' in props
assert props['z']['type'] == 'string'
assert props['z']['default'] == "default"
assert 'z' not in req
```

------------------------------------------------------------------------

source

### get_schema

> get_schema (f:callable, pname='input_schema')

*Generate JSON schema for a class, function, or method*


Exported source

``` python
def get_schema(f:callable, pname='input_schema')->dict:
    "Generate JSON schema for a class, function, or method"
    schema = _get_nested_schema(f)
    desc = f.__doc__
    assert desc, "Docstring missing!"
    d = docments(f, full=True)
    ret = d.pop('return')
    if ret.anno is not empty: desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}'
    return {"name": f.__name__, "description": desc, pname: schema}
```


Putting this all together, we can now test getting a schema from
`silly_sum`. The tool use spec doesn't support return annotations
directly, so we put that in the description instead.

``` python
s = get_schema(silly_sum)
desc = s.pop('description')
print(desc)
s
```

    Adds a + b.

    Returns:
    - type: integer

    {'name': 'silly_sum',
     'input_schema': {'type': 'object',
      'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},
       'b': {'type': 'integer',
        'description': 'Second thing to sum',
        'default': 1},
       'c': {'type': 'array',
        'description': 'A pointless argument',
        'items': {'type': 'integer'},
        'default': None}},
      'title': None,
      'required': ['a']}}

This also works with string annotations, e.g.:

``` python
def silly_test(
    a: 'int',  # quoted type hint
):
    "Mandatory docstring"
    return a

get_schema(silly_test)
```

    {'name': 'silly_test',
     'description': 'Mandatory docstring',
     'input_schema': {'type': 'object',
      'properties': {'a': {'type': 'integer', 'description': 'quoted type hint'}},
      'title': None,
      'required': ['a']}}

This also works with class methods:

``` python
class Dummy:
    def sums(
        self,
        a:int,  # First thing to sum
        b:int=1 # Second thing to sum
    ) -> int:   # The sum of the inputs
        "Adds a + b."
        print(f"Finding the sum of {a} and {b}")
        return a + b

get_schema(Dummy.sums)
```

    {'name': 'sums',
     'description': 'Adds a + b.\n\nReturns:\n- type: integer',
     'input_schema': {'type': 'object',
      'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},
       'b': {'type': 'integer',
        'description': 'Second thing to sum',
        'default': 1}},
      'title': None,
      'required': ['a']}}

[`get_schema`](https://AnswerDotAI.github.io/toolslm/funccall.html#get_schema)
also handles more complicated structures such as nested classes. This is
useful for things like structured outputs.

``` python
class Turn:
    "Turn between two speakers"
    def __init__(
        self,
        speaker_a:str, # First speaker to speak's message
        speaker_b:str, # Second speaker to speak's message
    ): store_attr()

class Conversation:
    "A conversation between two speakers"
    def __init__(
        self,
        turns:list[Turn], # Turns of the conversation
    ): store_attr()

get_schema(Conversation)
```

    {'name': 'Conversation',
     'description': 'A conversation between two speakers',
     'input_schema': {'type': 'object',
      'properties': {'turns': {'type': 'array',
        'description': 'Turns of the conversation',
        'items': {'$ref': '#/$defs/Turn'}}},
      'title': 'Conversation',
      'required': ['turns'],
      '$defs': {'Turn': {'type': 'object',
        'properties': {'speaker_a': {'type': 'string',
          'description': "First speaker to speak's message"},
         'speaker_b': {'type': 'string',
          'description': "Second speaker to speak's message"}},
        'title': 'Turn',
        'required': ['speaker_a', 'speaker_b']}}}}

``` python
class DictConversation:
    "A conversation between two speakers"
    def __init__(
        self,
        turns:dict[str,list[Turn]], # dictionary of topics and the Turns of the conversation
    ): store_attr()

get_schema(DictConversation)
```

    {'name': 'DictConversation',
     'description': 'A conversation between two speakers',
     'input_schema': {'type': 'object',
      'properties': {'turns': {'type': 'object',
        'description': 'dictionary of topics and the Turns of the conversation',
        'additionalProperties': {'type': 'array',
         'items': {'$ref': '#/$defs/Turn'}}}},
      'title': 'DictConversation',
      'required': ['turns'],
      '$defs': {'Turn': {'type': 'object',
        'properties': {'speaker_a': {'type': 'string',
          'description': "First speaker to speak's message"},
         'speaker_b': {'type': 'string',
          'description': "Second speaker to speak's message"}},
        'title': 'Turn',
        'required': ['speaker_a', 'speaker_b']}}}}

``` python
class SetConversation:
    "A conversation between two speakers"
    def __init__(
        self,
        turns:set[Turn], # the unique Turns of the conversation
    ): store_attr()

get_schema(SetConversation)
```

    {'name': 'SetConversation',
     'description': 'A conversation between two speakers',
     'input_schema': {'type': 'object',
      'properties': {'turns': {'type': 'array',
        'description': 'the unique Turns of the conversation',
        'items': {'$ref': '#/$defs/Turn'},
        'uniqueItems': True}},
      'title': 'SetConversation',
      'required': ['turns'],
      '$defs': {'Turn': {'type': 'object',
        'properties': {'speaker_a': {'type': 'string',
          'description': "First speaker to speak's message"},
         'speaker_b': {'type': 'string',
          'description': "Second speaker to speak's message"}},
        'title': 'Turn',
        'required': ['speaker_a', 'speaker_b']}}}}

### Python tool

In language model clients it's often useful to have a 'code interpreter'
– this is something that runs code, and generally outputs the result of
the last expression (i.e. like IPython or Jupyter).

In this section we'll create the
[`python`](https://AnswerDotAI.github.io/toolslm/funccall.html#python)
function, which executes a string as Python code, with an optional
timeout. If the last line is an expression, we'll return that – just
like in IPython or Jupyter, but without needing them installed.


Exported source

``` python
import ast, time, signal, traceback
from fastcore.utils import *
```


Exported source

``` python
def _copy_loc(new, orig):
    "Copy location information from original node to new node and all children."
    new = ast.copy_location(new, orig)
    for field, o in ast.iter_fields(new):
        if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig))
        elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])
    return new
```


This is an internal function that's needed for
[`_run`](https://AnswerDotAI.github.io/toolslm/funccall.html#_run) to
ensure that location information is available in the abstract syntax
tree (AST), since otherwise Python complains.
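
Here's a minimal repro sketch of the problem it solves (not library code): AST nodes synthesised by hand have no line numbers, so compiling them fails until locations are filled in.

``` python
import ast

tree = ast.parse('1 + 1')
expr = tree.body[0]
# Swap in a synthesised assignment *without* copying location info
tree.body[0] = ast.Assign(targets=[ast.Name(id='x', ctx=ast.Store())], value=expr.value)
try: compile(tree, '<ast>', 'exec')
except (TypeError, ValueError) as e: print(e)  # complains about the missing lineno
```

(`ast.fix_missing_locations` is the stdlib helper for the same job; `_copy_loc` instead copies the original node's location into the replacement and its children.)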

Exported source

``` python
def _run(code:str):
    "Run `code`, returning final expression (similar to IPython)"
    tree = ast.parse(code)
    last_node = tree.body[-1] if tree.body else None

    # If the last node is an expression, modify the AST to capture the result
    if isinstance(last_node, ast.Expr):
        tgt = [ast.Name(id='_result', ctx=ast.Store())]
        assign_node = ast.Assign(targets=tgt, value=last_node.value)
        tree.body[-1] = _copy_loc(assign_node, last_node)

    compiled_code = compile(tree, filename='<ast>', mode='exec')
    namespace = {}
    stdout_buffer = io.StringIO()
    saved_stdout = sys.stdout
    sys.stdout = stdout_buffer
    try: exec(compiled_code, namespace)
    finally: sys.stdout = saved_stdout
    _result = namespace.get('_result', None)
    if _result is not None: return _result
    return stdout_buffer.getvalue().strip()
```


This is the internal function used to actually run the code – we pull
off the last AST node to see if it's an expression (i.e. something that
returns a value), and if so, we store it to a special `_result` variable
so we can return it.

``` python
_run('import math;math.factorial(12)')
```

    479001600

``` python
_run('print(1+1)')
```

    '2'

We now have the machinery needed to create our
[`python`](https://AnswerDotAI.github.io/toolslm/funccall.html#python)
function.

------------------------------------------------------------------------

source

### python

> python (code, timeout=5)

*Executes python `code` with `timeout` and returning final expression
(similar to IPython). Raised exceptions are returned as a string, with a
stack trace.*

|         | Type | Default | Details |
|---------|------|---------|---------|
| code    |      |         | Code to execute |
| timeout | int  | 5       | Maximum run time in seconds before a `TimeoutError` is raised |


Exported source

``` python
def python(code,     # Code to execute
           timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised
          ):         # Result of last node, if it's an expression, or `None` otherwise
    """Executes python `code` with `timeout` and returning final expression (similar to IPython).
    Raised exceptions are returned as a string, with a stack trace."""
    def handler(*args): raise TimeoutError()
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(timeout)
    try: return _run(code)
    except Exception as e: return traceback.format_exc()
    finally: signal.alarm(0)
```


There's no builtin security here – you should generally use this in a
sandbox, or alternatively prompt before running code. It can handle
multiline function definitions, and pretty much any other normal Python
syntax.

``` python
python("""def factorial(n):
    if n == 0 or n == 1: return 1
    else: return n * factorial(n-1)
factorial(5)""")
```

    120

If the code takes longer than `timeout` then it raises a `TimeoutError`.

``` python
try: python('import time; time.sleep(10)', timeout=1)
except TimeoutError: print('Timed out')
```

### Tool Calling

Many LLM API providers offer tool calling, where an LLM can choose to
call a given tool. This is also helpful for structured outputs, since
the response from the LLM is constrained to the required arguments of
the tool.

This section will be dedicated to helper functions for calling tools. We
don't want to allow LLMs to call just any possible function (that would
be a security disaster!) so we create a namespace – that is, a
dictionary of allowable function names to call.

------------------------------------------------------------------------

source

### mk_ns

> mk_ns (*funcs_or_objs)

``` python
def sums(a, b): return a + b
ns = mk_ns(sums); ns
```

    {'sums': <function __main__.sums(a, b)>}

``` python
ns['sums'](1, 2)
```

    3

``` python
class Dummy:
    def __init__(self,a): self.a = a
    def __call__(self): return self.a
    def sums(self, a, b): return a + b
    @staticmethod
    def subs(a, b): return a - b
    @classmethod
    def mults(cls, a, b): return a * b
```
``` python
ns = mk_ns(Dummy); ns
```

    {'subs': <function __main__.Dummy.subs(a, b)>,
     'mults': <bound method Dummy.mults of <class '__main__.Dummy'>>,
     'Dummy': __main__.Dummy}

``` python
ns['subs'](1, 2), ns['mults'](3, 2)
```

    (-1, 6)

``` python
d = Dummy(10)
ns = mk_ns(d); ns
```

    {'__call__': <bound method Dummy.__call__ of <__main__.Dummy object at 0x...>>,
     '__init__': <bound method Dummy.__init__ of <__main__.Dummy object at 0x...>>,
     'mults': <bound method Dummy.mults of <class '__main__.Dummy'>>,
     'sums': <bound method Dummy.sums of <__main__.Dummy object at 0x...>>,
     'subs': <function __main__.Dummy.subs(a, b)>}

``` python
ns['subs'](1, 2), ns['mults'](3, 2), ns['sums'](3, 2), ns['__call__']()
```

    (-1, 6, 5, 10)

``` python
ns['__init__'](-99), ns['__call__']()
```

    (None, -99)

------------------------------------------------------------------------

source

### call_func

> call_func (fc_name, fc_inputs, ns)

*Call the function `fc_name` with the given `fc_inputs` using namespace
`ns`.*


Exported source

``` python
def call_func(fc_name, fc_inputs, ns):
    "Call the function `fc_name` with the given `fc_inputs` using namespace `ns`."
    if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)
    func = ns[fc_name]
    return func(**fc_inputs)
```


Now when an LLM responds with the tool to use and its inputs, we can
simply use the same namespace it was given to look up the tool and call
it.

``` python
call_func('sums', {'a': 1, 'b': 2}, ns=[sums])
```

    3

``` python
call_func('subs', {'a': 1, 'b': 2}, ns=mk_ns(d))
```

    -1
@@ -0,0 +1,131 @@
+# toolslm
+
+
+
+
+This is a work in progress…
+
+## Install
+
+``` sh
+pip install toolslm
+```
+
+## How to use
+
+### Context creation
+
+toolslm has some helpers to make it easier to generate XML context from
+files, for instance
+[`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx):
+
+``` python
+print(folder2ctx('samples', prefix=False, file_glob='*.py'))
+```
+
+
+
+
+
+ import inspect
+ empty = inspect.Parameter.empty
+ models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
+
+
+
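
`folder2ctx` passes its filtering keywords through to fastcore's `globtastic`, so the same glob and regex filters apply. A usage sketch (the `skip_file_re` filter here is just an illustrative choice):

``` python
# Same folder, but skip any file whose name starts with 'sample_styles'
print(folder2ctx('samples', prefix=False, file_glob='*.py', skip_file_re='^sample_styles'))
```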

### XML helpers

Many language models work well with XML inputs, but XML can be a bit
clunky to work with manually. Therefore, toolslm includes a couple of
more streamlined approaches for XML generation.

An XML node contains a tag, optional children, and optional attributes.
`ft` creates a tuple of these three things, which we will use to generate
XML shortly. Attributes are passed as kwargs; since these might conflict
with reserved words in Python, you can optionally add a `_` prefix and
it'll be stripped off.

``` python
ft('x-custom', ['hi'], _class='bar')
```

    ('x-custom', ['hi'], {'class': 'bar'})

toolslm has functions defined for some common HTML elements to create
`ft` tuples more easily, including these:

``` python
from toolslm.xml import div,img,h1,h2,p,hr,html
```

``` python
a = html([
    p('This is a paragraph'),
    hr(),
    img(src='http://example.prg'),
    div([
        h1('This is a header'),
        h2('This is a sub-header', style='k:v'),
    ], _class='foo')
])
a
```

    ('html',
     [('p', 'This is a paragraph', {}),
      ('hr', None, {}),
      ('img', None, {'src': 'http://example.prg'}),
      ('div',
       [('h1', 'This is a header', {}),
        ('h2', 'This is a sub-header', {'style': 'k:v'})],
       {'class': 'foo'})],
     {})

To convert a tuple data structure created with `ft` and friends into
XML, use `to_xml`, adding the `hl` parameter to optionally add syntax
highlighting:

``` python
to_xml(a, hl=True)
```

``` xml
<html>
  <p>This is a paragraph</p>
  <hr />
  <img src="http://example.prg" />
  <div class="foo">
    <h1>This is a header</h1>
    <h2 style="k:v">This is a sub-header</h2>
  </div>
</html>
```

JSON doesn't map as nicely to XML as the `ft` data structure, but for
simple XML trees it can be convenient. The
[`json_to_xml`](https://AnswerDotAI.github.io/toolslm/xml.html#json_to_xml)
function handles that conversion:

``` python
a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
         address=dict(state='Queensland',country='Australia'))
print(json_to_xml(a, 'person'))
```

    <person>
      <surname>Howard</surname>
      <firstnames>
        <item>Jeremy</item>
        <item>Peter</item>
      </firstnames>
      <address>
        <state>Queensland</state>
        <country>Australia</country>
      </address>
    </person>

See the `xml source` section for a walkthru of XML and document context
generation functionality.
(str,), defs) == {'type': 'array', 'items': {'type': 'string'}}\nassert _handle_container(set, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}, 'uniqueItems': True}\nassert _handle_container(dict, (str,bool), defs) == {'type': 'object', 'additionalProperties': {'type': 'boolean'}}\n\nresult = _handle_container(list, (TestClass,), defs)\nassert result == {'type': 'array', 'items': {'$ref': '#/$defs/TestClass'}}\nassert 'TestClass' in defs\n\n# Test complex nested structure\nComplexType = dict[str, list[TestClass]]\nresult = _handle_container(dict, (str, list[TestClass]), defs)\nassert result == {\n 'type': 'object',\n 'additionalProperties': {\n 'type': 'array',\n 'items': {'$ref': '#/$defs/TestClass'}\n }\n}\n\n\n# Test processing of a required integer property\nprops, req = {}, {}\nclass TestClass:\n \"Test class\"\n def __init__(\n self,\n x: int, # First thing\n y: list[float], # Second thing\n z: str = \"default\", # Third thing\n ): store_attr()\n\nd = docments(TestClass, full=True)\n_process_property('x', d.x, props, req, defs)\nassert 'x' in props\nassert props['x']['type'] == 'integer'\nassert 'x' in req\n\n# Test processing of a required list property\n_process_property('y', d.y, props, req, defs)\nassert 'y' in props\nassert props['y']['type'] == 'array'\nassert props['y']['items']['type'] == 'number'\nassert 'y' in req\n\n# Test processing of an optional string property with default\n_process_property('z', d.z, props, req, defs)\nassert 'z' in props\nassert props['z']['type'] == 'string'\nassert props['z']['default'] == \"default\"\nassert 'z' not in req\n\n\nsource\n\nget_schema\n\n get_schema (f:<built-infunctioncallable>, pname='input_schema')\n\nGenerate JSON schema for a class, function, or method\n\n\nExported source\ndef get_schema(f:callable, pname='input_schema')->dict:\n \"Generate JSON schema for a class, function, or method\"\n schema = _get_nested_schema(f)\n desc = f.__doc__\n assert desc, \"Docstring missing!\"\n d = docments(f, full=True)\n ret = d.pop('return')\n if ret.anno is not empty: desc += f'\\n\\nReturns:\\n- type: {_types(ret.anno)[0]}'\n return {\"name\": f.__name__, \"description\": desc, pname: schema}\n\n\nPutting this all together, we can now test getting a schema from silly_sum. 
The tool use spec doesn’t support return annotations directly, so we put that in the description instead.\n\ns = get_schema(silly_sum)\ndesc = s.pop('description')\nprint(desc)\ns\n\nAdds a + b.\n\nReturns:\n- type: integer\n\n\n{'name': 'silly_sum',\n 'input_schema': {'type': 'object',\n 'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},\n 'b': {'type': 'integer',\n 'description': 'Second thing to sum',\n 'default': 1},\n 'c': {'type': 'array',\n 'description': 'A pointless argument',\n 'items': {'type': 'integer'},\n 'default': None}},\n 'title': None,\n 'required': ['a']}}\n\n\nThis also works with string annotations, e.g:\n\ndef silly_test(\n a: 'int', # quoted type hint\n):\n \"Mandatory docstring\"\n return a\n\nget_schema(silly_test)\n\n{'name': 'silly_test',\n 'description': 'Mandatory docstring',\n 'input_schema': {'type': 'object',\n 'properties': {'a': {'type': 'integer', 'description': 'quoted type hint'}},\n 'title': None,\n 'required': ['a']}}\n\n\nThis also works with class methods:\n\nclass Dummy:\n def sums(\n self,\n a:int, # First thing to sum\n b:int=1 # Second thing to sum\n ) -> int: # The sum of the inputs\n \"Adds a + b.\"\n print(f\"Finding the sum of {a} and {b}\")\n return a + b\n\nget_schema(Dummy.sums)\n\n{'name': 'sums',\n 'description': 'Adds a + b.\\n\\nReturns:\\n- type: integer',\n 'input_schema': {'type': 'object',\n 'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},\n 'b': {'type': 'integer',\n 'description': 'Second thing to sum',\n 'default': 1}},\n 'title': None,\n 'required': ['a']}}\n\n\nget_schema also handles more complicated structures such as nested classes. This is useful for things like structured outputs.\n\nclass Turn:\n \"Turn between two speakers\"\n def __init__(\n self,\n speaker_a:str, # First speaker to speak's message\n speaker_b:str, # Second speaker to speak's message\n ): store_attr()\n\nclass Conversation:\n \"A conversation between two speakers\"\n def __init__(\n self,\n turns:list[Turn], # Turns of the conversation\n ): store_attr()\n\nget_schema(Conversation)\n\n{'name': 'Conversation',\n 'description': 'A conversation between two speakers',\n 'input_schema': {'type': 'object',\n 'properties': {'turns': {'type': 'array',\n 'description': 'Turns of the conversation',\n 'items': {'$ref': '#/$defs/Turn'}}},\n 'title': 'Conversation',\n 'required': ['turns'],\n '$defs': {'Turn': {'type': 'object',\n 'properties': {'speaker_a': {'type': 'string',\n 'description': \"First speaker to speak's message\"},\n 'speaker_b': {'type': 'string',\n 'description': \"Second speaker to speak's message\"}},\n 'title': 'Turn',\n 'required': ['speaker_a', 'speaker_b']}}}}\n\n\n\nclass DictConversation:\n \"A conversation between two speakers\"\n def __init__(\n self,\n turns:dict[str,list[Turn]], # dictionary of topics and the Turns of the conversation\n ): store_attr()\n\nget_schema(DictConversation)\n\n{'name': 'DictConversation',\n 'description': 'A conversation between two speakers',\n 'input_schema': {'type': 'object',\n 'properties': {'turns': {'type': 'object',\n 'description': 'dictionary of topics and the Turns of the conversation',\n 'additionalProperties': {'type': 'array',\n 'items': {'$ref': '#/$defs/Turn'}}}},\n 'title': 'DictConversation',\n 'required': ['turns'],\n '$defs': {'Turn': {'type': 'object',\n 'properties': {'speaker_a': {'type': 'string',\n 'description': \"First speaker to speak's message\"},\n 'speaker_b': {'type': 'string',\n 'description': \"Second speaker to 
speak's message\"}},\n 'title': 'Turn',\n 'required': ['speaker_a', 'speaker_b']}}}}\n\n\n\nclass SetConversation:\n \"A conversation between two speakers\"\n def __init__(\n self,\n turns:set[Turn], # the unique Turns of the conversation\n ): store_attr()\n\nget_schema(SetConversation)\n\n{'name': 'SetConversation',\n 'description': 'A conversation between two speakers',\n 'input_schema': {'type': 'object',\n 'properties': {'turns': {'type': 'array',\n 'description': 'the unique Turns of the conversation',\n 'items': {'$ref': '#/$defs/Turn'},\n 'uniqueItems': True}},\n 'title': 'SetConversation',\n 'required': ['turns'],\n '$defs': {'Turn': {'type': 'object',\n 'properties': {'speaker_a': {'type': 'string',\n 'description': \"First speaker to speak's message\"},\n 'speaker_b': {'type': 'string',\n 'description': \"Second speaker to speak's message\"}},\n 'title': 'Turn',\n 'required': ['speaker_a', 'speaker_b']}}}}\n\n\n\n\nPython tool\nIn language model clients it’s often useful to have a ‘code interpreter’ – this is something that runs code, and generally outputs the result of the last expression (i.e like IPython or Jupyter).\nIn this section we’ll create the python function, which executes a string as Python code, with an optional timeout. If the last line is an expression, we’ll return that – just like in IPython or Jupyter, but without needing them installed.\n\n\nExported source\nimport ast, time, signal, traceback\nfrom fastcore.utils import *\n\n\n\n\nExported source\ndef _copy_loc(new, orig):\n \"Copy location information from original node to new node and all children.\"\n new = ast.copy_location(new, orig)\n for field, o in ast.iter_fields(new):\n if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig))\n elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])\n return new\n\n\nThis is an internal function that’s needed for _run to ensure that location information is available in the abstract syntax tree (AST), since otherwise python complains.\n\n\nExported source\ndef _run(code:str ):\n \"Run `code`, returning final expression (similar to IPython)\"\n tree = ast.parse(code)\n last_node = tree.body[-1] if tree.body else None\n \n # If the last node is an expression, modify the AST to capture the result\n if isinstance(last_node, ast.Expr):\n tgt = [ast.Name(id='_result', ctx=ast.Store())]\n assign_node = ast.Assign(targets=tgt, value=last_node.value)\n tree.body[-1] = _copy_loc(assign_node, last_node)\n\n compiled_code = compile(tree, filename='<ast>', mode='exec')\n namespace = {}\n stdout_buffer = io.StringIO()\n saved_stdout = sys.stdout\n sys.stdout = stdout_buffer\n try: exec(compiled_code, namespace)\n finally: sys.stdout = saved_stdout\n _result = namespace.get('_result', None)\n if _result is not None: return _result\n return stdout_buffer.getvalue().strip()\n\n\nThis is the internal function used to actually run the code – we pull off the last AST to see if it’s an expression (i.e something that returns a value), and if so, we store it to a special _result variable so we can return it.\n\n_run('import math;math.factorial(12)')\n\n479001600\n\n\n\n_run('print(1+1)')\n\n'2'\n\n\nWe now have the machinery needed to create our python function.\n\nsource\n\n\npython\n\n python (code, timeout=5)\n\nExecutes python code with timeout and returning final expression (similar to IPython). 
Raised exceptions are returned as a string, with a stack trace.\n\n\n\n\n\n\n\n\n\n\nType\nDefault\nDetails\n\n\n\n\ncode\n\n\nCode to execute\n\n\ntimeout\nint\n5\nMaximum run time in seconds before a TimeoutError is raised\n\n\n\n\n\nExported source\ndef python(code, # Code to execute\n timeout=5 # Maximum run time in seconds before a `TimeoutError` is raised\n ): # Result of last node, if it's an expression, or `None` otherwise\n \"\"\"Executes python `code` with `timeout` and returning final expression (similar to IPython).\n Raised exceptions are returned as a string, with a stack trace.\"\"\"\n def handler(*args): raise TimeoutError()\n signal.signal(signal.SIGALRM, handler)\n signal.alarm(timeout)\n try: return _run(code)\n except Exception as e: return traceback.format_exc()\n finally: signal.alarm(0)\n\n\nThere’s no builtin security here – you should generally use this in a sandbox, or alternatively prompt before running code. It can handle multiline function definitions, and pretty much any other normal Python syntax.\n\npython(\"\"\"def factorial(n):\n if n == 0 or n == 1: return 1\n else: return n * factorial(n-1)\nfactorial(5)\"\"\")\n\n120\n\n\nIf the code takes longer than timeout then it raises a TimeoutError.\n\ntry: python('import time; time.sleep(10)', timeout=1)\nexcept TimeoutError: print('Timed out')\n\n\n\nTool Calling\nMany LLM API providers offer tool calling where an LLM can choose to call a given tool. This is also helpful for structured outputs since the response from the LLM is contrained to the required arguments of the tool.\nThis section will be dedicated to helper functions for calling tools. We don’t want to allow LLMs to call just any possible function (that would be a security disaster!) so we create a namespace – that is, a dictionary of allowable function names to call.\n\nsource\n\n\nmk_ns\n\n mk_ns (*funcs_or_objs)\n\n\ndef sums(a, b): return a + b\nns = mk_ns(sums); ns\n\n{'sums': <function __main__.sums(a, b)>}\n\n\n\nns['sums'](1, 2)\n\n3\n\n\n\nclass Dummy:\n def __init__(self,a): self.a = a\n def __call__(self): return self.a\n def sums(self, a, b): return a + b\n @staticmethod\n def subs(a, b): return a - b\n @classmethod\n def mults(cls, a, b): return a * b\n\n\nns = mk_ns(Dummy); ns\n\n{'subs': <function __main__.Dummy.subs(a, b)>,\n 'mults': <bound method Dummy.mults of <class '__main__.Dummy'>>,\n 'Dummy': __main__.Dummy}\n\n\n\nns['subs'](1, 2), ns['mults'](3, 2)\n\n(-1, 6)\n\n\n\nd = Dummy(10)\nns = mk_ns(d); ns\n\n{'__call__': <bound method Dummy.__call__ of <__main__.Dummy object>>,\n '__init__': <bound method Dummy.__init__ of <__main__.Dummy object>>,\n 'mults': <bound method Dummy.mults of <class '__main__.Dummy'>>,\n 'sums': <bound method Dummy.sums of <__main__.Dummy object>>,\n 'subs': <staticmethod(<function Dummy.subs>)>}\n\n\n\nns['subs'](1, 2), ns['mults'](3, 2), ns['sums'](3, 2), ns['__call__']()\n\n(-1, 6, 5, 10)\n\n\n\nns['__init__'](-99), ns['__call__']()\n\n(None, -99)\n\n\n\nsource\n\n\ncall_func\n\n call_func (fc_name, fc_inputs, ns)\n\nCall the function fc_name with the given fc_inputs using namespace ns.\n\n\nExported source\ndef call_func(fc_name, fc_inputs, ns):\n \"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`.\"\n if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)\n func = ns[fc_name]\n return func(**fc_inputs)\n\n\nNow when we an LLM responses with the tool to use and its inputs, we can simply use the same namespace it was given to look up the tool and call it.\n\ncall_func('sums', 
{'a': 1, 'b': 2}, ns=[sums])\n\n3\n\n\n\ncall_func('subs', {'a': 1, 'b': 2}, ns=mk_ns(d))\n\n-1",
+ "crumbs": [
+ "funccall source"
+ ]
+ }
+]
\ No newline at end of file
diff --git a/shell.html b/shell.html
new file mode 100644
index 0000000..374ea12
--- /dev/null
+++ b/shell.html
@@ -0,0 +1,766 @@
+shell source
+
+import ast, time, signal, traceback
+from fastcore.utils import *
+
+
+
get_shell is like python, except it also maintains a stateful interpreter, rather than just running a single line of code. This is implemented using IPython, so that must be installed.
+
+
+Exported source
+
from IPython.terminal.interactiveshell import TerminalInteractiveShell
+from IPython.utils.capture import capture_output
+
+
+
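+As a rough sketch of the idea (this is not the exported get_shell definition, which isn’t shown here): IPython’s TerminalInteractiveShell keeps its user namespace between run_cell calls, which is what makes the interpreter stateful, and capture_output collects anything printed along the way.
+
+shell = TerminalInteractiveShell()
+with capture_output() as cap:
+    shell.run_cell('x = 1')
+    res = shell.run_cell('x + 1')  # `x` persists from the previous cell
+res.result  # 2
+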
+
+def exception2str(ex:Exception)->str:
+    "Convert exception `ex` into a string"
+    return ''.join(traceback.format_exception(type(ex), ex, ex.__traceback__))
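+
+For example, converting a caught exception yields the familiar traceback text:
+
+try: 1/0
+except Exception as e: print(exception2str(e))
+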
According to Anthropic, “it’s essential to structure your prompts in a way that clearly separates the input data from the instructions”. They recommend using the following format:
+
Here are some documents for you to reference for your task:
+
+<documents>
+<document index="1">
+<source>
+(URL, file name, hash, etc)
+</source>
+<document_content>
+(the text content)
+</document_content>
+</document>
+</documents>
+
We will create some small helper functions to make it easier to generate context in this format. Although it’s based on Anthropic’s recommendation, it’s likely to work well with other models too.
+def _add_nls(s):
+    "Add newlines to start and end of `s` if missing"
+    if not s: return s
+    if s[ 0]!='\n': s = '\n'+s
+    if s[-1]!='\n': s = s+'\n'
+    return s
+
+
+
Since Anthropic’s example shows newlines before and after each tag, we’ll do the same.
+mk_doc
+
+ mk_doc (index:int, content:str, source:Optional[str]=None, **kwargs)
+
+Create an ft format tuple for a single doc in Anthropic’s recommended format
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| index | int |  | The document index |
+| content | str |  | The document content |
+| source | Optional | None | URL, filename, etc; defaults to `md5(content)` if not provided |
+| kwargs |  |  |  |
+| **Returns** | **tuple** |  |  |
+
+
+Exported source
+
+def mk_doc(index:int, # The document index
+           content:str, # The document content
+           source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
+           **kwargs
+          ) -> tuple:
+    "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
+    dt = mk_doctype(content, source)
+    content = Document_content(dt.content)
+    source = Source(dt.source)
+    return Document(source, content, index=index, **kwargs)
+
+
+
We can now generate XML for one document in the suggested format:
+
+
mk_doc(1, doc, title="test")
+
+
<document index="1" title="test">
+<source>
+b8898fab
+</source>
+<document-content>
+This is a sample
+</document-content>
+</document>
+docs_xml
+
+ docs_xml (docs:list[str], sources:Optional[list]=None, prefix:bool=True, details:Optional[list]=None)
+
+Create an XML string containing docs in Anthropic’s recommended format
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| docs | list |  | The content of each document |
+| sources | Optional | None | URLs, filenames, etc; each one defaults to `md5(content)` if not provided |
+| prefix | bool | True | Include Anthropic’s suggested prose intro? |
+| details | Optional | None | Optional list of dicts with additional attrs for each doc |
+| **Returns** | **str** |  |  |
+
+Exported source
+
+def docs_xml(docs:list[str], # The content of each document
+             sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
+             prefix:bool=True, # Include Anthropic's suggested prose intro?
+             details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
+            )->str:
+    "Create an XML string containing `docs` in Anthropic's recommended format"
+    pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
+    if sources is None: sources = [None]*len(docs)
+    if details is None: details = [{}]*len(docs)
+    docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
+    return pre + to_xml(Documents(docs))
+
+
+
Putting it all together, we have our final XML format:
+docs = [doc, 'And another one']
+sources = [None, 'doc.txt']
+print(docs_xml(docs, sources))
+
+Here are some documents for you to reference for your task:
+
+<documents>
+ <document index="1">
+ <source>
+b8898fab
+
+</source>
+This is a sample
+</document-content>
+ </document>
+ <document index="2">
+ <source>
+doc.txt
+</source>
+ <document-content>
+And another one
+</document-content>
+ </document>
+</documents>
+
+
+
+
+
+
+
Context creation
+
Now that we can generate Anthropic’s XML format, let’s make it easy for a few common cases.
+
+
File list to context
+
For generating XML context from files, we’ll just read them as text and use the file names as source.
+def files2ctx(
+    fnames:list[Union[str,Path]], # List of file names to add to context
+    prefix:bool=True # Include Anthropic's suggested prose intro?
+)->str: # XML for LM context
+    fnames = [Path(o) for o in fnames]
+    contents = [o.read_text() for o in fnames]
+    return docs_xml(contents, fnames, prefix=prefix)
After you install toolslm, folder2ctx becomes available from the command line. You can see how to use it with the following command:
+
folder2ctx -h
+
\ No newline at end of file
diff --git a/xml.html.md b/xml.html.md
new file mode 100644
index 0000000..836209a
--- /dev/null
+++ b/xml.html.md
@@ -0,0 +1,684 @@
+# xml source
+
+
+
+
+## Setup
+
+------------------------------------------------------------------------
+
+source
+
+### json_to_xml
+
+> json_to_xml (d:dict, rnm:str)
+
+*Convert `d` to XML.*
+
+
+
+|  | Type | Details |
+|----|------|---------|
+| d | dict | JSON dictionary to convert |
+| rnm | str | Root name |
+| **Returns** | **str** |  |
+
+
+Exported source
+
+``` python
+def json_to_xml(d:dict, # JSON dictionary to convert
+ rnm:str # Root name
+ )->str:
+ "Convert `d` to XML."
+ root = ET.Element(rnm)
+ def build_xml(data, parent):
+ if isinstance(data, dict):
+ for key, value in data.items(): build_xml(value, ET.SubElement(parent, key))
+ elif isinstance(data, list):
+ for item in data: build_xml(item, ET.SubElement(parent, 'item'))
+ else: parent.text = str(data)
+ build_xml(d, root)
+ ET.indent(root)
+ return ET.tostring(root, encoding='unicode')
+```
+
+
+
+JSON doesn’t map as nicely to XML as the data structure used in
+`fastcore.xml`, but for simple XML trees it can be convenient – for
+example:
+
+``` python
+a = dict(surname='Howard', firstnames=['Jeremy','Peter'],
+ address=dict(state='Queensland',country='Australia'))
+hl_md(json_to_xml(a, 'person'))
+```
+
+``` xml
+<person>
+  <surname>Howard</surname>
+  <firstnames>
+    <item>Jeremy</item>
+    <item>Peter</item>
+  </firstnames>
+  <address>
+    <state>Queensland</state>
+    <country>Australia</country>
+  </address>
+</person>
+```
+
+## Including documents
+
+According [to
+Anthropic](https://docs.anthropic.com/claude/docs/long-context-window-tips),
+“*it’s essential to structure your prompts in a way that clearly
+separates the input data from the instructions*”. They recommend using
+the following format:
+
+``` xml
+Here are some documents for you to reference for your task:
+
+<documents>
+<document index="1">
+<source>
+(URL, file name, hash, etc)
+</source>
+<document_content>
+(the text content)
+</document_content>
+</document>
+</documents>
+```
+
+We will create some small helper functions to make it easier to generate
+context in this format. Although it’s based on Anthropic’s
+recommendation, it’s likely to work well with other models too.
+
+
+Exported source
+
+``` python
+doctype = namedtuple('doctype', ['source', 'content'])
+```
+
+
+
+We’ll use `doctype` to store our pairs.
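+
+For instance (with hypothetical values), the named fields make each pair self-describing:
+
+``` python
+dt = doctype(source='doc.txt', content='Some text')
+dt.source, dt.content
+```
+
+    ('doc.txt', 'Some text')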
+
+
+Exported source
+
+``` python
+def _add_nls(s):
+ "Add newlines to start and end of `s` if missing"
+ if not s: return s
+ if s[ 0]!='\n': s = '\n'+s
+ if s[-1]!='\n': s = s+'\n'
+ return s
+```
+
+
+
+Since Anthropic’s example shows newlines before and after each tag,
+we’ll do the same.
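+
+A quick check of the helper’s behavior (strings chosen for illustration):
+
+``` python
+_add_nls('hello'), _add_nls('\nhello\n')
+```
+
+    ('\nhello\n', '\nhello\n')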
+
+------------------------------------------------------------------------
+
+source
+
+### mk_doctype
+
+> mk_doctype (content:str, source:Optional[str]=None)
+
+*Create a `doctype` named tuple*
+
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| content | str |  | The document content |
+| source | Optional | None | URL, filename, etc; defaults to `md5(content)` if not provided |
+| **Returns** | **namedtuple** |  |  |
+
+
+Exported source
+
+``` python
+def mk_doctype(content:str, # The document content
+ source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
+ ) -> namedtuple:
+ "Create a `doctype` named tuple"
+ if source is None: source = hashlib.md5(content.encode()).hexdigest()[:8]
+ return doctype(_add_nls(str(source).strip()), _add_nls(content.strip()))
+```
+
+
+
+This is a convenience wrapper to ensure that a `doctype` has the needed
+information in the right format.
+
+``` python
+doc = 'This is a sample'
+mk_doctype(doc)
+```
+
+ doctype(source='\nb8898fab\n', content='\nThis is a sample\n')
+
+------------------------------------------------------------------------
+
+source
+
+### mk_doc
+
+> mk_doc (index:int, content:str, source:Optional[str]=None, **kwargs)
+
+*Create an `ft` format tuple for a single doc in Anthropic’s recommended
+format*
+
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| index | int |  | The document index |
+| content | str |  | The document content |
+| source | Optional | None | URL, filename, etc; defaults to `md5(content)` if not provided |
+| kwargs |  |  |  |
+| **Returns** | **tuple** |  |  |
+
+
+Exported source
+
+``` python
+def mk_doc(index:int, # The document index
+ content:str, # The document content
+ source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
+ **kwargs
+ ) -> tuple:
+ "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
+ dt = mk_doctype(content, source)
+ content = Document_content(dt.content)
+ source = Source(dt.source)
+ return Document(source, content, index=index, **kwargs)
+```
+
+
+
+We can now generate XML for one document in the suggested format:
+
+``` python
+mk_doc(1, doc, title="test")
+```
+
+``` html
+<document index="1" title="test">
+<source>
+b8898fab
+</source>
+<document-content>
+This is a sample
+</document-content>
+</document>
+```
+
+------------------------------------------------------------------------
+
+source
+
+### docs_xml
+
+> docs_xml (docs:list[str], sources:Optional[list]=None, prefix:bool=True,
+> details:Optional[list]=None)
+
+*Create an XML string containing `docs` in Anthropic’s recommended
+format*
+
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| docs | list |  | The content of each document |
+| sources | Optional | None | URLs, filenames, etc; each one defaults to `md5(content)` if not provided |
+| prefix | bool | True | Include Anthropic’s suggested prose intro? |
+| details | Optional | None | Optional list of dicts with additional attrs for each doc |
+| **Returns** | **str** |  |  |
+
+
+
+Exported source
+
+``` python
+def docs_xml(docs:list[str], # The content of each document
+ sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
+ prefix:bool=True, # Include Anthropic's suggested prose intro?
+ details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
+ )->str:
+ "Create an XML string containing `docs` in Anthropic's recommended format"
+ pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
+ if sources is None: sources = [None]*len(docs)
+ if details is None: details = [{}]*len(docs)
+ docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))
+ return pre + to_xml(Documents(docs))
+```
+
+
+
+Putting it all together, we have our final XML format:
+
+``` python
+docs = [doc, 'And another one']
+sources = [None, 'doc.txt']
+print(docs_xml(docs, sources))
+```
+
+ Here are some documents for you to reference for your task:
+
+    <documents>
+     <document index="1">
+     <source>
+    b8898fab
+    </source>
+     <document-content>
+    This is a sample
+    </document-content>
+     </document>
+     <document index="2">
+     <source>
+    doc.txt
+    </source>
+     <document-content>
+    And another one
+    </document-content>
+     </document>
+    </documents>
+
+
+## Context creation
+
+Now that we can generate Anthropic’s XML format, let’s make it easy for
+a few common cases.
+
+### File list to context
+
+For generating XML context from files, we’ll just read them as text and
+use the file names as `source`.
+
+------------------------------------------------------------------------
+
+source
+
+### files2ctx
+
+> files2ctx (fnames:list[typing.Union[str,pathlib.Path]], prefix:bool=True)
+
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| fnames | list |  | List of file names to add to context |
+| prefix | bool | True | Include Anthropic’s suggested prose intro? |
+| **Returns** | **str** |  | XML for LM context |
+
+
+Exported source
+
+``` python
+def files2ctx(
+ fnames:list[Union[str,Path]], # List of file names to add to context
+ prefix:bool=True # Include Anthropic's suggested prose intro?
+)->str: # XML for LM context
+ fnames = [Path(o) for o in fnames]
+ contents = [o.read_text() for o in fnames]
+ return docs_xml(contents, fnames, prefix=prefix)
+```
+
+
+
+``` python
+fnames = ['samples/sample_core.py', 'samples/sample_styles.css']
+hl_md(files2ctx(fnames))
+```
+
+``` xml
+Here are some documents for you to reference for your task:
+
+<documents>
+ <document index="1">
+ <source>
+samples/sample_core.py
+</source>
+ <document-content>
+import inspect
+empty = inspect.Parameter.empty
+models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
+</document-content>
+ </document>
+ <document index="2">
+ <source>
+samples/sample_styles.css
+</source>
+ <document-content>
+.cell { margin-bottom: 1rem; }
+.cell > .sourceCode { margin-bottom: 0; }
+.cell-output > pre { margin-bottom: 0; }
+</document-content>
+ </document>
+</documents>
+```
+
+### Folder to context
+
+------------------------------------------------------------------------
+
+source
+
+### folder2ctx
+
+> folder2ctx (folder:Union[str,pathlib.Path], prefix:bool=True,
+> recursive:bool=True, symlinks:bool=True, file_glob:str=None,
+> file_re:str=None, folder_re:str=None,
+> skip_file_glob:str=None, skip_file_re:str=None,
+> skip_folder_re:str=None, func:callable=<function join>,
+> ret_folders:bool=False)
+
+
+|  | Type | Default | Details |
+|----|------|---------|---------|
+| folder | Union |  | Folder name containing files to add to context |
+| prefix | bool | True | Include Anthropic’s suggested prose intro? |
+| recursive | bool | True | search subfolders |
+| symlinks | bool | True | follow symlinks? |
+| file_glob | str | None | Only include files matching glob |
+| file_re | str | None | Only include files matching regex |
+| folder_re | str | None | Only enter folders matching regex |
+| skip_file_glob | str | None | Skip files matching glob |
+| skip_file_re | str | None | Skip files matching regex |
+| skip_folder_re | str | None | Skip folders matching regex |
+| func | callable | join | function to apply to each matched file |
+| ret_folders | bool | False | return folders, not just files |
+| **Returns** | **str** |  | XML for Claude context |
+
+
+Exported source
+
+``` python
+@delegates(globtastic)
+def folder2ctx(
+ folder:Union[str,Path], # Folder name containing files to add to context
+ prefix:bool=True, # Include Anthropic's suggested prose intro?
+ **kwargs # Passed to `globtastic`
+)->str: # XML for Claude context
+ fnames = globtastic(folder, **kwargs)
+ return files2ctx(fnames, prefix=prefix)
+```
+
+
+
+``` python
+print(folder2ctx('samples', prefix=False, file_glob='*.py'))
+```
+
+
+    <documents>
+     <document index="1">
+     <source>
+    samples/sample_core.py
+    </source>
+     <document-content>
+    import inspect
+    empty = inspect.Parameter.empty
+    models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'
+    </document-content>
+     </document>
+    </documents>
+
+> **Tip**
+>
+> After you install `toolslm`,
+> [`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx)
+> becomes available from the command line. You can see how to use it
+> with the following command:
+>
+> ``` bash
+> folder2ctx -h
+> ```
+
+