Skip to content

Commit

Permalink
Add syntax highlighting to chatbot
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Oct 9, 2024
1 parent f2014a8 commit 63205ee
Show file tree
Hide file tree
Showing 26 changed files with 2,660 additions and 31 deletions.
3 changes: 3 additions & 0 deletions build/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ LINK.o = $(CXX) $(CCFLAGS) $(LDFLAGS)
COMPILE.c = $(CC) $(CCFLAGS) $(CFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
COMPILE.cc = $(CXX) $(CCFLAGS) $(CXXFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c

# Pattern rule: produce foo.c from foo.gperf by running gperf on the input
# ($<) and redirecting its standard output to the target ($@).
%.c: %.gperf
gperf $< >$@

o/$(MODE)/%.o: %.c $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.c) -o $@ $<
Expand Down
21 changes: 21 additions & 0 deletions llamafile/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ o/$(MODE)/llamafile: \
o/$(MODE)/llamafile/zipcheck \
o/$(MODE)/llamafile/tokenize \
o/$(MODE)/llamafile/addnl \
o/$(MODE)/llamafile/high \
o/$(MODE)/llamafile/highlight_c_test.runs \
o/$(MODE)/llamafile/highlight_python_test.runs \
o/$(MODE)/llamafile/pool_test.runs \
o/$(MODE)/llamafile/pool_cancel_test.runs \
o/$(MODE)/llamafile/parse_cidr_test.runs \
Expand Down Expand Up @@ -162,6 +165,24 @@ o/$(MODE)/llamafile/pool_test: \
o/$(MODE)/llamafile/crash.o \
o/$(MODE)/llamafile/pool.o \

# Test binary for the C highlighter: the test object plus the highlighter
# object and the C keyword lookup object it is linked against.
o/$(MODE)/llamafile/highlight_c_test: \
o/$(MODE)/llamafile/highlight_c_test.o \
o/$(MODE)/llamafile/highlight_c.o \
o/$(MODE)/llamafile/is_keyword_c.o \

# Test binary for the Python highlighter: the test object plus the highlighter
# object and the Python keyword lookup object it is linked against.
o/$(MODE)/llamafile/highlight_python_test: \
o/$(MODE)/llamafile/highlight_python_test.o \
o/$(MODE)/llamafile/highlight_python.o \
o/$(MODE)/llamafile/is_keyword_python.o \

# Command-line program built from high.o and the llama.cpp static archive.
# NOTE(review): the highlighter objects are not listed here, so they are
# presumably provided by llama.cpp.a or another rule -- confirm.
o/$(MODE)/llamafile/high: \
o/$(MODE)/llamafile/high.o \
o/$(MODE)/llama.cpp/llama.cpp.a \

# Command-line program built from hex2xterm.o and xterm.o.
o/$(MODE)/llamafile/hex2xterm: \
o/$(MODE)/llamafile/hex2xterm.o \
o/$(MODE)/llamafile/xterm.o \

o/$(MODE)/llamafile/pool_cancel_test: \
o/$(MODE)/llamafile/pool_cancel_test.o \
o/$(MODE)/llamafile/crash.o \
Expand Down
10 changes: 8 additions & 2 deletions llamafile/chatbot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "llamafile/highlight.h"
#include <assert.h>
#include <cosmo.h>
#include <ctype.h>
Expand Down Expand Up @@ -227,6 +228,7 @@ int chatbot_main(int argc, char **argv) {
printf("%s\n", params.special ? msg.c_str() : params.prompt.c_str());

// perform important setup
HighlightMarkdown highlighter;
struct llama_sampling_context *sampler = llama_sampling_init(params.sparams);
signal(SIGINT, on_sigint);

Expand Down Expand Up @@ -258,13 +260,17 @@ int chatbot_main(int argc, char **argv) {
llama_sampling_accept(sampler, g_ctx, id, true);
if (llama_token_is_eog(g_model, id))
break;
printf("%s", llama_token_to_piece(g_ctx, id, params.special).c_str());
std::string s;
highlighter.feed(&s, llama_token_to_piece(g_ctx, id, params.special));
printf("%s", s.c_str());
fflush(stdout);
eval_id(id);
}
g_got_sigint = 0;
printf("\n");
free(line);
std::string s;
highlighter.flush(&s);
printf("%s\n", s.c_str());
}

print_ephemeral("freeing context...");
Expand Down
40 changes: 40 additions & 0 deletions llamafile/hex2xterm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "xterm.h"

#include <stdio.h>
#include <stdlib.h>

// Prints, for each RRGGBB hex argument, the xterm-256 color code returned by
// rgb2xterm256() together with foreground and background samples.
//
// Each argument may carry an optional leading '#'. An argument that is not a
// hexadecimal number in the range 000000..ffffff is reported on stderr and
// the program exits with status 1, rather than being silently treated as
// whatever strtol() happened to return.
int main(int argc, char *argv[]) {

    if (argc < 2) {
        fprintf(stderr, "%s: missing operand\n", argv[0]);
        exit(1);
    }

    // process RRGGBB hex arguments
    for (int i = 1; i < argc; ++i) {

        // tolerate the html-style '#' prefix
        const char *digits = argv[i];
        if (*digits == '#')
            ++digits;

        // parse and validate; strtol() saturates on overflow, so the range
        // check below rejects that case too
        char *end;
        long value = strtol(digits, &end, 16);
        if (end == digits || *end || value < 0 || value > 0xffffff) {
            fprintf(stderr, "%s: invalid RRGGBB color: %s\n", argv[0], argv[i]);
            exit(1);
        }

        int rgb = static_cast<int>(value);
        int xterm = rgb2xterm256(rgb);
        printf("\n");
        printf("xterm code %d\n", xterm);
        printf("html5 code #%06x\n", rgb);
        printf("foreground \033[38;5;%dm\\033[38;5;%dm\033[0m\n", xterm, xterm);
        printf("background \033[48;5;%dm\\033[48;5;%dm\033[0m\n", xterm, xterm);
    }
}
100 changes: 100 additions & 0 deletions llamafile/high.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "highlight.h"
#include <errno.h>
#include <fcntl.h>
#include <memory>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <string_view>
#include <unistd.h>

// syntax highlighter demo

int main(int argc, char *argv[]) {

// process flags
int opt;
int infd = 0;
int outfd = 1;
const char *lang = nullptr;
const char *inpath = nullptr;
while ((opt = getopt(argc, argv, "hl:o:")) != -1) {
switch (opt) {
case 'h':
printf("usage: %s [-l LANG] [-o OUTFILE] [INFILE]\n", argv[0]);
exit(0);
case 'l':
lang = optarg;
break;
case 'o':
if ((outfd = creat(optarg, 0644)) == -1) {
perror(optarg);
exit(1);
}
break;
default:
exit(1);
}
}
if (optind < argc) {
inpath = argv[optind];
if ((infd = open(inpath, O_RDONLY)) == -1) {
perror(inpath);
exit(1);
}
}

// create syntax highlighter
Highlight *h;
const char *ext;
if (lang) {
h = Highlight::create(lang);
} else if (inpath && (ext = strrchr(inpath, '.'))) {
h = Highlight::create(ext + 1);
} else {
h = Highlight::create("markdown");
}

// process input
std::string res;
for (;;) {

// read input chunk
char buf[256];
ssize_t rc = read(infd, buf, sizeof(buf));
if (rc == -1) {
perror("read");
exit(1);
}
size_t got = rc;
if (!got)
break;

// highlight chunk
res.clear();
h->feed(&res, std::string_view(buf, got));

// write highlighted output chunk
write(outfd, res.data(), res.size());
}

// flush highlighter
res.clear();
h->flush(&res);
write(outfd, res.data(), res.size());
}
36 changes: 36 additions & 0 deletions llamafile/highlight.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "highlight.h"

// Factory that maps a language name to a highlighter implementation.
// Names are compared exactly; any name not listed below yields a
// HighlightPlain. The returned object is allocated with `new`.
Highlight *Highlight::create(const std::string_view &lang) {

    // languages served by HighlightC, differing only in keyword lookup
    struct CLike {
        std::string_view name;
        is_keyword_f *is_keyword;
    };
    static const CLike kCLike[] = {
        {"c", is_keyword_c},
        {"c++", is_keyword_cxx},
        {"cxx", is_keyword_cxx},
        {"cpp", is_keyword_cxx},
        {"js", is_keyword_js},
        {"javascript", is_keyword_js},
        {"java", is_keyword_java},
    };

    if (lang == "markdown" || lang == "md")
        return new HighlightMarkdown;

    for (const CLike &entry : kCLike)
        if (lang == entry.name)
            return new HighlightC(entry.is_keyword);

    if (lang == "py" || lang == "python")
        return new HighlightPython();

    return new HighlightPlain;
}
90 changes: 90 additions & 0 deletions llamafile/highlight.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <string>
#include <string_view>

// ANSI SGR escape sequences used for terminal styling.
#define RESET "\033[0m"
#define BOLDE "\033[1m"
#define KEYWORD "\033[1;34m" // bold blue
#define STRING "\033[0;32m" // green
#define COMMENT "\033[0;31m" // red

// Function type shared by the per-language keyword lookups declared below:
// takes a character pointer and a length, returns a character pointer.
// NOTE(review): presumably the lookup function generated by gperf, returning
// non-null when the word is a keyword -- confirm against the .gperf sources.
typedef const char *is_keyword_f(const char *, size_t);

// Keyword lookup functions, declared with C linkage.
extern "C" {
is_keyword_f is_keyword_c;
is_keyword_f is_keyword_cxx;
is_keyword_f is_keyword_js;
is_keyword_f is_keyword_java;
is_keyword_f is_keyword_python;
}

// Abstract interface for a syntax highlighter that is driven incrementally:
// input is handed over in chunks through feed() and finished with flush().
// Both calls receive the std::string that takes the output.
class Highlight {
public:
// Returns a highlighter selected by language name.
// NOTE(review): the definition in highlight.cpp allocates with `new`, so the
// caller presumably owns the returned object -- confirm.
static Highlight *create(const std::string_view &lang);
virtual ~Highlight() = default;
// Processes one chunk of input, placing output in *result.
// NOTE(review): callers in this commit pass an empty string each time, so
// whether output is appended or assigned is not visible here -- confirm.
virtual void feed(std::string *result, std::string_view input) = 0;
// Called by high.cpp and chatbot.cpp after the last feed(); presumably emits
// whatever output is still pending -- confirm.
virtual void flush(std::string *result) = 0;
};

// Highlighter that Highlight::create() returns for language names it does
// not recognize.
// NOTE(review): presumably passes text through without styling -- confirm
// against the implementation.
class HighlightPlain : public Highlight {
public:
HighlightPlain();
~HighlightPlain() override;
void feed(std::string *result, std::string_view input) override;
void flush(std::string *result) override;
};

// Highlighter parameterized by a keyword lookup function. Highlight::create()
// uses it for C, C++, JavaScript and Java, passing a different lookup for
// each; the default is the C lookup.
class HighlightC : public Highlight {
public:
explicit HighlightC(is_keyword_f is_keyword = is_keyword_c);
~HighlightC() override;
void feed(std::string *result, std::string_view input) override;
void flush(std::string *result) override;

private:
// NOTE(review): presumably the current scanner state carried across feed()
// calls -- confirm.
int t_ = 0;
// NOTE(review): presumably the identifier being accumulated until it can be
// checked against is_keyword_ -- confirm.
std::string word_;
// Keyword lookup supplied at construction.
is_keyword_f *is_keyword_;
};

// Highlighter that Highlight::create() returns for "py" and "python".
class HighlightPython : public Highlight {
public:
HighlightPython();
~HighlightPython() override;
void feed(std::string *result, std::string_view input) override;
void flush(std::string *result) override;

private:
// NOTE(review): presumably the current scanner state carried across feed()
// calls -- confirm.
int t_ = 0;
// NOTE(review): presumably the identifier being accumulated for keyword
// lookup -- confirm.
std::string word_;
};

// Highlighter that Highlight::create() returns for "markdown" and "md"; it is
// also the one chatbot.cpp instantiates directly for model output.
class HighlightMarkdown : public Highlight {
public:
HighlightMarkdown();
~HighlightMarkdown() override;
void feed(std::string *result, std::string_view input) override;
void flush(std::string *result) override;

private:
// NOTE(review): presumably the current scanner state carried across feed()
// calls -- confirm.
int t_ = 0;
// NOTE(review): presumably the language tag of the code block being read --
// confirm.
std::string lang_;
// NOTE(review): presumably a nested highlighter for the current code block.
// If the destructor deletes it, copying this object would double-free, since
// the copy operations are implicitly generated; consider deleting them or
// holding a std::unique_ptr -- confirm.
Highlight *highlighter_ = nullptr;
};
Loading

0 comments on commit 63205ee

Please sign in to comment.