Change argument processing to allow prompt or file args. (#103)
Tindell authored Mar 21, 2023
1 parent 428aa70 commit 9116ae9
Showing 3 changed files with 38 additions and 26 deletions.
43 changes: 23 additions & 20 deletions chat.cpp
@@ -318,7 +318,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
fin.close();

std::vector<uint8_t> tmp;

for (int i = 0; i < n_parts; ++i) {
const int part_id = i;
//const int part_id = n_parts - i - 1;
@@ -797,14 +797,6 @@ int main(int argc, char ** argv) {

gpt_params params;

-params.temp = 0.1f;
-params.top_p = 0.95f;
-params.n_ctx = 2048;
-params.interactive = true;
-params.interactive_start = true;
-params.use_color = true;
-params.model = "ggml-alpaca-7b-q4.bin";

if (gpt_params_parse(argc, argv, params) == false) {
return 1;
}
@@ -856,13 +848,26 @@ int main(int argc, char ** argv) {
// Add a space in front of the first character to match OG llama tokenizer behavior
// params.prompt.insert(0, 1, ' ');
// tokenize the prompt
-std::vector<gpt_vocab::id> embd_inp;// = ::llama_tokenize(vocab, params.prompt, true);
+std::vector<gpt_vocab::id> embd_inp;

// params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());

// // tokenize the reverse prompt
// std::vector<gpt_vocab::id> antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false);


+std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
+std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
+std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
+embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
+
+if(!params.prompt.empty()) {
+std::vector<gpt_vocab::id> param_inp = ::llama_tokenize(vocab, params.prompt, true);
+embd_inp.insert(embd_inp.end(), prompt_inp.begin(), prompt_inp.end());
+embd_inp.insert(embd_inp.end(), param_inp.begin(), param_inp.end());
+embd_inp.insert(embd_inp.end(), response_inp.begin(), response_inp.end());
+}

// fprintf(stderr, "\n");
// fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
// fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
@@ -871,13 +876,6 @@ int main(int argc, char ** argv) {
// }
// fprintf(stderr, "\n");

-std::vector<gpt_vocab::id> instruct_inp = ::llama_tokenize(vocab, " Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n", true);
-std::vector<gpt_vocab::id> prompt_inp = ::llama_tokenize(vocab, "### Instruction:\n\n", true);
-std::vector<gpt_vocab::id> response_inp = ::llama_tokenize(vocab, "### Response:\n\n", false);
-
-embd_inp.insert(embd_inp.end(), instruct_inp.begin(), instruct_inp.end());
-
-
if (params.interactive) {
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
@@ -1076,9 +1074,14 @@ int main(int argc, char ** argv) {

// end of text token
if (embd.back() == 2) {
-// fprintf(stderr, " [end of text]\n");
-is_interacting = true;
-continue;
+if (params.interactive) {
+is_interacting = true;
+continue;
+} else {
+printf("\n");
+fprintf(stderr, " [end of text]\n");
+break;
+}
}
}

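To make the chat.cpp change easier to follow: the tokenization path now always prepends the Alpaca preamble, and wraps a user prompt in the "### Instruction:" / "### Response:" markers only when one was supplied via -p or -f. The snippet below is a string-level illustration of that layout, not code from the repository; build_prompt is a made-up helper name.

#include <iostream>
#include <string>

// Hypothetical helper: mirrors the order in which chat.cpp now appends token
// blocks to embd_inp, but at the string level so the resulting layout is visible.
std::string build_prompt(const std::string & user_prompt) {
    const std::string instruct =
        " Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n";
    std::string out = instruct;            // always prepended
    if (!user_prompt.empty()) {            // only when -p or -f provided a prompt
        out += "### Instruction:\n\n";
        out += user_prompt;
        out += "### Response:\n\n";
    }
    return out;
}

int main() {
    std::cout << build_prompt("Write a haiku about llamas.\n");
    return 0;
}

With an empty prompt it returns only the preamble, which matches how embd_inp is now built when no -p/-f argument is given.
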
8 changes: 8 additions & 0 deletions utils.cpp
@@ -24,9 +24,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
} else if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-p" || arg == "--prompt") {
+params.interactive = false;
+params.interactive_start = false;
+params.use_color = false;

params.prompt = argv[++i];
} else if (arg == "-f" || arg == "--file") {

+params.interactive = false;
+params.interactive_start = false;
+params.use_color = false;

std::ifstream file(argv[++i]);

std::copy(std::istreambuf_iterator<char>(file),
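The -f/--file branch above is cut off in the middle of its std::copy call; the usual completion of that idiom copies the whole stream into params.prompt through a std::back_inserter. Below is a self-contained sketch of that pattern; the remaining std::copy arguments and the error check are assumptions for illustration, not lines from the commit.

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

int main(int argc, char ** argv) {
    if (argc < 2) {
        std::cerr << "usage: " << argv[0] << " FILE" << std::endl;
        return 1;
    }

    std::ifstream file(argv[1]);
    if (!file) {
        std::cerr << "failed to open " << argv[1] << std::endl;
        return 1;
    }

    // Same idiom as the -f/--file handler: stream the entire file into a string.
    std::string prompt;
    std::copy(std::istreambuf_iterator<char>(file),
              std::istreambuf_iterator<char>(),
              std::back_inserter(prompt));

    std::cout << prompt;
    return 0;
}
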
13 changes: 7 additions & 6 deletions utils.h
@@ -12,28 +12,29 @@
// CLI argument parsing
//

+// The default parameters
struct gpt_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 128; // new tokens to predict
int32_t repeat_last_n = 64; // last n tokens to penalize
-int32_t n_ctx = 512; //context size
+int32_t n_ctx = 2048; //context size

// sampling parameters
int32_t top_k = 40;
float top_p = 0.95f;
-float temp = 0.80f;
+float temp = 0.10f;
float repeat_penalty = 1.30f;

int32_t n_batch = 8; // batch size for prompt processing

std::string model = "models/lamma-7B/ggml-model.bin"; // model path
std::string model = "ggml-alpaca-7b-q4.bin"; // model path
std::string prompt;

-bool use_color = false; // use color to distinguish generations and inputs
+bool use_color = true; // use color to distinguish generations and inputs

-bool interactive = false; // interactive mode
-bool interactive_start = false; // reverse prompt immediately
+bool interactive = true; // interactive mode
+bool interactive_start = true; // reverse prompt immediately
std::string antiprompt = ""; // string upon seeing which more user input is prompted
};

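Taken together with the utils.cpp change, the new defaults mean a bare invocation of the chat binary starts an interactive Alpaca session, while -p/--prompt or -f/--file switches it to a one-shot, non-interactive run. The sketch below illustrates that mode selection; toy_params is a stand-in for illustration, not the real gpt_params or gpt_params_parse.

#include <cstdint>
#include <iostream>
#include <string>

// Stand-in struct carrying the new defaults from utils.h.
struct toy_params {
    bool        interactive       = true;   // chat mode on by default
    bool        interactive_start = true;
    bool        use_color         = true;
    int32_t     n_ctx             = 2048;
    float       temp              = 0.10f;
    std::string model             = "ggml-alpaca-7b-q4.bin";
};

int main(int argc, char ** argv) {
    toy_params params;

    // Mirrors the utils.cpp change: -p/--prompt and -f/--file switch the
    // defaults back to a plain, non-interactive run.
    for (int i = 1; i < argc; ++i) {
        const std::string arg = argv[i];
        if (arg == "-p" || arg == "--prompt" || arg == "-f" || arg == "--file") {
            params.interactive       = false;
            params.interactive_start = false;
            params.use_color         = false;
        }
    }

    std::cout << (params.interactive ? "interactive chat" : "one-shot prompt")
              << " using " << params.model << std::endl;
    return 0;
}
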
