LLaMA AI Text Inference Function
This is a function node for the LLaMA AI model. Inference runs on the CPU and does not require any special hardware.
Having the actual inference in a self-contained function node lets you create your own user interface or even use the node as part of an autonomous agent.
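For illustration, a message for this node could be prepared as follows, e.g., in a small function node wired into it. This is just a sketch: msg.payload carries the prompt, all other settings (msg.seed, msg.threads, msg.context, msg.keep, msg.predict, msg.topk, msg.topp, msg.temperature, msg.batches) are optional, their names follow the function code shown below, and the node falls back to its own defaults for anything that is missing.

    msg.payload     = 'Please explain what Node-RED is good for'  // the prompt itself
    msg.temperature = 0.8   // sampling temperature (0.0 ... 1.0)
    msg.predict     = 128   // max. number of tokens to generate
    msg.threads     = 4     // capped at the number of available CPU cores
    msg.seed        = -1    // -1 requests a random seed
    return msg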
See the related GitHub repository for instructions on how to install and use it.
The repository also contains a complete flow for an HTTP endpoint (which uses this function node to answer HTTP requests) and a simple web page which may serve as a user interface for that endpoint.
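A web page like that basically only needs to send the prompt to the endpoint and display the returned text. The following sketch rests on assumptions of my own: the endpoint path '/llama' and the plain-text request and response format are hypothetical and may differ from the actual flow in the repo.

    // hypothetical client-side call - endpoint path and payload format are assumptions
    async function askLLaMA (Prompt) {
      const Response = await fetch('/llama', {
        method:  'POST',
        headers: { 'Content-Type':'text/plain' },
        body:    Prompt
      })
      if (! Response.ok) {
        throw new Error('request failed with HTTP status ' + Response.status)
      }
      return await Response.text()             // the generated continuation
    }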
[{"id":"d79bc2bdb11d700c","type":"function","z":"50ed45470ea01d88","name":"LLaMA","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null,msg])\n return\n }\n \n/**** retrieve settings or provide defaults ****/\n\n let Seed = parseInt(msg.seed,10)\n if (isNaN(Seed)) { Seed = -1 }\n \n let Threads = parseInt(msg.threads,10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1,Threads)\n Threads = Math.min(Threads,Math.max(1,os.cpus().length))\n\n let Context = parseInt(msg.context,10)\n if (isNaN(Context)) { Context = 512 }\n Context = Math.max(0,Math.min(Context,10000))\n\n let keep = parseInt(msg.keep,10)\n if (isNaN(keep)) { keep = 0 }\n keep = Math.max(-1,Math.min(keep,10000))\n\n let Prediction = parseInt(msg.predict,10)\n if (isNaN(Prediction)) { Prediction = 128 }\n Prediction = Math.max(1,Math.min(Prediction,10000)) // no -1!\n\n let topK = parseInt(msg.topk,10)\n if (isNaN(topK)) { topK = 40 }\n topK = Math.max(1,Math.min(topK,100))\n\n let topP = parseFloat(msg.topp)\n if (isNaN(topP)) { topP = 0.9 }\n topP = Math.max(0.1,Math.min(topP,1.0))\n\n let Temperature = parseFloat(msg.temperature)\n if (isNaN(Temperature)) { Temperature = 0.8 }\n Temperature = Math.max(0.0,Math.min(Temperature,1.0))\n\n let Batches = parseInt(msg.batches,10)\n if (isNaN(Batches)) { Batches = 8 }\n Batches = Math.max(1,Math.min(Batches,100))\n\n Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** combine all these settings into a command ****/\n\n let Command = ( 'cd ai && ' +\n './llama --model ././ggml-llama-7b-q4.bin --mlock ' +\n ' --ctx_size ' + Context + ' --keep ' + keep +\n ' --n_predict ' + Prediction + \n ' --threads ' + Threads + ' --batch_size ' + Batches +\n ' --seed ' + Seed + ' --temp ' + Temperature +\n ' --top_k ' + topK + ' --top_p ' + topP +\n ' --reverse-prompt \"<|endoftext|>\"' + // experimental\n ' --prompt \"' + Prompt + '\"'\n )\n\n/**** extract actual reponse from command output ****/\n\n function ResponseFrom (Text) {\n let HeaderLength = Text.indexOf('\\n\\n\\n')\n Text = Text.slice(HeaderLength + 1)\n\n let TrailerIndex = Text.indexOf('<|endoftext|>')\n if (TrailerIndex < 0) {\n TrailerIndex = Text.indexOf('\\nllama_print_timings')\n }\n Text = Text.slice(0,TrailerIndex)\n\n return Text\n }\n\n/**** now infer a response from the given prompt ****/\n \n let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n 'bash', [], { input:Command }\n )\n \n stdout = stdout.toString().trim()\n stderr = stderr.toString().trim()\n\n switch (true) {\n case (StatusCode == null):\n case (StatusCode === 0):\n msg.statusCode = (stdout === '' ? 204 : 200)\n msg.payload = ResponseFrom(stdout)\n break\n default:\n msg.statusCode = 500 + StatusCode\n msg.payload = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n '>>>> stderr >>>>\\n' + stderr +\n (Signal == null ? '' : '\\n' + Signal)\n break\n }\n\n node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":960,"y":160,"wires":[["f9880bb93913432e","9879fc0007ee6e42"],["0d454219826ee13e"]]}]