GPT4-x-Alpaca AI Text Inference Function
This is a function node for the Stanford Alpaca AI model trained with transcripts of GPT-4 sessions. Inference is done on the CPU and does not require any special hardware.
Warning: this flow uses a 13B model. Use it only if you have at least 16GB of RAM; more is highly recommended!
Having the actual inference in a self-contained function node lets you build your own user interface or even use the node as part of an autonomous agent.
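The node reads its settings from properties of the incoming message and falls back to defaults for anything that is missing. The following sketch (just an illustration, not part of the flow itself) shows a message that exercises every parameter the function node actually parses:

```javascript
// minimal example message for the GPT4-x-Alpaca function node -
// all properties except "payload" are optional and get clamped
// to the ranges shown in the comments
msg.payload     = 'What is Node-RED?' // the prompt itself (required)
msg.seed        = -1   // RNG seed, -1 picks a random one
msg.threads     = 4    // clamped to 1 ... number of CPU cores
msg.context     = 512  // context size, clamped to 0 ... 10000
msg.keep        = 0    // tokens to keep, clamped to -1 ... 10000
msg.predict     = 128  // tokens to predict, clamped to 1 ... 10000
msg.topk        = 40   // top-k sampling, clamped to 1 ... 100
msg.topp        = 0.9  // top-p sampling, clamped to 0.1 ... 1.0
msg.temperature = 0.8  // clamped to 0.0 ... 1.0
msg.batches     = 8    // batch size, clamped to 1 ... 100
return msg
```

Both successful and failed inferences leave through the node's first output (with msg.statusCode indicating the outcome); only an empty or missing prompt is reported on the second output.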
See the related GitHub repository for instructions on how to install and use it.
The repo also contains a complete flow for an HTTP endpoint (which uses this function node to answer HTTP requests) and a simple web page that may serve as a user interface for that endpoint.
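The endpoint flow itself can be found in the repo, but the basic idea is simple: an "http in" node receives the request, a small function node maps the request onto the message format shown above, this function node runs the inference, and an "http response" node sends msg.payload back with the msg.statusCode set by this node. A hypothetical version of that glue code (the request body shape is an assumption; the repo's flow may use a different one) could look like this:

```javascript
// hypothetical glue code between an "http in" node and the GPT4-x-Alpaca
// function node - expects a JSON body such as {"prompt":"...","temperature":0.7}
const Body = msg.payload || {}

msg.payload     = (Body.prompt || '').trim() // becomes the actual prompt
msg.temperature = Body.temperature           // missing values fall back to defaults

// msg.req and msg.res remain on the message, so a downstream
// "http response" node can still answer the original request
return msg
```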
[{"id":"09b6f057a321b96c","type":"function","z":"50ed45470ea01d88","name":"GPT4-x-Alpaca","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null,msg])\n return\n }\n\n/**** retrieve settings or provide defaults ****/\n\n let Seed = parseInt(msg.seed,10)\n if (isNaN(Seed)) { Seed = -1 }\n\n let Threads = parseInt(msg.threads,10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1,Threads)\n Threads = Math.min(Threads,Math.max(1,os.cpus().length))\n\n let Context = parseInt(msg.context,10)\n if (isNaN(Context)) { Context = 512 }\n Context = Math.max(0,Math.min(Context,10000))\n\n let keep = parseInt(msg.keep,10)\n if (isNaN(keep)) { keep = 0 }\n keep = Math.max(-1,Math.min(keep,10000))\n\n let Prediction = parseInt(msg.predict,10)\n if (isNaN(Prediction)) { Prediction = 128 }\n Prediction = Math.max(1,Math.min(Prediction,10000)) // no -1!\n\n let topK = parseInt(msg.topk,10)\n if (isNaN(topK)) { topK = 40 }\n topK = Math.max(1,Math.min(topK,100))\n\n let topP = parseFloat(msg.topp)\n if (isNaN(topP)) { topP = 0.9 }\n topP = Math.max(0.1,Math.min(topP,1.0))\n\n let Temperature = parseFloat(msg.temperature)\n if (isNaN(Temperature)) { Temperature = 0.8 }\n Temperature = Math.max(0.0,Math.min(Temperature,1.0))\n\n let Batches = parseInt(msg.batches,10)\n if (isNaN(Batches)) { Batches = 8 }\n Batches = Math.max(1,Math.min(Batches,100))\n\n Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** combine all these settings into a command ****/\n\n let Command = ( 'cd ai && ' +\n './llama --model ./ggml-gpt-x-alpaca-13b-q4_1.bin --mlock ' +\n ' --ctx_size ' + Context + ' --keep ' + keep +\n ' --n_predict ' + Prediction +\n ' --threads ' + Threads + ' --batch_size ' + Batches +\n ' --seed ' + Seed + ' --temp ' + Temperature +\n ' --top_k ' + topK + ' --top_p ' + topP +\n ' --reverse-prompt \"<|endoftext|>\"' + // experimental\n ' --prompt \"' + Prompt + '\"'\n )\n\n/**** extract actual reponse from command output ****/\n\n function ResponseFrom (Text) {\n let HeaderLength = Text.indexOf('\\n\\n\\n')\n Text = Text.slice(HeaderLength + 1)\n\n let TrailerIndex = Text.indexOf('<|endoftext|>')\n if (TrailerIndex < 0) {\n TrailerIndex = Text.indexOf('\\nllama_print_timings')\n }\n Text = Text.slice(0,TrailerIndex)\n\n return Text\n }\n\n/**** now infer a response from the given prompt ****/\n\n let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n 'bash', [], { input:Command }\n )\n\n stdout = stdout.toString().trim()\n stderr = stderr.toString().trim()\n\n switch (true) {\n case (StatusCode == null):\n case (StatusCode === 0):\n msg.statusCode = (stdout === '' ? 204 : 200)\n msg.payload = ResponseFrom(stdout)\n break\n default:\n msg.statusCode = 500 + StatusCode\n msg.payload = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n '>>>> stderr >>>>\\n' + stderr +\n (Signal == null ? '' : '\\n' + Signal)\n break\n }\n\n node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":950,"y":420,"wires":[["962a19d09f9195ee","5c61dfbde0032b8c"],["92f944fdd2b235c1"]]}]