LLaMA AI Text Inference Function
This is a function node for the LLaMA AI model. Inference runs on the CPU and does not require any special hardware.
Having the actual inference in a self-contained function node lets you create your own user interface or even use the node as part of an autonomous agent.
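For illustration, a message for this node could be prepared as follows, e.g., in a small function node wired into it. This is just a sketch: msg.payload carries the prompt, all other settings (msg.seed, msg.threads, msg.context, msg.keep, msg.predict, msg.topk, msg.topp, msg.temperature, msg.batches) are optional, their names follow the function code shown below, and the node falls back to its own defaults for anything that is missing.

    msg.payload     = 'Please explain what Node-RED is good for'  // the prompt itself
    msg.temperature = 0.8   // sampling temperature (0.0 ... 1.0)
    msg.predict     = 128   // max. number of tokens to generate
    msg.threads     = 4     // capped at the number of available CPU cores
    msg.seed        = -1    // -1 requests a random seed
    return msg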
See the related GitHub repository for instructions on how to install and use it.
The repository also contains a complete flow for an HTTP endpoint (which uses this function node to answer HTTP requests) and a simple web page which may serve as a user interface for that endpoint.
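A web page like that basically only needs to send the prompt to the endpoint and display the returned text. The following sketch rests on assumptions of my own: the endpoint path '/llama' and the plain-text request and response format are hypothetical and may differ from the actual flow in the repo.

    // hypothetical client-side call - endpoint path and payload format are assumptions
    async function askLLaMA (Prompt) {
      const Response = await fetch('/llama', {
        method:  'POST',
        headers: { 'Content-Type':'text/plain' },
        body:    Prompt
      })
      if (! Response.ok) {
        throw new Error('request failed with HTTP status ' + Response.status)
      }
      return await Response.text()             // the generated continuation
    }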
[{"id":"d79bc2bdb11d700c","type":"function","z":"50ed45470ea01d88","name":"LLaMA","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null,msg])\n return\n }\n \n/**** retrieve settings or provide defaults ****/\n\n let Seed = parseInt(msg.seed,10)\n if (isNaN(Seed)) { Seed = -1 }\n \n let Threads = parseInt(msg.threads,10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1,Threads)\n Threads = Math.min(Threads,Math.max(1,os.cpus().length))\n\n let Context = parseInt(msg.context,10)\n if (isNaN(Context)) { Context = 512 }\n Context = Math.max(0,Math.min(Context,10000))\n\n let keep = parseInt(msg.keep,10)\n if (isNaN(keep)) { keep = 0 }\n keep = Math.max(-1,Math.min(keep,10000))\n\n let Prediction = parseInt(msg.predict,10)\n if (isNaN(Prediction)) { Prediction = 128 }\n Prediction = Math.max(1,Math.min(Prediction,10000)) // no -1!\n\n let topK = parseInt(msg.topk,10)\n if (isNaN(topK)) { topK = 40 }\n topK = Math.max(1,Math.min(topK,100))\n\n let topP = parseFloat(msg.topp)\n if (isNaN(topP)) { topP = 0.9 }\n topP = Math.max(0.1,Math.min(topP,1.0))\n\n let Temperature = parseFloat(msg.temperature)\n if (isNaN(Temperature)) { Temperature = 0.8 }\n Temperature = Math.max(0.0,Math.min(Temperature,1.0))\n\n let Batches = parseInt(msg.batches,10)\n if (isNaN(Batches)) { Batches = 8 }\n Batches = Math.max(1,Math.min(Batches,100))\n\n Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** combine all these settings into a command ****/\n\n let Command = ( 'cd ai && ' +\n './llama --model ././ggml-llama-7b-q4.bin --mlock ' +\n ' --ctx_size ' + Context + ' --keep ' + keep +\n ' --n_predict ' + Prediction + \n ' --threads ' + Threads + ' --batch_size ' + Batches +\n ' --seed ' + Seed + ' --temp ' + Temperature +\n ' --top_k ' + topK + ' --top_p ' + topP +\n ' --reverse-prompt \"<|endoftext|>\"' + // experimental\n ' --prompt \"' + Prompt + '\"'\n )\n\n/**** extract actual reponse from command output ****/\n\n function ResponseFrom (Text) {\n let HeaderLength = Text.indexOf('\\n\\n\\n')\n Text = Text.slice(HeaderLength + 1)\n\n let TrailerIndex = Text.indexOf('<|endoftext|>')\n if (TrailerIndex < 0) {\n TrailerIndex = Text.indexOf('\\nllama_print_timings')\n }\n Text = Text.slice(0,TrailerIndex)\n\n return Text\n }\n\n/**** now infer a response from the given prompt ****/\n \n let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n 'bash', [], { input:Command }\n )\n \n stdout = stdout.toString().trim()\n stderr = stderr.toString().trim()\n\n switch (true) {\n case (StatusCode == null):\n case (StatusCode === 0):\n msg.statusCode = (stdout === '' ? 204 : 200)\n msg.payload = ResponseFrom(stdout)\n break\n default:\n msg.statusCode = 500 + StatusCode\n msg.payload = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n '>>>> stderr >>>>\\n' + stderr +\n (Signal == null ? '' : '\\n' + Signal)\n break\n }\n\n node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":960,"y":160,"wires":[["f9880bb93913432e","9879fc0007ee6e42"],["0d454219826ee13e"]]}]