node-red-flow-openai-api

This flow contains a few HTTP endpoints that implement a relevant subset of the OpenAI API, so that it can serve as a drop-in replacement for OpenAI in LangChain and similar tools. Because the endpoints use llama.cpp under the hood, inference runs on the CPU and does not require any special hardware.
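For example, a chat completion can be requested just like from the real OpenAI API. Here is a minimal sketch (assuming Node-RED listens on its default port 1880 on localhost and the placeholder API key "sk-xxxx" from the "define common settings" node is still in place - adjust both to your setup):

  // minimal chat completion request (Node.js 18+, run as an ES module)
  const Response = await fetch('http://127.0.0.1:1880/v1/chat/completions', {
    method:'POST',
    headers:{
      'Content-Type':'application/json',
      'Authorization':'Bearer sk-xxxx'   // validated by the "API Key Check" subflow
    },
    body:JSON.stringify({
      model:'llama-2-13b.ggmlv3.q4_0',   // any non-empty string is accepted
      messages:[{ role:'user', content:'What is Node-RED?' }],
      max_tokens:256, temperature:0.7
    })
  })
  const Result = await Response.json()
  console.log(Result.choices[0].message.content)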

Having the actual inference in a self-contained function node lets you build your own user interface on top of it or even use it as part of an autonomous agent.
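The inference node expects the same msg properties which the "parse request body" nodes normally provide, so a custom front end only has to fill them in before handing the message over - a sketch with illustrative values:

  // prepare a msg for the "LLaMA 2 Inference" function node by hand
  msg.prompt            = '### Instruction: What is Node-RED?\n### Response:'
  msg.max_tokens        = 256      // -1 stands for "up to the context length"
  msg.temperature       = 0.8      // 0...2
  msg.top_p             = 0.95     // 0...1
  msg.frequency_penalty = 1.1      // passed to llama.cpp as --repeat-penalty
  msg.stop              = ['###']  // up to 4 stop sequences
  return msg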

So far, the flow has been tested both with low-level tools (like curl) and with Flowise, the no-code environment for LangChain. If you build the author's own version instead of the original Flowise, you automatically get the nodes needed to access the Node-RED server.

See the related GitHub repository for instructions on how to install and use it.

[{"id":"7de43e17655400bd","type":"comment","z":"82da24bd0fb90b8b","name":"OpenAI-compatible API","info":"","x":120,"y":40,"wires":[]},{"id":"49ff9e70f3453438","type":"http in","z":"82da24bd0fb90b8b","name":"/v1/embeddings","url":"/v1/embeddings","method":"post","upload":false,"swaggerDoc":"","x":100,"y":280,"wires":[["5dae579222f27264"]]},{"id":"da4466821138a8c2","type":"http in","z":"82da24bd0fb90b8b","name":"/v1/completions","url":"/v1/completions","method":"post","upload":false,"swaggerDoc":"","x":100,"y":480,"wires":[["3f5450b1f7f9aaf6"]]},{"id":"5571b980dbde39d0","type":"http in","z":"82da24bd0fb90b8b","name":"/v1/chat/completions","url":"/v1/chat/completions","method":"post","upload":false,"swaggerDoc":"","x":120,"y":740,"wires":[["1eae5b9651fc75bc"]]},{"id":"5dae579222f27264","type":"reusable","z":"82da24bd0fb90b8b","name":"","target":"api key check","outputs":2,"x":300,"y":280,"wires":[["cc62107216a2c6d9"],["6939bc798a16831f"]]},{"id":"3f5450b1f7f9aaf6","type":"reusable","z":"82da24bd0fb90b8b","name":"","target":"api key check","outputs":2,"x":300,"y":480,"wires":[["189fe066d97cc94e"],["2da1327358576bfc"]]},{"id":"1eae5b9651fc75bc","type":"reusable","z":"82da24bd0fb90b8b","name":"","target":"api key check","outputs":2,"x":340,"y":740,"wires":[["d4c5ee20e0d87247"],["ef562b4588954520"]]},{"id":"ef562b4588954520","type":"http response","z":"82da24bd0fb90b8b","name":"","statusCode":"","headers":{},"x":510,"y":740,"wires":[]},{"id":"6939bc798a16831f","type":"http response","z":"82da24bd0fb90b8b","name":"","statusCode":"","headers":{},"x":510,"y":280,"wires":[]},{"id":"2da1327358576bfc","type":"http response","z":"82da24bd0fb90b8b","name":"","statusCode":"","headers":{},"x":510,"y":480,"wires":[]},{"id":"09b6f057a321b96c","type":"function","z":"82da24bd0fb90b8b","name":"LLaMA 2 Inference","func":"(async () => {\n  let Prompt = msg.prompt\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n  let Threads = parseInt(flow.get('number-of-threads'),10)\n  if (isNaN(Threads)) { Threads = 4 }\n  Threads = Math.max(1,Threads)\n  Threads = Math.min(Threads,Math.max(1,os.cpus().length))\n\n  let Context = parseInt(flow.get('context-length'),10)\n  if (isNaN(Context)) { Context = 512 }\n  Context = Math.max(0,Math.min(Context,4096))\n\n  let Batches = parseInt(flow.get('number-of-batches'),10)\n  if (isNaN(Batches)) { Batches = 8 }\n  Batches = Math.max(1,Math.min(Batches,100))\n\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** combine all these settings into a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --threads ' + Threads + ' --batch_size ' + Batches +\n    ' -c ' + Context + ' -n ' + msg.max_tokens +\n    ' --temp ' + msg.temperature + ' --top_p ' + msg.top_p +\n    ' --repeat-penalty ' + msg.frequency_penalty +\n    ' --prompt \"' + Prompt + '\" --tfs 2.0'\n  )\n  \n  for (let i = 0, l = msg.stop.length; i < l; i++) {\n    Command += ' -r \"' + msg.stop[i].replace(/\"/g,'\\\\\"') + '\"'\n  }\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let maxTokens = (msg.max_tokens < 0 ? Context : msg.max_tokens)\n    msg.finish_reason = (Text.length >= maxTokens ? 
'length' : 'stop')\n    \n    if (Text.startsWith(msg.prompt)) {\n      Text = Text.slice(msg.prompt.length)\n    }\n\n    for (let i = 0, l = msg.stop.length; i < l; i++) {\n      if (Text.endsWith(msg.stop[i])) {\n        msg.finish_reason = 'stop'\n        \n        Text = Text.slice(0,-msg.stop[i].length)\n        return Text.trim()\n      }\n    }\n\n    return Text.trim()\n  }\n\n/**** now infer a response from the given prompt ****/\n\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.payload    = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? '' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":150,"y":600,"wires":[["a5fb554550f203b2"],["2da1327358576bfc"]]},{"id":"b6dfdddec687b9db","type":"function","z":"82da24bd0fb90b8b","name":"LLaMA 2 Embeddings","func":"(async () => {\n  let Prompt = msg.input\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-embeddings --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ')\n  }\n\n/**** now tokenize the given prompt ****/\n\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.embedding = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":160,"y":400,"wires":[["266427dd2bcbcdf6"],["6939bc798a16831f"]]},{"id":"cc62107216a2c6d9","type":"function","z":"82da24bd0fb90b8b","name":"parse request body","func":"/**** parse request body ****/\n\n  let JSONPayload = msg.payload\n    if ((typeof JSONPayload !== 'object') || (JSONPayload == null)) {\n      return handleError(400,'request body is no valid JSON')\n    }\n  msg.model = JSONPayload.model\n    if ((typeof msg.model !== 'string') || (msg.model.trim() === '')) {\n      return handleError(400,'\"model\" must be a non-empty string')\n    }  \n  msg.input = JSONPayload.input\n    if (typeof msg.input !== 'string') {\n      return handleError(400,'\"input\" must be a string')\n    }\n  node.send([msg,null])\n  node.done()\n    \n/**** handleError ****/\n\n  function handleError (StatusCode, StatusText) {\n    msg.payload    = StatusText\n    msg.statusCode = StatusCode\n\n    node.send([null,msg])\n    node.done()\n  }\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[],"x":150,"y":340,"wires":[["e74bdbd85bc3650b"],["6939bc798a16831f"]]},{"id":"94e796282b41a449","type":"catch","z":"82da24bd0fb90b8b","name":"uncaught exceptions","scope":null,"uncaught":true,"x":120,"y":1000,"wires":[["0f9511dcddb4b4d7"]]},{"id":"0f9511dcddb4b4d7","type":"debug","z":"82da24bd0fb90b8b","name":"","active":true,"tosidebar":true,"console":false,"tostatus":true,"complete":"true","targetType":"full","statusVal":"'internal error'","statusType":"jsonata","x":310,"y":1000,"wires":[]},{"id":"266427dd2bcbcdf6","type":"function","z":"82da24bd0fb90b8b","name":"assemble response","func":"/**** assemble response ****/\n\n  msg.statusCode = 200\n  msg.payload = {\n    model:'llama-2-13b.ggmlv3.q4_0',\n    object:'list',\n    data:[{\n      index:0,\n      object:'embedding',\n      embedding:msg.embedding\n    }],\n    usage:{\n      prompt_tokens:msg.prompt_tokens,\n      total_tokens: msg.prompt_tokens\n    }\n  }\n  \n/**** cleanup \"msg\" ****/\n  \n  delete msg.model\n  delete msg.input\n  delete msg.embedding\n  delete msg.prompt_tokens\n  \n  return msg","outputs":1,"noerr":0,"initialize":"","finalize":"","libs":[],"x":390,"y":400,"wires":[["6939bc798a16831f"]]},{"id":"189fe066d97cc94e","type":"function","z":"82da24bd0fb90b8b","name":"parse request body","func":"/**** parse request body ****/\n\n  let JSONPayload = msg.payload\n    if ((typeof JSONPayload !== 'object') || (JSONPayload == null)) {\n      return handleError(400,'request body is no valid JSON')\n    }\n  msg.model = JSONPayload.model\n    if ((typeof msg.model !== 'string') || (msg.model.trim() === '')) {\n      return handleError(400,'\"model\" must be a non-empty string')\n    }  \n  msg.prompt = JSONPayload.prompt\n    if (Array.isArray(msg.prompt)) {\n      msg.prompt = msg.prompt[0]\n    }\n    \n    if (typeof msg.prompt !== 'string') {\n      return handleError(400,'\"prompt\" must be a string')\n    }\n  msg.max_tokens = (JSONPayload.max_tokens == null ? 
-1 : JSONPayload.max_tokens)\n    if (\n      (typeof msg.max_tokens !== 'number') ||\n      (Math.round(msg.max_tokens) !== msg.max_tokens) ||\n      (msg.max_tokens < -1)\n    ) {\n      return handleError(400,'\"max_tokens\" must be -1 or an integer >= 0')\n    }\n    \n    if (msg.max_tokens === -1) {\n      msg.max_tokens = flow.get('context-length')\n    } else {\n      msg.max_tokens = Math.min(msg.max_tokens,flow.get('context-length'))\n    }\n  msg.temperature = (JSONPayload.temperature == null ? 1.0 : JSONPayload.temperature)\n    if (\n      (typeof msg.temperature !== 'number') ||\n      (msg.temperature < 0) || (msg.temperature > 2)\n    ) {\n      return handleError(400,'\"temperature\" must be a number in the range 0...2')\n    }\n  msg.top_p = (JSONPayload.top_p == null ? 1.0 : JSONPayload.top_p)\n    if (\n      (typeof msg.top_p !== 'number') ||\n      (msg.top_p < 0) || (msg.top_p > 1)\n    ) {\n      return handleError(400,'\"top_p\" must be a number in the range 0...1')\n    }\n  msg.stop = JSONPayload.stop || []\n    if (typeof msg.stop === 'string') {\n      msg.stop = [msg.stop]\n    } else {\n      if (! Array.isArray(msg.stop) || (msg.stop.length > 4)) {\n        return handleError(400,'\"stop\" must be a single string or an array with up to 4 strings')\n      }\n      \n      for (let i = 0, l = msg.stop.length; i < l; i++) {\n        if (typeof msg.stop[i] !== 'string') {\n          return handleError(400,'\"stop\" must contain strings only')\n        }\n      }\n    }\n  msg.frequency_penalty = (JSONPayload.frequency_penalty == null ? 0 : JSONPayload.frequency_penalty)\n    if (\n      (typeof msg.frequency_penalty !== 'number') ||\n      (msg.frequency_penalty < -2) || (msg.frequency_penalty > 2)\n    ) {\n      return handleError(400,'\"frequency_penalty\" must be a number in the range -2...2')\n    }\n  node.send([msg,null])\n  node.done()\n    \n/**** handleError ****/\n\n  function handleError (StatusCode, StatusText) {\n    msg.payload    = StatusText\n    msg.statusCode = StatusCode\n\n    node.send([null,msg])\n    node.done()\n  }\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[],"x":150,"y":540,"wires":[["f25b86492768781e"],["2da1327358576bfc"]]},{"id":"d4c5ee20e0d87247","type":"function","z":"82da24bd0fb90b8b","name":"parse request body","func":"/**** parse request body ****/\n\n  let JSONPayload = msg.payload\n    if ((typeof JSONPayload !== 'object') || (JSONPayload == null)) {\n      return handleError(400,'request body is no valid JSON')\n    }\n  msg.model = JSONPayload.model\n    if ((typeof msg.model !== 'string') || (msg.model.trim() === '')) {\n      return handleError(400,'\"model\" must be a non-empty string')\n    }  \n  msg.messages = JSONPayload.messages\n    if (! Array.isArray(msg.messages)) {\n      return handleError(400,'\"messages\" must be a list of messages')\n    }\n    for (let i = 0, l = msg.messages.length; i < l; i++) {\n      let message = msg.messages[i]\n      if (\n        (message == null) || (typeof message !== 'object') ||\n        (['system','user','assistant'].indexOf(message.role) < 0) ||\n        (typeof message.content !== 'string')\n      ) {\n        return handleError(400,'\"messages\" must contain valid message objects only')\n      }\n    }\n  msg.max_tokens = (JSONPayload.max_tokens == null ? 
-1 : JSONPayload.max_tokens)\n    if (\n      (typeof msg.max_tokens !== 'number') ||\n      (Math.round(msg.max_tokens) !== msg.max_tokens) ||\n      (msg.max_tokens < -1)\n    ) {\n      return handleError(400,'\"max_tokens\" must be -1 or an integer >= 0')\n    }\n    \n    if (msg.max_tokens === -1) {\n      msg.max_tokens = flow.get('context-length')\n    } else {\n      msg.max_tokens = Math.min(msg.max_tokens,flow.get('context-length'))\n    }\n  msg.temperature = (JSONPayload.temperature == null ? 1.0 : JSONPayload.temperature)\n    if (\n      (typeof msg.temperature !== 'number') ||\n      (msg.temperature < 0) || (msg.temperature > 2)\n    ) {\n      return handleError(400,'\"temperature\" must be a number in the range 0...2')\n    }\n  msg.top_p = (JSONPayload.top_p == null ? 1.0 : JSONPayload.top_p)\n    if (\n      (typeof msg.top_p !== 'number') ||\n      (msg.top_p < 0) || (msg.top_p > 1)\n    ) {\n      return handleError(400,'\"top_p\" must be a number in the range 0...1')\n    }\n  msg.stop = JSONPayload.stop || []\n    if (typeof msg.stop === 'string') {\n      msg.stop = [msg.stop]\n    } else {\n      if (! Array.isArray(msg.stop) || (msg.stop.length > 4)) {\n        return handleError(400,'\"stop\" must be a single string or an array with up to 4 strings')\n      }\n      \n      for (let i = 0, l = msg.stop.length; i < l; i++) {\n        if (typeof msg.stop[i] !== 'string') {\n          return handleError(400,'\"stop\" must contain strings only')\n        }\n      }\n    }\n  msg.frequency_penalty = (JSONPayload.frequency_penalty == null ? 0 : JSONPayload.frequency_penalty)\n    if (\n      (typeof msg.frequency_penalty !== 'number') ||\n      (msg.frequency_penalty < -2) || (msg.frequency_penalty > 2)\n    ) {\n      return handleError(400,'\"frequency_penalty\" must be a number in the range -2...2')\n    }\n    \n  let Prompt = ''\n    for (let i = 0, l = msg.messages.length; i < l; i++) {\n      let Message = msg.messages[i]\n      Prompt += flow.get('prompt-template')[Message.role].replace('{input}',Message.content)\n    }\n  msg.prompt = Prompt += flow.get('prompt-template')['suffix']\n  \n  node.send([msg,null])\n  node.done()\n    \n/**** handleError ****/\n\n  function handleError (StatusCode, StatusText) {\n    msg.payload    = StatusText\n    msg.statusCode = StatusCode\n\n    node.send([null,msg])\n    node.done()\n  }\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[],"x":150,"y":800,"wires":[["0799dfad4bb9ebb7"],["ef562b4588954520"]]},{"id":"78cbd7a2e2c84d5d","type":"function","z":"82da24bd0fb90b8b","name":"LLaMA 2 Inference","func":"(async () => {\n  let Prompt = msg.prompt\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n  let Threads = parseInt(flow.get('number-of-threads'),10)\n  if (isNaN(Threads)) { Threads = 4 }\n  Threads = Math.max(1,Threads)\n  Threads = Math.min(Threads,Math.max(1,os.cpus().length))\n\n  let Context = parseInt(flow.get('context-length'),10)\n  if (isNaN(Context)) { Context = 512 }\n  Context = Math.max(0,Math.min(Context,4096))\n\n  let Batches = parseInt(flow.get('number-of-batches'),10)\n  if (isNaN(Batches)) { Batches = 8 }\n  Batches = Math.max(1,Math.min(Batches,100))\n\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** combine all these settings into a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --threads ' + Threads + ' --batch_size ' + Batches +\n    ' -c ' + Context + ' -n ' + msg.max_tokens +\n    ' --temp ' + msg.temperature + ' 
--top_p ' + msg.top_p +\n    ' --repeat-penalty ' + msg.frequency_penalty +\n    ' --prompt \"' + Prompt + '\" --tfs 2.0'\n  )\n  \n  if (flow.get('stop-sequence') != null) {\n    Command += ' -r \"' + flow.get('stop-sequence') + '\"'\n  }\n  \n  for (let i = 0, l = msg.stop.length; i < l; i++) {\n    Command += ' -r \"' + msg.stop[i].replace(/\"/g,'\\\\\"') + '\"'\n  }\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let maxTokens = (msg.max_tokens < 0 ? Context : msg.max_tokens)\n    msg.finish_reason = (Text.length >= maxTokens ? 'length' : 'stop')\n    \n    if (Text.startsWith(msg.prompt)) {\n      Text = Text.slice(msg.prompt.length)\n    }\n\n    if (flow.get('stop') != null) {\n      let stop = flow.get('stop-sequence')\n      if (Text.endsWith(stop)) {\n        msg.finish_reason = 'stop'\n        \n        Text = Text.slice(0,-stop.length)\n        return Text.trim()\n      }\n    }\n\n    for (let i = 0, l = msg.stop.length; i < l; i++) {\n      if (Text.endsWith(msg.stop[i])) {\n        msg.finish_reason = 'stop'\n        \n        Text = Text.slice(0,-msg.stop[i].length)\n        return Text.trim()\n      }\n    }\n\n    return Text.trim()\n  }\n\n/**** now infer a response from the given prompt ****/\n\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.payload    = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? '' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":150,"y":860,"wires":[["ef7682ecdbce8cdf"],["ef562b4588954520"]]},{"id":"8c8fe2a2f27ec4ab","type":"function","z":"82da24bd0fb90b8b","name":"assemble response","func":"/**** assemble response ****/\n\n  let maxTokens = (msg.max_tokens < 0 ? Infinity : msg.max_tokens)\n\n  msg.statusCode = 200\n  msg.payload = {\n    id:'chatcmpl-' + msg._msgid,\n    object:'chat.completion',\n    created:Date.now(),\n    model:'llama-2-13b.ggmlv3.q4_0',\n    choices:[{\n      index:0,\n      message:{\n        role:'assistant',\n        content:msg.payload\n      },\n      finish_reason:msg.finish_reason\n    }],\n    usage:{\n      prompt_tokens:msg.prompt_tokens,\n      completion_tokens:msg.completion_tokens,\n      total_tokens: msg.prompt_tokens + msg.completion_tokens\n    }\n  }\n  \n/**** cleanup \"msg\" ****/\n\n  delete msg.model\n  delete msg.messages\n  delete msg.prompt\n  delete msg.max_tokens\n  delete msg.temperature\n  delete msg.top_p\n  delete msg.stream\n  delete msg.stop\n  delete msg.frequency_penalty\n  delete msg.prompt_tokens\n  delete msg.completion_tokens\n  delete msg.finish_reason\n  \n  return msg","outputs":1,"noerr":0,"initialize":"","finalize":"","libs":[],"x":150,"y":920,"wires":[["ef562b4588954520"]]},{"id":"8b8bed0ed98d6c96","type":"function","z":"82da24bd0fb90b8b","name":"assemble response","func":"/**** assemble response ****/\n\n  let maxTokens = (msg.max_tokens < 0 ? 
Infinity : msg.max_tokens)\n\n  msg.statusCode = 200\n  msg.payload = {\n    id:'cmpl-' + msg._msgid,\n    object:'text_completion',\n    created:Date.now(),\n    model:'llama-2-13b.ggmlv3.q4_0',\n    choices:[{\n      index:0,\n      text:msg.payload,\n      logprobs:null,\n      finish_reason:msg.finish_reason\n    }],\n    usage:{\n      prompt_tokens:msg.prompt_tokens,\n      completion_tokens:msg.completion_tokens,\n      total_tokens: msg.prompt_tokens + msg.completion_tokens\n    }\n  }\n  \n/**** cleanup \"msg\" ****/\n  \n  delete msg.model\n  delete msg.prompt\n  delete msg.max_tokens\n  delete msg.temperature\n  delete msg.top_p\n  delete msg.stream\n  delete msg.stop\n  delete msg.frequency_penalty\n  delete msg.prompt_tokens\n  delete msg.completion_tokens\n  delete msg.finish_reason\n  \n  return msg","outputs":1,"noerr":0,"initialize":"","finalize":"","libs":[],"x":150,"y":660,"wires":[["2da1327358576bfc"]]},{"id":"e74bdbd85bc3650b","type":"function","z":"82da24bd0fb90b8b","name":"count prompt tokens","func":"(async () => {\n  if (flow.get('count-tokens') != true) {\n    msg.prompt_tokens = 0\n    \n    node.send([msg,null])\n    node.done()\n    \n    return\n  }\n  \n/**** prepare prompt as a command argument ****/\n    \n  let Prompt = msg.input || msg.prompt\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-tokens --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ').length\n  }\n\n/**** now tokenize the given prompt ****/\n\nnode.warn('counting prompt tokens')\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.prompt_tokens = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n  node.done()\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":380,"y":340,"wires":[["b6dfdddec687b9db"],["6939bc798a16831f"]]},{"id":"ef7682ecdbce8cdf","type":"function","z":"82da24bd0fb90b8b","name":"count completion tokens","func":"(async () => {\n  if (flow.get('count-tokens') != true) {\n    msg.completion_tokens = 0\n    \n    node.send([msg,null])\n    node.done()\n    \n    return\n  }\n  \n/**** prepare prompt as a command argument ****/\n    \n  let Prompt = msg.payload\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-tokens --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ').length\n  }\n\n/**** now tokenize the given response ****/\n\nnode.warn('counting completion tokens')\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.completion_tokens = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n  node.done()\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":390,"y":860,"wires":[["8c8fe2a2f27ec4ab"],["ef562b4588954520"]]},{"id":"f25b86492768781e","type":"function","z":"82da24bd0fb90b8b","name":"count prompt tokens","func":"(async () => {\n  if (flow.get('count-tokens') != true) {\n    msg.prompt_tokens = 0\n    \n    node.send([msg,null])\n    node.done()\n    \n    return\n  }\n  \n/**** prepare prompt as a command argument ****/\n    \n  let Prompt = msg.input || msg.prompt\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-tokens --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ').length\n  }\n\n/**** now tokenize the given prompt ****/\n\nnode.warn('counting prompt tokens')\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.prompt_tokens = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n  node.done()\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":380,"y":540,"wires":[["09b6f057a321b96c"],["2da1327358576bfc"]]},{"id":"a5fb554550f203b2","type":"function","z":"82da24bd0fb90b8b","name":"count completion tokens","func":"(async () => {\n  if (flow.get('count-tokens') != true) {\n    msg.completion_tokens = 0\n    \n    node.send([msg,null])\n    node.done()\n    \n    return\n  }\n  \n/**** prepare prompt as a command argument ****/\n    \n  let Prompt = msg.payload\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-tokens --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ').length\n  }\n\n/**** now tokenize the given response ****/\n\nnode.warn('counting completion tokens')\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.completion_tokens = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n  node.done()\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":390,"y":600,"wires":[["8b8bed0ed98d6c96"],["2da1327358576bfc"]]},{"id":"0799dfad4bb9ebb7","type":"function","z":"82da24bd0fb90b8b","name":"count prompt tokens","func":"(async () => {\n  if (flow.get('count-tokens') != true) {\n    msg.prompt_tokens = 0\n    \n    node.send([msg,null])\n    node.done()\n\n    return\n  }\n  \n/**** prepare prompt as a command argument ****/\n    \n  let Prompt = msg.input || msg.prompt\n  Prompt = Prompt.replace(/\"/g,'\\\\\"')\n\n/**** construct a command ****/\n\n  let Command = ( 'cd ai && ' +\n    './llama-tokens --model ./llama-2-13b.ggmlv3.q4_0.bin --mlock ' +\n    ' --prompt \"' + Prompt + '\"'\n  )\n\n/**** extract actual reponse from command output ****/\n\n  function ResponseFrom (Text) {\n    let HeaderLength = Text.indexOf('system_info')\n    Text = Text.slice(HeaderLength + 1)\n      .replace(/^[^\\n]*\\n/,'')\n\n    let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n    Text = Text.slice(0,TrailerIndex)\n\n    return Text.replace(/\\s+/g,' ').split(' ').length\n  }\n\n/**** now tokenize the given prompt ****/\n\nnode.warn('counting prompt tokens')\n  let { stdout,stderr, StatusCode,Signal } = child_process.spawnSync(\n    'bash', [], { input:Command }\n  )\n\n  stdout = stdout.toString().trim()\n  stderr = stderr.toString().trim()\n\n  switch (true) {\n    case (StatusCode == null):\n    case (StatusCode === 0):\n      msg.statusCode = (stdout === '' ? 204 : 200)\n      msg.prompt_tokens = ResponseFrom(stdout)\n      break\n    default:\n      msg.statusCode = 500 + StatusCode\n      msg.payload    = (stdout === '' ? '' : '>>>> stdout >>>>\\n' + stdout + '\\n') +\n                       '>>>> stderr >>>>\\n' + stderr +\n                       (Signal == null ? 
'' : '\\n' + Signal)\n      break\n  }\n\n  node.send([msg,null])\n  node.done()\n})()\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"}],"x":380,"y":800,"wires":[["78cbd7a2e2c84d5d"],["ef562b4588954520"]]},{"id":"160fdecf43f71c65","type":"inject","z":"82da24bd0fb90b8b","name":"on start-up","props":[{"p":"payload"},{"p":"topic","vt":"str"}],"repeat":"","crontab":"","once":true,"onceDelay":0.1,"topic":"","payload":"","payloadType":"date","x":110,"y":100,"wires":[["936d6293f8f40254"]]},{"id":"936d6293f8f40254","type":"change","z":"82da24bd0fb90b8b","name":"define common settings","rules":[{"t":"set","p":"API-Key","pt":"flow","to":"sk-xxxx","tot":"str"},{"t":"set","p":"number-of-threads","pt":"flow","to":"4","tot":"num"},{"t":"set","p":"context-length","pt":"flow","to":"4096","tot":"num"},{"t":"set","p":"number-of-batches","pt":"flow","to":"8","tot":"num"},{"t":"set","p":"prompt-template","pt":"flow","to":"{\"system\":\"{input}\\n\",\"user\":\"### Instruction: {input}\\n\",\"assistant\":\"### Response: {input}\\n\",\"suffix\":\"### Response:\"}","tot":"json"},{"t":"set","p":"count-tokens","pt":"flow","to":"false","tot":"bool"},{"t":"set","p":"stop-sequence","pt":"flow","to":"###","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":330,"y":100,"wires":[[]]},{"id":"ebef1c7c36c2955a","type":"function","z":"82da24bd0fb90b8b","name":"validate API Key","func":"  let APIKey = (flow.get('API-Key') || '').trim()\n  if (APIKey === '') { return authorized() }\n\n  let Credentials = msg.req.headers['authorization'] || ''\n  if (! Credentials.startsWith('Bearer ')) {\n    return unauthorized()\n  }\n\n  Credentials = Credentials.replace(/^Bearer\\s+/,'')\n  return (Credentials === APIKey ? authorized() : unauthorized())\n\n  function authorized () {\n    node.send([msg,null])\n    node.done()\n  }\n\n  function unauthorized () {\n    msg.payload    = 'Unauthorized'\n    msg.statusCode = 401\n\n    node.send([null,msg])\n    node.done()\n  }\n","outputs":2,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"crypto","module":"crypto"}],"x":300,"y":180,"wires":[["121431785ceb72e3"],["b2a4009e8e1a0213"]]},{"id":"37c9cf4e084a2e1e","type":"reusable-in","z":"82da24bd0fb90b8b","name":"API Key Check","info":"describe your reusable flow here","scope":"global","x":100,"y":180,"wires":[["ebef1c7c36c2955a"]]},{"id":"121431785ceb72e3","type":"reusable-out","z":"82da24bd0fb90b8b","name":"valid","position":1,"x":470,"y":160,"wires":[]},{"id":"b2a4009e8e1a0213","type":"reusable-out","z":"82da24bd0fb90b8b","name":"invalid","position":"2","x":470,"y":200,"wires":[]}]
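For reference, the "define common settings" node also holds the prompt template with which the chat endpoint flattens OpenAI-style messages into a single llama.cpp prompt. The following sketch replays that logic with illustrative input:

  // how /v1/chat/completions builds its prompt (template taken from the flow settings)
  const Template = {
    system:   '{input}\n',
    user:     '### Instruction: {input}\n',
    assistant:'### Response: {input}\n',
    suffix:   '### Response:'
  }
  const Messages = [
    { role:'system', content:'You are a helpful assistant.' },
    { role:'user',   content:'What is Node-RED?' }
  ]
  let Prompt = ''
  for (const Message of Messages) {
    Prompt += Template[Message.role].replace('{input}',Message.content)
  }
  Prompt += Template.suffix
  // => "You are a helpful assistant.\n### Instruction: What is Node-RED?\n### Response:"

The default stop sequence "###" matches the instruction markers of this template, so the model stops before it starts a "### Instruction:" block of its own.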

Node Types

Core
  • catch (x1)
  • change (x1)
  • comment (x1)
  • debug (x1)
  • function (x15)
  • http in (x3)
  • http response (x3)
  • inject (x1)
Other
  • reusable (x3)
  • reusable-in (x1)
  • reusable-out (x2)

Tags

  • llama
  • llama2
  • openai
  • llama.cpp
Copy this flow JSON to your clipboard and then import it into Node-RED using the Import From > Clipboard (Ctrl-I) menu option.