Node-RED Flows for the Dans-TotSirocco-7B AI model
This flow creates HTTP endpoints for text completion, prompt tokenization, and embedding calculation with the Dans-TotSirocco-7B model from PocketDoc Labs, using llama.cpp under the hood.
Further details and instructions can be found in the related GitHub repository.
[{"id":"aed2be241ae8bba5","type":"inject","z":"706bdd3f3f20c07c","name":"on StartUp","props":[{"p":"payload"},{"p":"topic","vt":"str"}],"repeat":"","crontab":"","once":true,"onceDelay":0.1,"topic":"","payload":"","payloadType":"date","x":770,"y":40,"wires":[["7431f09f4d61416e"]]},{"id":"7431f09f4d61416e","type":"change","z":"706bdd3f3f20c07c","name":"configure Settings","rules":[{"t":"set","p":"UserDir","pt":"global","to":"","tot":"str"},{"t":"set","p":"payload","pt":"msg","to":"configured","tot":"str"}],"action":"","property":"","from":"","to":"","reg":false,"x":970,"y":40,"wires":[["e734d784afdb8d8e"]]},{"id":"e734d784afdb8d8e","type":"debug","z":"706bdd3f3f20c07c","name":"Status","active":true,"tosidebar":false,"console":false,"tostatus":true,"complete":"payload","targetType":"msg","statusVal":"payload","statusType":"auto","x":1150,"y":40,"wires":[]},{"id":"a0bd7a882df09085","type":"comment","z":"706bdd3f3f20c07c","name":" Dans-TotSirocco-7B Completion","info":"","x":810,"y":100,"wires":[]},{"id":"d879c37cffbbafa9","type":"comment","z":"706bdd3f3f20c07c","name":" Dans-TotSirocco-7B Tokenization","info":"","x":810,"y":360,"wires":[]},{"id":"ada0a4a67bca5db5","type":"comment","z":"706bdd3f3f20c07c","name":" Dans-TotSirocco-7B Embeddings","info":"","x":810,"y":620,"wires":[]},{"id":"853e374da27d1f6f","type":"comment","z":"706bdd3f3f20c07c","name":"cwd = UserDir","info":"","x":1170,"y":620,"wires":[]},{"id":"60d6aa7c18a7ce3e","type":"comment","z":"706bdd3f3f20c07c","name":"cwd = UserDir","info":"","x":1170,"y":360,"wires":[]},{"id":"87fd167f1bf86604","type":"comment","z":"706bdd3f3f20c07c","name":"cwd = UserDir","info":"","x":1170,"y":100,"wires":[]},{"id":"79937f51b992b4ac","type":"catch","z":"706bdd3f3f20c07c","name":"uncaught 
exceptions","scope":null,"uncaught":true,"x":780,"y":880,"wires":[["3139b5cd0e19b096"]]},{"id":"3139b5cd0e19b096","type":"debug","z":"706bdd3f3f20c07c","name":"","active":true,"tosidebar":true,"console":false,"tostatus":true,"complete":"true","targetType":"full","statusVal":"'internal error'","statusType":"jsonata","x":970,"y":880,"wires":[]},{"id":"301ac0c9514d9004","type":"http in","z":"706bdd3f3f20c07c","name":"[get] /dans-totsirocco","url":"/dans-totsirocco","method":"get","upload":false,"swaggerDoc":"","x":780,"y":160,"wires":[["d884abb4dc9c1eb0"]]},{"id":"aba8fb2f9c6c7997","type":"function","z":"706bdd3f3f20c07c","name":"Dans-TotSirocco Completion","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null, msg])\n node.done()\n return\n }\n\n /**** retrieve settings or provide defaults ****/\n\n let Seed = parseInt(msg.seed, 10)\n if (isNaN(Seed)) { Seed = -1 }\n\n let Threads = parseInt(msg.threads, 10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1, Threads)\n Threads = Math.min(Threads, Math.max(1, os.cpus().length))\n\n let ContextLength = parseInt(msg.context, 10)\n if (isNaN(ContextLength)) { ContextLength = 512 }\n ContextLength = Math.max(0, Math.min(ContextLength, 4096))\n\n let keep = parseInt(msg.keep, 10)\n if (isNaN(keep)) { keep = 0 }\n keep = Math.max(-1, Math.min(keep, ContextLength))\n\n let PredictionLength = parseInt(msg.predict, 10)\n if (isNaN(PredictionLength)) { PredictionLength = 128 }\n PredictionLength = Math.max(1, Math.min(PredictionLength, ContextLength)) // no -1!\n\n let topK = parseInt(msg.topk, 10)\n if (isNaN(topK)) { topK = 40 }\n topK = Math.max(1, Math.min(topK, 100))\n\n let topP = parseFloat(msg.topp)\n if (isNaN(topP)) { topP = 0.9 }\n topP = Math.max(0.1, Math.min(topP, 1.0))\n\n let Temperature = parseFloat(msg.temperature)\n if (isNaN(Temperature)) { Temperature = 0.8 }\n Temperature = Math.max(0.0, 
Math.min(Temperature, 1.0))\n\n let Batches = parseInt(msg.batches, 10)\n if (isNaN(Batches)) { Batches = 8 }\n Batches = Math.max(1, Math.min(Batches, 100))\n\n let Grammar = (msg.grammar || '').trim()\n\n Prompt = Prompt.replace(/\"/g, '\\\\\"')\n\n /**** retrieve UserDir - crash if not a folder ****/\n\n let UserDir = (global.get('UserDir') || '').trim()\n if (UserDir === '') { UserDir = process.env.HOME + '/.node-red' }\n\n if (!fs.statSync(UserDir, { throwIfNoEntry: false })?.isDirectory()) {\n throw new Error(\n 'the given \"UserDir\" (\"' + UserDir + '\") is either missing or ' +\n 'not a folder - exiting'\n )\n }\n\n /**** combine all these settings into a command ****/\n\n const Command = (\n './llama --model ./dans-totsirocco-7b.Q5_K_M.gguf --mlock ' +\n ' --ctx_size ' + ContextLength + ' --keep ' + keep +\n ' --n_predict ' + PredictionLength +\n ' --threads ' + Threads + ' --batch_size ' + Batches +\n ' --seed ' + Seed + ' --temp ' + Temperature +\n ' --top_k ' + topK + ' --top_p ' + topP +\n ' --reverse-prompt \"<|endoftext|>\"' +\n (Grammar === '' ? 
'' : ' --grammar \"' + encoded(Grammar) + '\"') +\n ' --prompt \"' + encoded(Prompt) + '\"'\n )\n const Options = {\n cwd: UserDir\n }\n\n /**** extract actual reponse from command output ****/\n\n function ResponseFrom(Text) {\n let HeaderLength = Text.indexOf('\\n\\n\\n')\n Text = Text.slice(HeaderLength + 1)\n\n let TrailerIndex = Text.indexOf('<|endoftext|>')\n if (TrailerIndex < 0) {\n TrailerIndex = Text.indexOf('\\nllama_print_timings')\n }\n Text = Text.slice(0, TrailerIndex)\n\n return Text\n }\n\n /**** now infer a response from the given prompt ****/\n\n node.status({ fill: 'yellow', shape: 'ring', text: 'running' })\n\n const ShellProcess = child_process.exec(Command, Options)\n let stdout = ''\n for await (const Chunk of ShellProcess.stdout) {\n stdout += Chunk\n }\n\n let stderr = ''\n for await (const Chunk of ShellProcess.stderr) {\n stderr += Chunk\n }\n\n const ExitCode = (await new Promise((resolve) => {\n ShellProcess.on('close', resolve)\n })) || 0\n if (ExitCode !== 0) {\n node.status({ fill: 'red', shape: 'dot', text: 'ExitCode = ' + ExitCode })\n\n msg.statusCode = 500 + ExitCode\n msg.payload = stderr\n } else {\n node.status({ fill: 'green', shape: 'dot', text: 'finished' })\n\n stdout = stdout.trim()\n\n msg.statusCode = (stdout === '' ? 
204 : 200)\n msg.payload = ResponseFrom(stdout)\n }\n\n node.send([msg, null])\n node.done()\n\n /**** encoded ****/\n\n function encoded(Text) {\n return Text.replace(/'/g, \"'\\\"'\\\"'\")\n }\n})()\n","outputs":2,"timeout":"","noerr":0,"initialize":"","finalize":"","libs":[{"var":"child_process","module":"child_process"},{"var":"fs","module":"fs"},{"var":"process","module":"process"},{"var":"path","module":"path"},{"var":"os","module":"os"}],"x":1020,"y":220,"wires":[["d06d73a912c10946","315ea6e17feed944"],["74a831a4ec19588b"]],"outputLabels":["on success","on failure"]},{"id":"65c4b5f007be6549","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Prompt: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":820,"y":280,"wires":[]},{"id":"327bcacd2a5c8f79","type":"http response","z":"706bdd3f3f20c07c","name":"","statusCode":"","headers":{},"x":1190,"y":160,"wires":[]},{"id":"74a831a4ec19588b","type":"change","z":"706bdd3f3f20c07c","name":"400","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"400","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":260,"wires":[["327bcacd2a5c8f79"]]},{"id":"d06d73a912c10946","type":"change","z":"706bdd3f3f20c07c","name":"200","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"200","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":220,"wires":[["327bcacd2a5c8f79"]]},{"id":"315ea6e17feed944","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Response: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":1020,"y":300,"wires":[]},{"id":"d884abb4dc9c1eb0","type":"change","z":"706bdd3f3f20c07c","name":"parse 
query","rules":[{"t":"set","p":"payload","pt":"msg","to":"req.query.prompt","tot":"msg"},{"t":"set","p":"seed","pt":"msg","to":"req.query.seed","tot":"msg"},{"t":"set","p":"threads","pt":"msg","to":"req.query.threads","tot":"msg"},{"t":"set","p":"predict","pt":"msg","to":"req.query.predict","tot":"msg"},{"t":"set","p":"topk","pt":"msg","to":"req.query.topk","tot":"msg"},{"t":"set","p":"topp","pt":"msg","to":"req.query.topp","tot":"msg"},{"t":"set","p":"temperature","pt":"msg","to":"req.query.temperature","tot":"msg"},{"t":"set","p":"batches","pt":"msg","to":"req.query.batches","tot":"msg"},{"t":"set","p":"model","pt":"msg","to":"req.query.model","tot":"msg"},{"t":"set","p":"context","pt":"msg","to":"req.query.context","tot":"msg"},{"t":"set","p":"keep","pt":"msg","to":"req.query.keep","tot":"msg"},{"t":"set","p":"grammar","pt":"msg","to":"req.query.grammar","tot":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":790,"y":220,"wires":[["aba8fb2f9c6c7997","65c4b5f007be6549"]]},{"id":"c06ba6e31715030e","type":"http in","z":"706bdd3f3f20c07c","name":"[get] /dans-totsirocco-tokenization","url":"/dans-totsirocco-tokenization","method":"get","upload":false,"swaggerDoc":"","x":820,"y":420,"wires":[["e1b0b52da84d56ce"]]},{"id":"b2eda929a29784f1","type":"function","z":"706bdd3f3f20c07c","name":"Dans-TotSirocco Tokenization","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null, msg])\n node.done()\n return\n }\n\n /**** retrieve settings or provide defaults ****/\n\n let Threads = parseInt(msg.threads, 10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1, Threads)\n Threads = Math.min(Threads, Math.max(1, os.cpus().length))\n\n let ContextLength = parseInt(msg.context, 10)\n if (isNaN(ContextLength)) { ContextLength = 512 }\n ContextLength = Math.max(0, Math.min(ContextLength, 4096))\n\n Prompt = Prompt.replace(/\"/g, '\\\\\"')\n\n /**** retrieve UserDir 
- crash if not a folder ****/\n\n let UserDir = (global.get('UserDir') || '').trim()\n if (UserDir === '') { UserDir = process.env.HOME + '/.node-red' }\n\n if (!fs.statSync(UserDir, { throwIfNoEntry: false })?.isDirectory()) {\n throw new Error(\n 'the given \"UserDir\" (\"' + UserDir + '\") is either missing or ' +\n 'not a folder - exiting'\n )\n }\n\n /**** combine all these settings into a command ****/\n\n let Command = ('cd \"' + UserDir + '\" && ' +\n './llama-tokens --model ./dans-totsirocco-7b.Q5_K_M.gguf --mlock ' +\n ' --ctx_size ' + ContextLength +\n ' --threads ' + Threads +\n ' --prompt \"' + encoded(Prompt) + '\"'\n )\n const Options = {\n cwd: UserDir\n }\n\n /**** extract actual reponse from command output ****/\n\n function ResponseFrom(Text) {\n let HeaderLength = Text.indexOf('system_info')\n Text = Text.slice(HeaderLength + 1)\n .replace(/^[^\\n]*\\n/, '')\n\n let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n Text = Text.slice(0, TrailerIndex)\n\n return Text\n }\n\n /**** now tokenize the given prompt ****/\n\n node.status({ fill: 'yellow', shape: 'ring', text: 'running' })\n\n const ShellProcess = child_process.exec(Command, Options)\n let stdout = ''\n for await (const Chunk of ShellProcess.stdout) {\n stdout += Chunk\n }\n\n let stderr = ''\n for await (const Chunk of ShellProcess.stderr) {\n stderr += Chunk\n }\n\n const ExitCode = (await new Promise((resolve) => {\n ShellProcess.on('close', resolve)\n })) || 0\n if (ExitCode !== 0) {\n node.status({ fill: 'red', shape: 'dot', text: 'ExitCode = ' + ExitCode })\n\n msg.statusCode = 500 + ExitCode\n msg.payload = stderr\n } else {\n node.status({ fill: 'green', shape: 'dot', text: 'finished' })\n\n stdout = stdout.trim()\n\n msg.statusCode = (stdout === '' ? 
204 : 200)\n msg.payload = ResponseFrom(stdout)\n }\n\n node.send([msg, null])\n node.done()\n\n /**** encoded ****/\n\n function encoded(Text) {\n return Text.replace(/'/g, \"'\\\"'\\\"'\")\n }\n})()\n","outputs":2,"timeout":"","noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"},{"var":"fs","module":"fs"},{"var":"process","module":"process"}],"x":1010,"y":480,"wires":[["1fa9703fc36e7b13","820541e11d948374"],["41145a6fb5abd62c"]],"outputLabels":["on success","on failure"]},{"id":"4cd656fa7df431a4","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Prompt: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":820,"y":540,"wires":[]},{"id":"7b01f8cc7c702ae8","type":"http response","z":"706bdd3f3f20c07c","name":"","statusCode":"","headers":{},"x":1190,"y":420,"wires":[]},{"id":"41145a6fb5abd62c","type":"change","z":"706bdd3f3f20c07c","name":"400","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"400","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":520,"wires":[["7b01f8cc7c702ae8"]]},{"id":"1fa9703fc36e7b13","type":"change","z":"706bdd3f3f20c07c","name":"200","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"200","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":480,"wires":[["7b01f8cc7c702ae8"]]},{"id":"820541e11d948374","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Response: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":1020,"y":560,"wires":[]},{"id":"e1b0b52da84d56ce","type":"change","z":"706bdd3f3f20c07c","name":"parse 
query","rules":[{"t":"set","p":"payload","pt":"msg","to":"req.query.prompt","tot":"msg"},{"t":"set","p":"threads","pt":"msg","to":"req.query.threads","tot":"msg"},{"t":"set","p":"model","pt":"msg","to":"req.query.model","tot":"msg"},{"t":"set","p":"context","pt":"msg","to":"req.query.context","tot":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":790,"y":480,"wires":[["4cd656fa7df431a4","b2eda929a29784f1"]]},{"id":"2f48938d9983ed5f","type":"http in","z":"706bdd3f3f20c07c","name":"[get] /dans-totsirocco-embeddings","url":"/dans-totsirocco-embeddings","method":"get","upload":false,"swaggerDoc":"","x":820,"y":680,"wires":[["333b47ed17b82ad0"]]},{"id":"4f86686802338bd3","type":"function","z":"706bdd3f3f20c07c","name":"Dans-TotSirocco Embeddings","func":"(async () => {\n let Prompt = (msg.payload || '').trim()\n if (Prompt === '') {\n msg.payload = 'empty or missing prompt'\n node.send([null, msg])\n node.done()\n return\n }\n\n /**** retrieve settings or provide defaults ****/\n\n let Seed = parseInt(msg.seed, 10)\n if (isNaN(Seed)) { Seed = -1 }\n\n let Threads = parseInt(msg.threads, 10)\n if (isNaN(Threads)) { Threads = 4 }\n Threads = Math.max(1, Threads)\n Threads = Math.min(Threads, Math.max(1, os.cpus().length))\n\n let ContextLength = parseInt(msg.context, 10)\n if (isNaN(ContextLength)) { ContextLength = 512 }\n ContextLength = Math.max(0, Math.min(ContextLength, 2048))\n\n Prompt = Prompt.replace(/\"/g, '\\\\\"')\n\n /**** retrieve UserDir - crash if not a folder ****/\n\n let UserDir = (global.get('UserDir') || '').trim()\n if (UserDir === '') { UserDir = process.env.HOME + '/.node-red' }\n\n if (!fs.statSync(UserDir, { throwIfNoEntry: false })?.isDirectory()) {\n throw new Error(\n 'the given \"UserDir\" (\"' + UserDir + '\") is either missing or ' +\n 'not a folder - exiting'\n )\n }\n\n /**** combine all these settings into a command ****/\n\n let Command = ('cd \"' + UserDir + '\" && ' +\n './llama-embeddings --model 
./dans-totsirocco-7b.Q5_K_M.gguf --mlock ' +\n ' --ctx_size ' + ContextLength +\n ' --threads ' + Threads +\n ' --seed ' + Seed +\n ' --prompt \"' + encoded(Prompt) + '\"'\n )\n const Options = {\n cwd: UserDir\n }\n\n /**** extract actual reponse from command output ****/\n\n function ResponseFrom(Text) {\n let HeaderLength = Text.indexOf('system_info')\n Text = Text.slice(HeaderLength + 1)\n .replace(/^[^\\n]*\\n/, '')\n\n let TrailerIndex = Text.indexOf('\\n\\nllama_print_timings')\n Text = Text.slice(0, TrailerIndex)\n\n return Text\n }\n\n /**** now calculate embeddings for the given prompt ****/\n\n node.status({ fill: 'yellow', shape: 'ring', text: 'running' })\n\n const ShellProcess = child_process.exec(Command, Options)\n let stdout = ''\n for await (const Chunk of ShellProcess.stdout) {\n stdout += Chunk\n }\n\n let stderr = ''\n for await (const Chunk of ShellProcess.stderr) {\n stderr += Chunk\n }\n\n const ExitCode = (await new Promise((resolve) => {\n ShellProcess.on('close', resolve)\n })) || 0\n if (ExitCode !== 0) {\n node.status({ fill: 'red', shape: 'dot', text: 'ExitCode = ' + ExitCode })\n\n msg.statusCode = 500 + ExitCode\n msg.payload = stderr\n } else {\n node.status({ fill: 'green', shape: 'dot', text: 'finished' })\n\n stdout = stdout.trim()\n\n msg.statusCode = (stdout === '' ? 
204 : 200)\n msg.payload = ResponseFrom(stdout)\n }\n\n node.send([msg, null])\n node.done()\n\n /**** encoded ****/\n\n function encoded(Text) {\n return Text.replace(/'/g, \"'\\\"'\\\"'\")\n }\n})()\n","outputs":2,"timeout":"","noerr":0,"initialize":"","finalize":"","libs":[{"var":"os","module":"os"},{"var":"child_process","module":"child_process"},{"var":"fs","module":"fs"},{"var":"process","module":"process"}],"x":1010,"y":740,"wires":[["c0a4252bf31dd9c8","b2577deb9a7695cf"],["e98d52e58cbe4822"]],"outputLabels":["on success","on failure"]},{"id":"23ba4ba6c8df2c74","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Prompt: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":820,"y":800,"wires":[]},{"id":"84c6b78f415a9b95","type":"http response","z":"706bdd3f3f20c07c","name":"","statusCode":"","headers":{},"x":1190,"y":680,"wires":[]},{"id":"e98d52e58cbe4822","type":"change","z":"706bdd3f3f20c07c","name":"400","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"400","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":780,"wires":[["84c6b78f415a9b95"]]},{"id":"c0a4252bf31dd9c8","type":"change","z":"706bdd3f3f20c07c","name":"200","rules":[{"t":"set","p":"statusCode","pt":"msg","to":"200","tot":"num"}],"action":"","property":"","from":"","to":"","reg":false,"x":1190,"y":740,"wires":[["84c6b78f415a9b95"]]},{"id":"b2577deb9a7695cf","type":"debug","z":"706bdd3f3f20c07c","name":"","active":false,"tosidebar":true,"console":false,"tostatus":false,"complete":"'Response: \"' & msg.payload & '\"'","targetType":"jsonata","statusVal":"","statusType":"auto","x":1020,"y":820,"wires":[]},{"id":"333b47ed17b82ad0","type":"change","z":"706bdd3f3f20c07c","name":"parse 
query","rules":[{"t":"set","p":"payload","pt":"msg","to":"req.query.prompt","tot":"msg"},{"t":"set","p":"seed","pt":"msg","to":"req.query.seed","tot":"msg"},{"t":"set","p":"threads","pt":"msg","to":"req.query.threads","tot":"msg"},{"t":"set","p":"model","pt":"msg","to":"req.query.model","tot":"msg"},{"t":"set","p":"context","pt":"msg","to":"req.query.context","tot":"msg"}],"action":"","property":"","from":"","to":"","reg":false,"x":790,"y":740,"wires":[["23ba4ba6c8df2c74","4f86686802338bd3"]]}]