metal-kompanion/src/KLLMReply.cpp

107 lines
4.4 KiB
C++

// SPDX-FileCopyrightText: 2023 Loren Burkholder <computersemiexpert@outlook.com>
// SPDX-FileCopyrightText: 2023 Klarälvdalens Datakonsult AB, a KDAB Group company <info@kdab.com>
//
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
#include "KLLMReply.h"
#include "kllmcore_debug.h"
#include <QNetworkReply>
using namespace Qt::StringLiterals;
using namespace KLLMCore;
// Wraps netReply and incrementally parses the Ollama response body it delivers.
//
// netReply:    the in-flight network reply to read from; its signals drive all parsing done here.
// parent:      QObject parent of this KLLMReply.
// requestType: selects how the body is interpreted: Show parses the accumulated body as one JSON document,
//              StreamingGenerate parses it as newline-separated JSON documents.
//
// Emits contentAdded() whenever newly received data has been processed, and finished() once the network reply
// reports that it has finished.
KLLMReply::KLLMReply(QNetworkReply *netReply, QObject *parent, RequestTypes requestType)
    : QObject{parent}
    , m_reply{netReply}
    , m_requestType{requestType}
{
    // The lambdas below capture `this`, so `this` (not m_reply) is used as the connection's context object: Qt then
    // drops the connection if this KLLMReply is destroyed first, instead of invoking a lambda on a dangling pointer.
    connect(m_reply, &QNetworkReply::finished, this, [this] {
        // Normally, we could assume that the tokens will never be empty once the request finishes, but it could be
        // possible that the request failed and we have no tokens to parse.
        if (m_requestType == RequestTypes::StreamingGenerate && !m_tokens.empty()) {
            // The last streamed document carries the conversation context and the timing statistics.
            const auto &finalResponse = m_tokens.constLast();
            m_context.setOllamaContext(finalResponse["context"_L1].toArray());
            m_info.totalDuration = std::chrono::nanoseconds{finalResponse["total_duration"_L1].toVariant().toULongLong()};
            m_info.loadDuration = std::chrono::nanoseconds{finalResponse["load_duration"_L1].toVariant().toULongLong()};
            m_info.promptEvalTokenCount = finalResponse["prompt_eval_count"_L1].toVariant().toULongLong();
            m_info.promptEvalDuration = std::chrono::nanoseconds{finalResponse["prompt_eval_duration"_L1].toVariant().toULongLong()};
            m_info.tokenCount = finalResponse["eval_count"_L1].toVariant().toULongLong();
            m_info.duration = std::chrono::nanoseconds{finalResponse["eval_duration"_L1].toVariant().toULongLong()};
        }

        qCDebug(KLLMCORE_LOG) << "Ollama response finished";
        m_finished = true;
        Q_EMIT finished();
    });

    // This lambda captures nothing, so tying its lifetime to the network reply is sufficient.
    connect(m_reply, &QNetworkReply::errorOccurred, m_reply, [](QNetworkReply::NetworkError e) {
        qCDebug(KLLMCORE_LOG) << "Ollama HTTP error:" << e;
    });

    connect(m_reply, &QNetworkReply::downloadProgress, this, [this](qint64 received, qint64 /*total*/) {
        // Only the bytes that arrived since the previous notification are new.
        m_incompleteTokens += m_reply->read(received - m_receivedSize);
        m_receivedSize = received;

        switch (m_requestType) {
        case RequestTypes::Show: {
            // The body is a single JSON document that may arrive in several chunks. Keep exactly one entry holding
            // the most recent parse of everything received so far, so that the first token is the complete document
            // once all data is in (rather than a stale, null parse of a partial body).
            m_tokens.clear();
            m_tokens.append(QJsonDocument::fromJson(m_incompleteTokens));
            break;
        }
        case RequestTypes::StreamingGenerate: {
            auto lines = m_incompleteTokens.split('\n');
            if (lines.size() <= 1) {
                // No newline yet: the current document is still incomplete.
                return;
            }

            // Whatever follows the last newline is the beginning of the next document; keep it for later.
            m_incompleteTokens = lines.takeLast();

            m_tokens.reserve(m_tokens.size() + lines.size());
            for (const auto &line : std::as_const(lines)) {
                const auto doc = QJsonDocument::fromJson(line);
                if (doc.isNull()) {
                    // Blank or malformed lines must not end up as tokens: a null document at the end of the list
                    // would later be mistaken for the final response.
                    qCDebug(KLLMCORE_LOG) << "Skipping unparsable line in Ollama response";
                    continue;
                }
                m_tokens.append(doc);
            }
            break;
        }
        }

        Q_EMIT contentAdded();
    });
}
// Builds the text of the reply from the tokens received so far.
//
// For Show requests the result is a Markdown summary (template, modelfile, parameters, details and model info) of
// the first received document; an empty string is returned while no document has been received yet.
// For StreamingGenerate requests the result is the concatenation of the "response" field of every token.
QString KLLMReply::readResponse() const
{
    QString ret;
    switch (m_requestType) {
    case RequestTypes::Show: {
        // constFirst() must not be called on an empty list, which is the state before any data has arrived.
        if (m_tokens.empty()) {
            break;
        }
        const auto &doc = m_tokens.constFirst();

        // Appends one fenced Markdown section made of a header, a body and the closing fence.
        const auto appendSection = [&ret](QLatin1StringView header, const QString &body) {
            ret += header;
            ret += body;
            ret += "\n```\n"_L1;
        };
        // QJsonDocument::toJson() produces UTF-8, so it has to be decoded as UTF-8 to keep non-ASCII text intact.
        const auto toJsonText = [&doc](QLatin1StringView key) {
            return QString::fromUtf8(QJsonDocument::fromVariant(doc[key].toVariant()).toJson());
        };

        appendSection("## Template: \n```\n"_L1, doc["template"_L1].toString());
        appendSection("## Modelfile: \n```\n"_L1, doc["modelfile"_L1].toString());
        appendSection("## Parameters: \n```\n"_L1, doc["parameters"_L1].toString());
        appendSection("## Details: \n```\n"_L1, toJsonText("details"_L1));
        appendSection("## Model Info: \n```\n"_L1, toJsonText("model_info"_L1));
        break;
    }
    case RequestTypes::StreamingGenerate:
        for (const auto &tok : m_tokens) {
            ret += tok["response"_L1].toString();
        }
        break;
    }
    return ret;
}
// Returns the context stored in this reply.
// For StreamingGenerate requests it is filled from the "context" field of the last received token when the
// network reply finishes; before that (or if no token was received) it holds whatever it was initialized with.
const KLLMContext &KLLMReply::context() const
{
return m_context;
}
// Returns the statistics stored in this reply (durations and token counts).
// They are filled from the last received token when a StreamingGenerate request finishes with at least one token.
const KLLMReplyInfo &KLLMReply::info() const
{
return m_info;
}
// Returns the request type this reply was constructed with; it decides how the response body is parsed.
const KLLMReply::RequestTypes &KLLMReply::requestType() const
{
return m_requestType;
}
// Returns the finished flag, which is set to true when the underlying network reply emits finished().
// NOTE(review): presumably initialized to false in the header — confirm.
bool KLLMReply::isFinished() const
{
return m_finished;
}
#include "moc_KLLMReply.cpp"