metal-kompanion/src/KLLMReply.h

155 lines
5.3 KiB
C++

// SPDX-FileCopyrightText: 2023 Loren Burkholder <computersemiexpert@outlook.com>
// SPDX-FileCopyrightText: 2023 Klarälvdalens Datakonsult AB, a KDAB Group company <info@kdab.com>
//
// SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
#pragma once
#include "kllmcore_export.h"
#include <QJsonDocument>
#include "KLLMContext.h"
class QNetworkReply;
namespace KLLMCore
{
/**
 * @brief The KLLMReplyInfo struct represents information about a reply from an LLM.
 *
 * When an LLM generates a completion, the server generally will return some information about the completion, including the
 * duration of the completion, the number of tokens received, and the duration of the prompt evaluation. This struct
 * encapsulates such information. If any one of these fields is not available, it will be set to its default value.
 *
 * Every member has a default member initializer, so a default-constructed KLLMReplyInfo holds zero durations and zero
 * token counts rather than indeterminate values.
 */
struct KLLMCORE_EXPORT KLLMReplyInfo {
    //! The total time from when the request was received by the server to when the reply was returned.
    std::chrono::nanoseconds totalDuration = std::chrono::nanoseconds::zero();

    //! The time spent loading the model.
    std::chrono::nanoseconds loadDuration = std::chrono::nanoseconds::zero();

    //! The number of tokens in the prompt.
    int promptEvalTokenCount = 0;

    //! The time spent evaluating the prompt.
    std::chrono::nanoseconds promptEvalDuration = std::chrono::nanoseconds::zero();

    //! The number of tokens in the reply.
    int tokenCount = 0;

    //! The time spent generating the reply.
    std::chrono::nanoseconds duration = std::chrono::nanoseconds::zero();
};
/**
 * @brief The KLLMReply class represents a reply from an LLM.
 *
 * Requesting a completion from a KLLMInterface will return a KLLMReply. You can use this to track the progress of the LLM's
 * reply.
 *
 * If you want to stream a reply as it is written in real time, connect to contentAdded() and use readResponse() to retrieve
 * the new content. If you prefer to wait for the entire reply before displaying anything, connect to finished(), which will
 * only be emitted once the reply is complete.
 */
class KLLMCORE_EXPORT KLLMReply : public QObject
{
    Q_OBJECT

public:
    /**
     * @brief Specifies the request type.
     *
     * When the class is instantiated, the type of request should be specified.
     */
    enum class RequestTypes {
        StreamingGenerate,
        Show
    };

    /**
     * @brief Get the current response content.
     *
     * This function returns what it has received of the response so far. Therefore, until finished() is emitted, this
     * function may return different values. However, once finished() is emitted, the content is guaranteed to remain
     * constant.
     *
     * @return The content that has been returned so far.
     */
    [[nodiscard]] QString readResponse() const;

    /**
     * @brief Get the context token for this response (if applicable).
     *
     * Messages sent by most LLMs have a context identifier that allows you to chain messages into a conversation. To create
     * such a conversation, you need to take this context object and set it on the next KLLMRequest in the conversation.
     * KLLMInterface::getCompletion() will use that context object to continue the message thread.
     *
     * @return A context object that refers to this response.
     */
    [[nodiscard]] const KLLMContext &context() const;

    /**
     * @brief Get extra information about the reply (if applicable).
     *
     * This function returns a KLLMReplyInfo object containing information about this reply. If the reply has not finished,
     * the KLLMReplyInfo object will have all members set to their default values.
     *
     * @return Extra information about the reply.
     */
    [[nodiscard]] const KLLMReplyInfo &info() const;

    /**
     * @brief Check whether the reply has finished.
     *
     * If you need to know if the response has finished changing or if the context has been received yet, call this function.
     *
     * @return Whether the reply has finished.
     */
    [[nodiscard]] bool isFinished() const;

    /**
     * @brief Get request type.
     *
     * The request type is set when this object is created.
     *
     * @return Corresponding request type.
     */
    [[nodiscard]] const RequestTypes &requestType() const;

protected:
    /**
     * @brief Construct a reply wrapper.
     *
     * The constructor is protected, so instances can only be created by subclasses or by KLLMInterface, which is declared
     * a friend below.
     *
     * @param netReply The network reply associated with this KLLMReply.
     * @param parent The QObject parent of this reply.
     * @param requestType The kind of request this reply answers; defaults to RequestTypes::StreamingGenerate.
     */
    explicit KLLMReply(QNetworkReply *netReply, QObject *parent = nullptr, RequestTypes requestType = RequestTypes::StreamingGenerate);

    friend class KLLMInterface;

Q_SIGNALS:
    /**
     * @brief Emits when new content has been added to the response.
     *
     * If you are not streaming the response live, this signal is not of importance to you. However, if you are streaming
     * content, when this signal is emitted, you should call readResponse() to update the response that your application
     * shows.
     */
    void contentAdded();

    /**
     * @brief Emits when the LLM has finished returning its response.
     *
     * After this signal has emitted, the content is guaranteed to not change. At this point, you should call readResponse()
     * to get the content and then either take ownership of the KLLMReply or delete it, as automatic reply deletion is not
     * implemented yet.
     */
    void finished();

private:
    // NOTE(review): the member descriptions below are inferred from names and types only; the implementation file is
    // not visible here, so confirm them against KLLMReply.cpp.

    //! Underlying network reply; the pointer itself is const and cannot be reseated after construction.
    QNetworkReply *const m_reply;
    //! Presumably raw bytes received that do not yet form a complete JSON document.
    QByteArray m_incompleteTokens;
    //! Presumably the JSON documents parsed from the reply so far.
    QList<QJsonDocument> m_tokens;
    //! Backing storage for context().
    KLLMContext m_context;
    //! Backing storage for info().
    KLLMReplyInfo m_info;
    //! Backing storage for requestType().
    RequestTypes m_requestType = RequestTypes::StreamingGenerate;
    //! Presumably the amount of reply data consumed so far; starts at 0.
    int m_receivedSize = 0;
    //! Backing storage for isFinished(); starts as false.
    bool m_finished = false;
};
}