// SPDX-FileCopyrightText: 2023 Loren Burkholder // SPDX-FileCopyrightText: 2023 Klarälvdalens Datakonsult AB, a KDAB Group company // // SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL #pragma once #include "kllmcore_export.h" #include #include "KLLMContext.h" class QNetworkReply; namespace KLLMCore { /** * @brief The KLLMReplyInfo class represents information about a reply from an LLM. * * When an LLM generates a completion, the server generally will return some information about the completion, including the * duration of the completion, the number of tokens received, and the duration of the prompt evaluation. This struct encapsulates such information. * If any one of these fields is not available, it will be set to its default value. */ struct KLLMCORE_EXPORT KLLMReplyInfo { //! The total time from when the request was received by the server to when the reply was returned. std::chrono::nanoseconds totalDuration; //! The time spent loading the model. std::chrono::nanoseconds loadDuration; //! The number of tokens in the prompt. int promptEvalTokenCount; //! The time spent evaluating the prompt. std::chrono::nanoseconds promptEvalDuration; //! The number of tokens in the reply. int tokenCount; //! The time spent generating the reply. std::chrono::nanoseconds duration; }; /** * @brief The KLLMReply class represents a reply from an LLM. * * Requesting a completion from a KLLMInterface will return a KLLMReply. You can use this to track the progress of the LLM's * reply. * * If you want to stream a reply as it is written in real time, connect to contentAdded() and use readResponse() to retrieve * the new content. If you prefer to wait for the entire reply before displaying anything, connect to finished(), which will * only be emitted once the reply is complete. */ class KLLMCORE_EXPORT KLLMReply : public QObject { Q_OBJECT public: /** * @brief Specifies the request type. * * When the class in instantiated the type of request should be specified */ enum class RequestTypes { StreamingGenerate, Show }; /** * @brief Get the current response content. * * This function returns what it has recieved of the response so far. Therefore, until finished() is emitted, this * function may return different values. However, once finished() is emitted, the content is guaranteed to remain * constant. * * @return The content that has been returned so far. */ [[nodiscard]] QString readResponse() const; /** * @brief Get the context token for this response (if applicable). * * Messages sent by most LLMs have a context identifier that allows you to chain messages into a conversation. To create * such a conversation, you need to take this context object and set it on the next KLLMRequest in the conversation. * KLLMInterface::getCompletion() will use that context object to continue the message thread. * * @return A context object that refers to this response. */ const KLLMContext &context() const; /** * @brief Get extra information about the reply (if applicable). * * This function returns a KLLMReplyInfo object containing information about this reply. If the reply has not finished, the KLLMReplyInfo object will have * all members set to their default values. * * @return Extra information about the reply. */ const KLLMReplyInfo &info() const; /** * @brief Check whether the reply has finished. * * If you need to know if the response has finished changing or if the context has been received yet, call this function. * * @return Whether the reply has finished. */ [[nodiscard]] bool isFinished() const; /** * @brief Get request type. * * The request type is set when this object is created. * * @return Corresponding request type. */ const RequestTypes &requestType() const; protected: explicit KLLMReply(QNetworkReply *netReply, QObject *parent = nullptr, RequestTypes requestType = RequestTypes::StreamingGenerate); friend class KLLMInterface; Q_SIGNALS: /** * @brief Emits when new content has been added to the response. * * If you are not streaming the response live, this signal is not of importance to you. However, if you are streaming * content, when this signal is emitted, you should call readResponse() to update the response that your application * shows. */ void contentAdded(); /** * @brief Emits when the LLM has finished returning its response. * * After this signal has emitted, the content is guaranteed to not change. At this point, you should call readResponse() * to get the content and then either take ownership of the KLLMReply or delete it, as automatic reply deletion is not * implemented yet. */ void finished(); private: QNetworkReply *const m_reply; QByteArray m_incompleteTokens; QList m_tokens; KLLMContext m_context; KLLMReplyInfo m_info; RequestTypes m_requestType = RequestTypes::StreamingGenerate; int m_receivedSize = 0; bool m_finished = false; }; }