diff --git a/.gitignore b/.gitignore
index d63aa3e..b0cc92a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,13 @@ build
 vcpkg_installed
 CMakeCache.txt
 CMakeFiles
+.idea/
+.cmake/
+cmake_install.cmake
+
+build.ninja
+src/cmake_install.cmake
+vcpkg-manifest-install.log
+/src/txtv
+.ninja_deps
+.ninja_log
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 94c4a31..50aa9b6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,6 +1,10 @@
 find_package(LibXml2 REQUIRED)
 find_package(cpr CONFIG REQUIRED)
 
-add_executable(${PROJECT_NAME} main.cpp)
+add_executable(${PROJECT_NAME}
+    main.cpp
+    Page.hpp
+    Page.cpp
+)
 
 target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
diff --git a/src/Page.cpp b/src/Page.cpp
new file mode 100644
index 0000000..1d3dfa3
--- /dev/null
+++ b/src/Page.cpp
@@ -0,0 +1,138 @@
+//  _____         _  _____
+// |_   _|____  _| ||_   _|_   __
+//   | |/ _ \ \/ / __|| | \ \ / /
+//   | |  __/>  <| |_ | |  \ V /
+//   |_|\___/_/\_\\__||_|   \_/
+// Author: Love Billenius
+// License: GPL-3
+
+#include "Page.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <format>
+#include <iterator>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "cpr/cpr.h"
+#include "libxml/HTMLparser.h"
+#include "libxml/xpath.h"
+
+namespace {
+    // Deleters that let std::unique_ptr own libxml2 handles, so every exit path of
+    // fetchSubpages() -- including exceptions -- releases them.
+    struct DocDeleter {
+        void operator()(xmlDoc *doc) const noexcept { xmlFreeDoc(doc); }
+    };
+
+    struct XPathContextDeleter {
+        void operator()(xmlXPathContext *ctx) const noexcept { xmlXPathFreeContext(ctx); }
+    };
+
+    struct XPathObjectDeleter {
+        void operator()(xmlXPathObject *obj) const noexcept { xmlXPathFreeObject(obj); }
+    };
+
+    struct XmlStringDeleter {
+        void operator()(xmlChar *text) const noexcept { xmlFree(text); }
+    };
+} // namespace
+
+// `number` is declared before `subpages`, so it is already initialised when
+// fetchSubpages() builds the URL from it.
+Page::Page(const std::uint_fast16_t number): number(number), subpages(fetchSubpages()) {
+}
+
+// Fetches the previous page as a new object; *this is not modified.
+Page Page::operator--(int) const {
+    return Page(number - 1);
+}
+
+// Fetches the next page as a new object; *this is not modified.
+Page Page::operator++(int) const {
+    return Page(number + 1);
+}
+
+// Steps back exactly one page and reloads; the operand is ignored.
+Page &Page::operator-=(int) {
+    number--;
+    refresh();
+    return *this;
+}
+
+// Concatenates all subpages with tab characters removed.
+std::string Page::str() const {
+    std::string ret;
+    for (const std::string &subpage: subpages) {
+        const std::size_t sizeBefore = ret.size();
+        std::ranges::copy_if(subpage, std::back_inserter(ret), [](const char c) { return c != '\t'; });
+        // End every non-empty subpage with a newline so the next one starts on its own line.
+        if (ret.size() != sizeBefore && ret.back() != '\n')
+            ret += '\n';
+    }
+    return ret;
+}
+
+// Steps forward exactly one page and reloads; the operand is ignored.
+Page &Page::operator+=(int) {
+    number++;
+    refresh();
+    return *this;
+}
+
+// Re-downloads the current page; returns true when the stored subpages were replaced.
+bool Page::refresh() {
+    std::vector<std::string> newSubpages = fetchSubpages();
+    if (contentEquals(newSubpages))
+        return false;
+
+    subpages = std::move(newSubpages);
+    return true;
+}
+
+std::string Page::url() const {
+    return std::format("https://www.svt.se/svttext/web/pages/{}.html", number);
+}
+
+// Downloads the page and returns the text content of every node matched by the XPath query.
+std::vector<std::string> Page::fetchSubpages() const {
+    const cpr::Response response = cpr::Get(cpr::Url{url()});
+    if (response.status_code / 100 != 2)
+        throw std::runtime_error("Page not found");
+
+    const std::unique_ptr<xmlDoc, DocDeleter> doc(
+        htmlReadMemory(response.text.c_str(), static_cast<int>(response.text.size()), nullptr, nullptr,
+                       HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING));
+    if (!doc)
+        throw std::runtime_error("Could not parse HTML.");
+
+    const std::unique_ptr<xmlXPathContext, XPathContextDeleter> xpathCtx(xmlXPathNewContext(doc.get()));
+    if (!xpathCtx)
+        throw std::runtime_error("Could not create XPath context.");
+
+    const std::unique_ptr<xmlXPathObject, XPathObjectDeleter> xpathObj(xmlXPathEvalExpression(
+        reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"), xpathCtx.get()));
+    if (!xpathObj)
+        throw std::runtime_error("Could not evaluate XPath expression.");
+
+    std::vector<std::string> pages;
+    if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
+        for (int i = 0; i < nodes->nodeNr; ++i) {
+            const std::unique_ptr<xmlChar, XmlStringDeleter> content(xmlNodeGetContent(nodes->nodeTab[i]));
+            if (content)
+                pages.emplace_back(reinterpret_cast<const char *>(content.get()));
+        }
+    }
+
+    // Locals are destroyed in reverse order: XPath object, then context, then document.
+    return pages;
+}
+
+bool Page::contentEquals(const std::vector<std::string> &subpagesOther) const {
+    return subpages == subpagesOther;
+}
diff --git a/src/Page.hpp b/src/Page.hpp
new file mode 100644
index 0000000..0a37f99
--- /dev/null
+++ b/src/Page.hpp
@@ -0,0 +1,57 @@
+//  _____         _  _____
+// |_   _|____  _| ||_   _|_   __
+//   | |/ _ \ \/ / __|| | \ \ / /
+//   | |  __/>  <| |_ | |  \ V /
+//   |_|\___/_/\_\\__||_|   \_/
+// Author: Love Billenius
+// License: GPL-3
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+/// Page requested when no explicit number is given.
+inline constexpr std::uint_fast16_t DEFAULT_NUMBER = 100;
+
+/// A single SVT Text page and the text of its subpages.
+///
+/// Constructing a Page, refreshing it, or stepping to a neighbouring page issues an HTTP GET.
+class Page {
+private:
+    // At least 16 bits wide: an 8-bit type cannot hold page numbers above 255.
+    std::uint_fast16_t number{};
+    std::vector<std::string> subpages;
+
+public:
+    /// Downloads page `number`.
+    /// @throws std::runtime_error if the request or the HTML/XPath processing fails.
+    explicit Page(std::uint_fast16_t number = DEFAULT_NUMBER);
+
+    /// Returns the previous page as a new object; this object is left unchanged.
+    Page operator--(int) const;
+
+    /// Returns the next page as a new object; this object is left unchanged.
+    Page operator++(int) const;
+
+    /// Moves to the next page and reloads it; the argument is ignored.
+    Page &operator+=(int);
+
+    /// Moves to the previous page and reloads it; the argument is ignored.
+    Page &operator-=(int);
+
+    /// Returns the text of all subpages with tab characters removed.
+    std::string str() const;
+
+    /// Re-downloads the current page.
+    /// @return true if the content differed and was replaced, false otherwise.
+    bool refresh();
+
+private:
+    [[nodiscard]] std::string url() const;
+
+    [[nodiscard]] std::vector<std::string> fetchSubpages() const;
+
+    [[nodiscard]] bool contentEquals(const std::vector<std::string> &subpagesOther) const;
+};
diff --git a/src/main.cpp b/src/main.cpp
index 3b03c30..3b04661 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,5 +1,5 @@
 #include <iostream>
 
-int int main(int argc, char *argv[]) {
-  std::cout << "Hello, world!" << std::endl;
+int main(int argc, char *argv[]) {
+    std::cout << "Hello, world!" << std::endl;
 }