//  _____         _  _____
// |_   _|____  _| ||_   _|_   __
//   | |/ _ \ \/ / __|| | \ \ / /
//   | |  __/>  <| |_ | |  \ V /
//   |_|\___/_/\_\\__||_|   \_/
// Author: Love Billenius
// License: GPL-3

#include "Page.hpp"

#include <format>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "stringutil.hpp"
#include "cpr/cpr.h"
#include "libxml/HTMLparser.h"
#include "libxml/xpath.h"

namespace {
    // Deleters that let std::unique_ptr own the libxml2 handles used by
    // Page::fetchSubpages, so each handle is released on every exit path
    // (normal return as well as exceptions).
    struct XmlDocDeleter {
        void operator()(xmlDoc *doc) const noexcept { xmlFreeDoc(doc); }
    };

    struct XPathContextDeleter {
        void operator()(xmlXPathContext *ctx) const noexcept { xmlXPathFreeContext(ctx); }
    };

    struct XPathObjectDeleter {
        void operator()(xmlXPathObject *obj) const noexcept { xmlXPathFreeObject(obj); }
    };

    struct XmlCharDeleter {
        void operator()(xmlChar *text) const noexcept { xmlFree(text); }
    };

    using XmlDocHandle = std::unique_ptr<xmlDoc, XmlDocDeleter>;
    using XPathContextHandle = std::unique_ptr<xmlXPathContext, XPathContextDeleter>;
    using XPathObjectHandle = std::unique_ptr<xmlXPathObject, XPathObjectDeleter>;
    using XmlCharHandle = std::unique_ptr<xmlChar, XmlCharDeleter>;
}

/// @brief Creates the page with the given number and immediately fetches its subpages.
/// @param number Page number that is inserted into the request URL (see url()).
/// @throws std::runtime_error if fetchSubpages() cannot download or parse the page.
///
/// NOTE(review): fetchSubpages() reads `number` through url(), so this relies on
/// `number` being declared before `subpages` in Page.hpp - confirm.
/// NOTE(review): uint_fast8_t is only guaranteed to hold 0-255; confirm that every
/// page number that is requested fits in that type.
Page::Page(const uint_fast8_t number): number(number), subpages(fetchSubpages()) {
}

/// @brief Returns a newly fetched Page for `number - 1`; this object is left unchanged.
Page Page::operator--(int) const {
    return Page(number - 1);
}

/// @brief Returns a newly fetched Page for `number + 1`; this object is left unchanged.
Page Page::operator++(int) const {
    return Page(number + 1);
}

/// @brief Steps this page back by one and re-fetches its content.
/// @return Reference to this page.
///
/// NOTE(review): the integer operand is ignored - the step is always exactly one.
Page &Page::operator-=(int) {
    number--;
    refresh();
    return *this;
}

/// @brief Builds the textual representation of all subpages.
///
/// Every subpage is appended line by line, each line terminated by '\n'. After a
/// subpage has been appended, string_utils::removeTabs,
/// string_utils::limitConsecutiveWhitespace (with MAX_WHITESPACE) and
/// string_utils::removeTrailingWhitespace are run on the accumulated text.
///
/// NOTE(review): the clean-up helpers run on the whole accumulated string inside
/// the loop, so text of earlier subpages is processed again for every later
/// subpage - confirm this is intended before hoisting them out of the loop.
///
/// @return The combined text of all subpages.
std::string Page::str() const {
    std::string ret;
    for (const std::string &pageText: subpages) {
        std::istringstream stream(pageText);
        std::string line;
        while (std::getline(stream, line)) {
            // Append in place instead of building a temporary `line + "\n"`.
            ret += line;
            ret += '\n';
        }

        string_utils::removeTabs(ret);
        string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
        string_utils::removeTrailingWhitespace(ret);
    }
    return ret;
}

/// @brief Steps this page forward by one and re-fetches its content.
/// @return Reference to this page.
///
/// NOTE(review): the integer operand is ignored - the step is always exactly one.
Page &Page::operator+=(int) {
    number++;
    refresh();
    return *this;
}

/// @brief Re-downloads the subpages and stores them if they differ from the current ones.
/// @return true if the stored subpages were replaced, false if the content was unchanged.
/// @throws std::runtime_error if fetchSubpages() cannot download or parse the page.
bool Page::refresh() {
    std::vector<std::string> newSubpages = fetchSubpages();
    const bool replace = !contentEquals(newSubpages);
    if (replace)
        subpages = std::move(newSubpages); // the local copy is not needed afterwards
    return replace;
}

/// @brief Returns the URL this page is downloaded from, built from the page number.
std::string Page::url() const {
    return std::format("https://www.svt.se/svttext/web/pages/{}.html", number);
}

/// @brief Downloads the page and extracts the text of each matching content node.
///
/// The HTML returned for url() is parsed with libxml2, and the text content of every
/// `div` whose class attribute contains "Content_screenreaderOnly" becomes one entry
/// of the result, in document order. Nodes without retrievable content are skipped.
///
/// @return One string per matching node; empty if nothing matched.
/// @throws std::runtime_error if the HTTP status is not 2xx, the HTML cannot be
///         parsed, or the XPath context/expression cannot be created/evaluated.
std::vector<std::string> Page::fetchSubpages() const {
    const cpr::Response response = cpr::Get(cpr::Url{url()});
    if (response.status_code / 100 != 2)
        throw std::runtime_error("Page not found");

    // libxml2 takes the buffer length as an int; make the narrowing explicit.
    const XmlDocHandle doc(htmlReadMemory(response.text.c_str(),
                                          static_cast<int>(response.text.size()),
                                          nullptr, nullptr,
                                          HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING));
    if (!doc)
        throw std::runtime_error("Could not parse HTML.");

    const XPathContextHandle xpathCtx(xmlXPathNewContext(doc.get()));
    if (!xpathCtx)
        throw std::runtime_error("Could not create XPath context.");

    const XPathObjectHandle xpathObj(xmlXPathEvalExpression(
        reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"),
        xpathCtx.get()));
    if (!xpathObj)
        throw std::runtime_error("Could not evaluate XPath expression.");

    std::vector<std::string> pages;
    if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
        for (int i = 0; i < nodes->nodeNr; ++i) {
            // Owned by the handle so the buffer is freed even if emplace_back throws.
            const XmlCharHandle content(xmlNodeGetContent(nodes->nodeTab[i]));
            if (!content)
                continue;
            pages.emplace_back(reinterpret_cast<const char *>(content.get()));
        }
    }

    // The handles release the XPath object, the XPath context and the document,
    // in that order, when they go out of scope.
    return pages;
}

/// @brief Checks whether the given subpages are identical to the stored ones.
/// @param subpagesOther Subpages to compare against.
/// @return true if both lists have the same length and equal elements at every index.
bool Page::contentEquals(const std::vector<std::string> &subpagesOther) const {
    return subpages == subpagesOther;
}