//  _____         _  _____
// |_   _|____  _| ||_   _|_   __
//   | |/ _ \ \/ / __|| | \ \ / /
//   | |  __/>  <| |_ | |  \ V /
//   |_|\___/_/\_\\__||_|   \_/
// Author: Love Billenius
// License: GPL-3

#include "Page.hpp"

#include <algorithm>
#include <cctype>
#include <format>
#include <limits>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

#include "ansi.hpp"
#include "stringutil.hpp"
#include "cpr/cpr.h"
#include "libxml/HTMLparser.h"
#include "libxml/xpath.h"

namespace {
    // Deleters that let std::unique_ptr own libxml2 handles, so every exit
    // path of fetchSubpage (including exceptions) releases them.
    struct XmlDocDeleter {
        void operator()(xmlDoc *doc) const noexcept { xmlFreeDoc(doc); }
    };

    struct XPathContextDeleter {
        void operator()(xmlXPathContext *ctx) const noexcept { xmlXPathFreeContext(ctx); }
    };

    struct XPathObjectDeleter {
        void operator()(xmlXPathObject *obj) const noexcept { xmlXPathFreeObject(obj); }
    };

    struct XmlCharDeleter {
        void operator()(xmlChar *text) const noexcept { xmlFree(text); }
    };
}

// Returns true when `s` is non-empty and consists solely of decimal digits.
// An empty string is not a number.
bool is_number(const std::string_view s) {
    return !s.empty() &&
           std::ranges::all_of(s, [](const unsigned char c) { return std::isdigit(c) != 0; });
}

// Wraps every space-delimited word of `content` that is a number in
// ansi::ITALIC ... ansi::CLEAR.
//
// The string is walked from the back so that the insertions never shift the
// positions that are still to be examined. Scanning stops once the separating
// space found is at or before index `leave_chars`; the word directly after that
// space is still examined.
//
// NOTE(review): only ' ' separates words; a number adjacent to '\n' is part of
// a longer "word" and is left untouched.
void italize_numbers(std::string &content, size_t leave_chars = 0) {
    if (content.empty())
        return; // nothing to scan, and `end - 1` below requires end >= 1

    size_t end = content.size();

    // Process backwards, word by word
    for (;;) {
        const size_t space = content.rfind(' ', end - 1);
        const bool at_front = space == std::string::npos;
        const size_t begin = at_front ? 0 : space + 1;

        // View is only used before the insertions invalidate it.
        const std::string_view word = std::string_view(content).substr(begin, end - begin);
        if (is_number(word)) {
            // Insert the closing code first so `begin` stays valid.
            content.insert(end, ansi::CLEAR);
            content.insert(begin, ansi::ITALIC);
        }

        if (at_front || leave_chars >= space)
            break;

        end = space;
    }
}

// Creates the page with the given number and immediately fetches its content.
// Propagates whatever fetchSubpage() throws.
Page::Page(const int number)
    : m_number(number), m_subpage(fetchSubpage()) {
}

// Returns a newly fetched Page for the previous page number; *this is unchanged.
Page Page::operator--(int) const {
    return Page(m_number - 1);
}

// Returns a newly fetched Page for the next page number; *this is unchanged.
Page Page::operator++(int) const {
    return Page(m_number + 1);
}

// Steps this page back by exactly one and refetches its content.
// The integer argument is ignored.
Page &Page::operator-=(int) {
    m_number--;
    refresh();
    return *this;
}

// Returns str() decorated with ANSI codes: the first line is bold, and numbers
// after the first line are italic.
std::string Page::str_pretty() const {
    std::string content = str();
    content.insert(0, ansi::BOLD);

    // The bold region ends at the first newline, or spans the whole text if
    // there is none.
    size_t line_end = content.find('\n');
    if (line_end == std::string::npos)
        line_end = content.size();
    content.insert(line_end, ansi::CLEAR);

    // Skip the header line (including the CLEAR code just inserted).
    italize_numbers(content, line_end + ansi::CLEAR.size());
    return content;
}

// Returns the page text with every line terminated by '\n', consecutive
// whitespace limited to MAX_WHITESPACE and trailing whitespace removed.
std::string Page::str() const {
    std::string ret;
    ret.reserve(m_subpage.size() + 1);

    std::istringstream stream(m_subpage);
    std::string line;
    while (std::getline(stream, line)) {
        ret += line;
        ret += '\n';
    }

    string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
    string_utils::removeTrailingWhitespace(ret);
    return ret;
}

// Steps this page forward by exactly one and refetches its content.
// The integer argument is ignored.
Page &Page::operator+=(int) {
    m_number++;
    refresh();
    return *this;
}

// Refetches the content for the current page number.
// Returns true when the fetched content differs from the stored content (which
// is then replaced), false otherwise. Propagates fetchSubpage() exceptions.
bool Page::refresh() {
    std::string newSubpage = fetchSubpage();
    const bool replace = newSubpage != m_subpage;
    if (replace)
        m_subpage = std::move(newSubpage);
    return replace;
}

// Returns the URL of the HTML document for the current page number.
std::string Page::url() const {
    return std::format("https://www.svt.se/svttext/web/pages/{}.html", m_number);
}

// Downloads the page and extracts the text of the first element matched by
// //div[contains(@class,'Content_screenreaderOnly')].
//
// Returns an empty string when no matching node yields content.
// Throws std::runtime_error when the HTTP status is not 2xx, when the response
// is too large to hand to libxml2, or when parsing / XPath setup fails.
std::string Page::fetchSubpage() const {
    const cpr::Response response = cpr::Get(cpr::Url{url()});
    if (response.status_code / 100 != 2)
        throw std::runtime_error("Page not found");

    // htmlReadMemory takes the buffer length as int; refuse to truncate.
    if (response.text.size() > static_cast<size_t>(std::numeric_limits<int>::max()))
        throw std::runtime_error("Could not parse HTML.");

    // Declaration order matters: members are destroyed in reverse, so the XPath
    // object is freed first, then the context, then the document.
    const std::unique_ptr<xmlDoc, XmlDocDeleter> doc{
        htmlReadMemory(response.text.c_str(), static_cast<int>(response.text.size()),
                       nullptr, nullptr,
                       HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING)};
    if (!doc)
        throw std::runtime_error("Could not parse HTML.");

    const std::unique_ptr<xmlXPathContext, XPathContextDeleter> xpathCtx{
        xmlXPathNewContext(doc.get())};
    if (!xpathCtx)
        throw std::runtime_error("Could not create XPath context.");

    const std::unique_ptr<xmlXPathObject, XPathObjectDeleter> xpathObj{
        xmlXPathEvalExpression(
            reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"),
            xpathCtx.get())};
    if (!xpathObj)
        throw std::runtime_error("Could not evaluate XPath expression.");

    // There's only one valid page: take the first node that has content.
    std::string page;
    if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
        for (int i = 0; i < nodes->nodeNr; i++) {
            const std::unique_ptr<xmlChar, XmlCharDeleter> content{
                xmlNodeGetContent(nodes->nodeTab[i])};
            if (!content)
                continue;

            page = reinterpret_cast<const char *>(content.get());
            break;
        }
    }

    return page;
}