156 lines
4.0 KiB
C++
156 lines
4.0 KiB
C++
// _____ _ _____
|
|
// |_ _|____ _| ||_ _|_ __
|
|
// | |/ _ \ \/ / __|| | \ \ / /
|
|
// | | __/> <| |_ | | \ V /
|
|
// |_|\___/_/\_\\__||_| \_/
|
|
// Author: Love Billenius <lovebillenius@disroot.org>
|
|
// License: GPL-3
|
|
|
|
#include "Page.hpp"

#include <algorithm>
#include <cctype>
#include <format>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>

#include "ansi.hpp"
#include "stringutil.hpp"
#include "cpr/cpr.h"
#include "libxml/HTMLparser.h"
#include "libxml/xpath.h"
|
|
|
|
|
|
// Reports whether `s` is a non-empty run of decimal digits.
//
// Returns false for the empty string: a range algorithm over zero characters
// is vacuously true, which would otherwise classify "" as a number.
// Characters are passed to std::isdigit as unsigned char, because calling it
// with a negative char value is undefined behaviour.
bool is_number(const std::string_view s) {
    if (s.empty())
        return false;

    return std::all_of(s.begin(), s.end(), [](const unsigned char c) {
        return std::isdigit(c) != 0;
    });
}
|
|
|
|
void italize_numbers(std::string &content, size_t leave_chars = 0) {
|
|
size_t end = content.size();
|
|
|
|
// Process backwards, word by word
|
|
for (;;) {
|
|
const size_t space = content.rfind(' ', end - 1);
|
|
const size_t begin = (space == std::string::npos) ? 0 : space + 1;
|
|
const size_t word_length = end - begin;
|
|
|
|
if (is_number(content.substr(begin, word_length))) {
|
|
content.insert(end, ansi::CLEAR);
|
|
content.insert(begin, ansi::ITALIC);
|
|
}
|
|
|
|
if (space == std::string::npos)
|
|
break;
|
|
if (leave_chars >= space)
|
|
break;
|
|
|
|
end = space;
|
|
}
|
|
}
|
|
|
|
|
|
// Creates the page with the given number and immediately fills m_subpage from
// fetchSubpage(), so any exception thrown by fetchSubpage() escapes the
// constructor.
// NOTE(review): fetchSubpage() reads m_number through url(), so this relies on
// m_number being declared before m_subpage in the class; confirm in Page.hpp.
Page::Page(const int number)
    : m_number(number),
      m_subpage(fetchSubpage()) {}
|
|
|
|
// Postfix decrement. This object is left untouched; the result is a newly
// constructed Page for the preceding page number. The unnamed int is only the
// postfix marker.
Page Page::operator--(int) const {
    const auto previous = m_number - 1;
    return Page(previous);
}
|
|
|
|
// Postfix increment. This object is left untouched; the result is a newly
// constructed Page for the following page number. The unnamed int is only the
// postfix marker.
Page Page::operator++(int) const {
    const auto next = m_number + 1;
    return Page(next);
}
|
|
|
|
// Steps this page back by exactly one and reloads its content.
//
// The int operand is unnamed and ignored: the step is always one, whatever
// value the caller passes. m_number is changed before refresh() runs, so if
// refresh() throws, the page number stays decremented.
Page &Page::operator-=(int) {
    --m_number;
    refresh();
    return *this;
}
|
|
|
|
std::string Page::str_pretty() const {
|
|
std::string content = str();
|
|
content.insert(0, ansi::BOLD);
|
|
size_t line_end = content.find('\n');
|
|
if (line_end == std::string::npos)
|
|
line_end = content.size();
|
|
content.insert(line_end, ansi::CLEAR);
|
|
|
|
italize_numbers(content, line_end + ansi::CLEAR.size());
|
|
return content;
|
|
}
|
|
|
|
std::string Page::str() const {
|
|
std::string ret;
|
|
|
|
std::istringstream stream(m_subpage);
|
|
std::string line;
|
|
while (std::getline(stream, line))
|
|
ret += line + "\n";
|
|
|
|
string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
|
|
string_utils::removeTrailingWhitespace(ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Advances this page by exactly one and reloads its content.
//
// The int operand is unnamed and ignored: the step is always one, whatever
// value the caller passes. m_number is changed before refresh() runs, so if
// refresh() throws, the page number stays incremented.
Page &Page::operator+=(int) {
    ++m_number;
    refresh();
    return *this;
}
|
|
|
|
bool Page::refresh() {
|
|
std::string newSubpage = fetchSubpage();
|
|
const bool replace = newSubpage != m_subpage;
|
|
if (replace)
|
|
m_subpage = newSubpage;
|
|
|
|
return replace;
|
|
}
|
|
|
|
// Builds the URL for this page by substituting m_number into the
// svt.se page address pattern.
std::string Page::url() const {
    static constexpr char pattern[] = "https://www.svt.se/svttext/web/pages/{}.html";
    return std::format(pattern, m_number);
}
|
|
|
|
std::string Page::fetchSubpage() const {
|
|
const cpr::Response response = cpr::Get(cpr::Url{url()});
|
|
if (response.status_code / 100 != 2)
|
|
throw std::runtime_error("Page not found");
|
|
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
|
|
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
|
|
if (doc == nullptr)
|
|
throw std::runtime_error("Could not parse HTML.");
|
|
|
|
const auto xpathCtx = xmlXPathNewContext(doc);
|
|
if (!xpathCtx) {
|
|
xmlFreeDoc(doc);
|
|
throw std::runtime_error("Could not create XPath context.");
|
|
}
|
|
const auto xpathObj = xmlXPathEvalExpression(
|
|
reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"), xpathCtx);
|
|
|
|
if (!xpathObj) {
|
|
xmlXPathFreeContext(xpathCtx);
|
|
xmlFreeDoc(doc);
|
|
throw std::runtime_error("Could not evaluate XPath expression.");
|
|
}
|
|
|
|
// There's only one valid page
|
|
std::string page;
|
|
if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
|
|
for (int i = 0; i < nodes->nodeNr; i++) {
|
|
xmlChar *content = xmlNodeGetContent(nodes->nodeTab[i]);
|
|
if (!content)
|
|
continue;
|
|
page = reinterpret_cast<const char *>(content);
|
|
xmlFree(content);
|
|
break;
|
|
}
|
|
}
|
|
|
|
xmlXPathFreeObject(xpathObj);
|
|
xmlXPathFreeContext(xpathCtx);
|
|
xmlFreeDoc(doc);
|
|
|
|
return page;
|
|
}
|