page
This commit is contained in:
parent
8e21df48c8
commit
baee064d3b
10
.gitignore
vendored
10
.gitignore
vendored
@ -2,3 +2,13 @@ build
|
||||
vcpkg_installed
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
.idea/
|
||||
.cmake/
|
||||
cmake_install.cmake
|
||||
|
||||
build.ninja
|
||||
src/cmake_install.cmake
|
||||
vcpkg-manifest-install.log
|
||||
/src/txtv
|
||||
.ninja_deps
|
||||
.ninja_log
|
||||
|
@ -1,6 +1,10 @@
|
||||
find_package(LibXml2 REQUIRED)
|
||||
find_package(cpr CONFIG REQUIRED)
|
||||
|
||||
add_executable(${PROJECT_NAME} main.cpp)
|
||||
add_executable(${PROJECT_NAME}
|
||||
main.cpp
|
||||
Page.hpp
|
||||
Page.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
|
||||
|
115
src/Page.cpp
Normal file
115
src/Page.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
// _____ _ _____
|
||||
// |_ _|____ _| ||_ _|_ __
|
||||
// | |/ _ \ \/ / __|| | \ \ / /
|
||||
// | | __/> <| |_ | | \ V /
|
||||
// |_|\___/_/\_\\__||_| \_/
|
||||
// Author: Love Billenius <lovebillenius@disroot.org>
|
||||
// License: GPL-3
|
||||
|
||||
#include "Page.hpp"

#include <algorithm>
#include <cstdint>
#include <format>
#include <limits>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "cpr/cpr.h"
#include "libxml/HTMLparser.h"
#include "libxml/xpath.h"
|
||||
|
||||
|
||||
// Creates the page with the given number and downloads its subpages right away.
//
// `number` is declared before `subpages` in the class, so it is already set
// when fetchSubpages() builds the request URL from it.
// Exceptions thrown by fetchSubpages() (std::runtime_error) propagate to the caller.
Page::Page(const uint_fast8_t number)
    : number(number),
      subpages(fetchSubpages()) {}
|
||||
|
||||
// Returns a newly fetched Page for the preceding page number.
//
// This object is not modified. The subtraction is done in int and converted
// back to uint_fast8_t, so stepping below zero wraps around modulo the width
// of uint_fast8_t.
Page Page::operator--(int) const {
    const auto previousNumber = static_cast<uint_fast8_t>(number - 1);
    return Page(previousNumber);
}
|
||||
|
||||
// Returns a newly fetched Page for the following page number.
//
// This object is not modified. The addition is done in int and converted
// back to uint_fast8_t, so stepping past the maximum wraps around modulo the
// width of uint_fast8_t.
Page Page::operator++(int) const {
    const auto nextNumber = static_cast<uint_fast8_t>(number + 1);
    return Page(nextNumber);
}
|
||||
|
||||
// Moves this page one number back and reloads its subpages.
//
// The int argument is ignored: the step is always exactly one.
// NOTE(review): confirm whether the argument was meant to be the step size.
//
// If the reload throws, the previous page number is restored before the
// exception is rethrown, so `number` and `subpages` never disagree.
// Returns a reference to this object.
Page &Page::operator-=(int) {
    const uint_fast8_t previousNumber = number;
    --number;
    try {
        refresh();
    } catch (...) {
        number = previousNumber;
        throw;
    }
    return *this;
}
|
||||
|
||||
std::string Page::str() const {
|
||||
std::string ret;
|
||||
for (const std::string &_pageText: subpages) {
|
||||
std::string pageText = _pageText;
|
||||
pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
|
||||
std::istringstream stream(pageText);
|
||||
std::string line;
|
||||
while (std::getline(stream, line))
|
||||
ret += line + "\n";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Moves this page one number forward and reloads its subpages.
//
// The int argument is ignored: the step is always exactly one.
// NOTE(review): confirm whether the argument was meant to be the step size.
//
// If the reload throws, the previous page number is restored before the
// exception is rethrown, so `number` and `subpages` never disagree.
// Returns a reference to this object.
Page &Page::operator+=(int) {
    const uint_fast8_t previousNumber = number;
    ++number;
    try {
        refresh();
    } catch (...) {
        number = previousNumber;
        throw;
    }
    return *this;
}
|
||||
|
||||
bool Page::refresh() {
|
||||
std::vector<std::string> newSubpages = fetchSubpages();
|
||||
const bool replace = !contentEquals(newSubpages);
|
||||
if (replace)
|
||||
subpages = newSubpages;
|
||||
|
||||
return replace;
|
||||
}
|
||||
|
||||
// Builds the address of the HTML document for the current page number.
std::string Page::url() const {
    std::string address = std::format("https://www.svt.se/svttext/web/pages/{}.html", number);
    return address;
}
|
||||
|
||||
std::vector<std::string> Page::fetchSubpages() const {
|
||||
const cpr::Response response = cpr::Get(url());
|
||||
if (response.status_code / 100 != 2)
|
||||
throw std::runtime_error("Page not found");
|
||||
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
|
||||
HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
|
||||
if (doc == nullptr)
|
||||
throw std::runtime_error("Could not parse HTML.");
|
||||
|
||||
const auto xpathCtx = xmlXPathNewContext(doc);
|
||||
if (!xpathCtx) {
|
||||
xmlFreeDoc(doc);
|
||||
throw std::runtime_error("Could not create XPath context.");
|
||||
}
|
||||
const auto xpathObj = xmlXPathEvalExpression(
|
||||
reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"), xpathCtx);
|
||||
|
||||
if (!xpathObj) {
|
||||
xmlXPathFreeContext(xpathCtx);
|
||||
xmlFreeDoc(doc);
|
||||
throw std::runtime_error("Could not evaluate XPath expression.");
|
||||
}
|
||||
|
||||
std::vector<std::string> pages;
|
||||
if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
|
||||
for (int i = 0; i < nodes->nodeNr; ++i) {
|
||||
xmlChar *content = xmlNodeGetContent(nodes->nodeTab[i]);
|
||||
if (!content)
|
||||
continue;
|
||||
pages.emplace_back(reinterpret_cast<const char *>(content));
|
||||
xmlFree(content);
|
||||
}
|
||||
}
|
||||
|
||||
xmlXPathFreeObject(xpathObj);
|
||||
xmlXPathFreeContext(xpathCtx);
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
return pages;
|
||||
}
|
||||
|
||||
// Reports whether `subpagesOther` holds exactly the same subpage texts, in
// the same order, as this page currently stores.
bool Page::contentEquals(const std::vector<std::string> &subpagesOther) const {
    // std::vector's operator== already compares sizes first and then elements pairwise.
    return subpages == subpagesOther;
}
|
42
src/Page.hpp
Normal file
42
src/Page.hpp
Normal file
@ -0,0 +1,42 @@
|
||||
// _____ _ _____
|
||||
// |_ _|____ _| ||_ _|_ __
|
||||
// | |/ _ \ \/ / __|| | \ \ / /
|
||||
// | | __/> <| |_ | | \ V /
|
||||
// |_|\___/_/\_\\__||_| \_/
|
||||
// Author: Love Billenius <lovebillenius@disroot.org>
|
||||
// License: GPL-3
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Page number used when a Page is constructed without an explicit number.
// `inline` gives the header a single definition shared by all translation units.
inline constexpr uint_fast8_t DEFAULT_NUMBER = 100;
|
||||
|
||||
class Page {
|
||||
private:
|
||||
uint_fast8_t number{};
|
||||
std::vector<std::string> subpages;
|
||||
|
||||
public:
|
||||
explicit Page(uint_fast8_t number = DEFAULT_NUMBER);
|
||||
|
||||
Page operator--(int) const;
|
||||
|
||||
Page operator++(int) const;
|
||||
|
||||
Page &operator+=(int);
|
||||
|
||||
Page &operator-=(int);
|
||||
|
||||
std::string str() const;
|
||||
|
||||
|
||||
bool refresh();
|
||||
|
||||
private:
|
||||
[[nodiscard]] std::string url() const;
|
||||
|
||||
[[nodiscard]] std::vector<std::string> fetchSubpages() const;
|
||||
|
||||
[[nodiscard]] bool contentEquals(const std::vector<std::string> &subpagesOther) const;
|
||||
};
|
@ -1,5 +1,5 @@
|
||||
#include <iostream>
|
||||
|
||||
int int main(int argc, char *argv[]) {
|
||||
// Program entry point: writes a greeting to standard output, flushes it and
// exits successfully. The command-line arguments are not used.
int main(int argc, char *argv[]) {
    static_cast<void>(argc);
    static_cast<void>(argv);

    std::cout << "Hello, world!";
    std::cout << std::endl;
    return 0;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user