page
This commit is contained in:
		@@ -1,6 +1,10 @@
 | 
			
		||||
# External dependencies used by Page.cpp: libxml2 (HTML/XPath parsing) and cpr (HTTP requests).
find_package(LibXml2 REQUIRED)
find_package(cpr CONFIG REQUIRED)

# The target must be declared exactly once; a second add_executable() with the
# same name is a configure-time error, so all sources are listed here.
add_executable(${PROJECT_NAME}
        main.cpp
        Page.hpp
        Page.cpp
)

target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										115
									
								
								src/Page.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								src/Page.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,115 @@
 | 
			
		||||
//  _____         _  _____
 | 
			
		||||
// |_   _|____  _| ||_   _|_   __
 | 
			
		||||
//   | |/ _ \ \/ / __|| | \ \ / /
 | 
			
		||||
//   | |  __/>  <| |_ | |  \ V /
 | 
			
		||||
//   |_|\___/_/\_\\__||_|   \_/
 | 
			
		||||
// Author: Love Billenius <lovebillenius@disroot.org>
 | 
			
		||||
// License: GPL-3
 | 
			
		||||
 | 
			
		||||
#include "Page.hpp"

#include <algorithm>
#include <format>
#include <limits>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "cpr/cpr.h"
#include "libxml/HTMLparser.h"
#include "libxml/xpath.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Builds the page with the given number and immediately downloads its subpages.
// `number` is declared before `subpages` in the class, so it is already set when
// fetchSubpages() reads it to build the URL.
Page::Page(const uint_fast8_t number)
    : number(number),
      subpages(fetchSubpages()) {}
 | 
			
		||||
 | 
			
		||||
// Returns a newly fetched Page for the number just below this one.
// The current object is left untouched (the operator is const).
Page Page::operator--(int) const {
    Page previous(number - 1);
    return previous;
}
 | 
			
		||||
 | 
			
		||||
// Returns a newly fetched Page for the number just above this one.
// The current object is left untouched (the operator is const).
Page Page::operator++(int) const {
    Page following(number + 1);
    return following;
}
 | 
			
		||||
 | 
			
		||||
// Moves this page `delta` numbers backwards and reloads its subpages.
//
// The argument used to be ignored (the page always moved by exactly one);
// it is now honoured, so `page -= 1` behaves as before.
//
// If reloading throws, the previous number is restored so that `number` and
// `subpages` keep describing the same page, and the exception is rethrown.
//
// Returns a reference to this page.
Page &Page::operator-=(const int delta) {
    const auto previousNumber = number;
    number = static_cast<decltype(number)>(number - delta);
    try {
        refresh();
    } catch (...) {
        number = previousNumber;
        throw;
    }
    return *this;
}
 | 
			
		||||
 | 
			
		||||
std::string Page::str() const {
 | 
			
		||||
    std::string ret;
 | 
			
		||||
    for (const std::string &_pageText: subpages) {
 | 
			
		||||
        std::string pageText = _pageText;
 | 
			
		||||
        pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
 | 
			
		||||
        std::istringstream stream(pageText);
 | 
			
		||||
        std::string line;
 | 
			
		||||
        while (std::getline(stream, line))
 | 
			
		||||
            ret += line + "\n";
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Moves this page `delta` numbers forwards and reloads its subpages.
//
// The argument used to be ignored (the page always moved by exactly one);
// it is now honoured, so `page += 1` behaves as before.
//
// If reloading throws, the previous number is restored so that `number` and
// `subpages` keep describing the same page, and the exception is rethrown.
//
// Returns a reference to this page.
Page &Page::operator+=(const int delta) {
    const auto previousNumber = number;
    number = static_cast<decltype(number)>(number + delta);
    try {
        refresh();
    } catch (...) {
        number = previousNumber;
        throw;
    }
    return *this;
}
 | 
			
		||||
 | 
			
		||||
bool Page::refresh() {
 | 
			
		||||
    std::vector<std::string> newSubpages = fetchSubpages();
 | 
			
		||||
    const bool replace = !contentEquals(newSubpages);
 | 
			
		||||
    if (replace)
 | 
			
		||||
        subpages = newSubpages;
 | 
			
		||||
 | 
			
		||||
    return replace;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Builds the web address of this page from its number.
std::string Page::url() const {
    std::string address = std::format("https://www.svt.se/svttext/web/pages/{}.html", number);
    return address;
}
 | 
			
		||||
 | 
			
		||||
std::vector<std::string> Page::fetchSubpages() const {
 | 
			
		||||
    const cpr::Response response = cpr::Get(url());
 | 
			
		||||
    if (response.status_code / 100 != 2)
 | 
			
		||||
        throw std::runtime_error("Page not found");
 | 
			
		||||
    const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
 | 
			
		||||
                                          HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
 | 
			
		||||
    if (doc == nullptr)
 | 
			
		||||
        throw std::runtime_error("Could not parse HTML.");
 | 
			
		||||
 | 
			
		||||
    const auto xpathCtx = xmlXPathNewContext(doc);
 | 
			
		||||
    if (!xpathCtx) {
 | 
			
		||||
        xmlFreeDoc(doc);
 | 
			
		||||
        throw std::runtime_error("Could not create XPath context.");
 | 
			
		||||
    }
 | 
			
		||||
    const auto xpathObj = xmlXPathEvalExpression(
 | 
			
		||||
        reinterpret_cast<const xmlChar *>("//div[contains(@class,'Content_screenreaderOnly')]"), xpathCtx);
 | 
			
		||||
 | 
			
		||||
    if (!xpathObj) {
 | 
			
		||||
        xmlXPathFreeContext(xpathCtx);
 | 
			
		||||
        xmlFreeDoc(doc);
 | 
			
		||||
        throw std::runtime_error("Could not evaluate XPath expression.");
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::vector<std::string> pages;
 | 
			
		||||
    if (const xmlNodeSetPtr nodes = xpathObj->nodesetval) {
 | 
			
		||||
        for (int i = 0; i < nodes->nodeNr; ++i) {
 | 
			
		||||
            xmlChar *content = xmlNodeGetContent(nodes->nodeTab[i]);
 | 
			
		||||
            if (!content)
 | 
			
		||||
                continue;
 | 
			
		||||
            pages.emplace_back(reinterpret_cast<const char *>(content));
 | 
			
		||||
            xmlFree(content);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    xmlXPathFreeObject(xpathObj);
 | 
			
		||||
    xmlXPathFreeContext(xpathCtx);
 | 
			
		||||
    xmlFreeDoc(doc);
 | 
			
		||||
 | 
			
		||||
    return pages;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reports whether `subpagesOther` holds exactly the same subpage texts, in the
// same order, as this page currently stores.
bool Page::contentEquals(const std::vector<std::string> &subpagesOther) const {
    // std::vector's operator== checks the sizes first and then compares element-wise.
    return subpages == subpagesOther;
}
 | 
			
		||||
							
								
								
									
										42
									
								
								src/Page.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								src/Page.hpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
//  _____         _  _____
 | 
			
		||||
// |_   _|____  _| ||_   _|_   __
 | 
			
		||||
//   | |/ _ \ \/ / __|| | \ \ / /
 | 
			
		||||
//   | |  __/>  <| |_ | |  \ V /
 | 
			
		||||
//   |_|\___/_/\_\\__||_|   \_/
 | 
			
		||||
// Author: Love Billenius <lovebillenius@disroot.org>
 | 
			
		||||
// License: GPL-3
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <cstdint>
#include <string>
#include <vector>

// Page number a Page uses when none is passed to its constructor.
inline constexpr std::uint_fast8_t DEFAULT_NUMBER = 100;

// A numbered page together with the text of its subpages.
class Page {
private:
    // NOTE(review): uint_fast8_t is only guaranteed to hold 0-255; confirm that
    // no page number above 255 ever has to be represented.
    std::uint_fast8_t number{};

    // Text of each subpage, as last downloaded.
    std::vector<std::string> subpages;

public:
    // Creates the page with the given number and downloads its subpages.
    // Throws std::runtime_error if the page cannot be downloaded or parsed.
    explicit Page(std::uint_fast8_t number = DEFAULT_NUMBER);

    // Returns a newly downloaded Page for the number below this one.
    // This object is not modified, so the result must not be discarded.
    [[nodiscard]] Page operator--(int) const;

    // Returns a newly downloaded Page for the number above this one.
    // This object is not modified, so the result must not be discarded.
    [[nodiscard]] Page operator++(int) const;

    // Moves this page forwards, reloads its subpages and returns *this.
    Page &operator+=(int);

    // Moves this page backwards, reloads its subpages and returns *this.
    Page &operator-=(int);

    // Returns the text of all subpages with tab characters removed and every
    // line terminated by '\n'.
    [[nodiscard]] std::string str() const;

    // Downloads the subpages again; returns true if the stored content changed.
    bool refresh();

private:
    // Web address of this page, derived from its number.
    [[nodiscard]] std::string url() const;

    // Downloads the page and returns the text of each subpage.
    [[nodiscard]] std::vector<std::string> fetchSubpages() const;

    // True when `subpagesOther` equals the stored subpages element by element.
    [[nodiscard]] bool contentEquals(const std::vector<std::string> &subpagesOther) const;
};
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
// Program entry point: prints a greeting to standard output.
// The command-line arguments are currently unused.
int main([[maybe_unused]] int argc, [[maybe_unused]] char *argv[]) {
    // '\n' instead of std::endl: no explicit flush is needed, the stream is
    // flushed at normal program exit.
    std::cout << "Hello, world!" << '\n';
    return 0;
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user