fix print

This commit is contained in:
Love 2024-09-16 16:58:28 +02:00
parent baee064d3b
commit 478a21aedb
6 changed files with 135 additions and 5 deletions

View File

@ -5,6 +5,8 @@ add_executable(${PROJECT_NAME}
main.cpp main.cpp
Page.hpp Page.hpp
Page.cpp Page.cpp
stringutil.hpp
stringutil.cpp
) )
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr) target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)

View File

@ -8,6 +8,9 @@
#include "Page.hpp" #include "Page.hpp"
#include <iostream>
#include "stringutil.hpp"
#include "cpr/cpr.h" #include "cpr/cpr.h"
#include "libxml/HTMLparser.h" #include "libxml/HTMLparser.h"
#include "libxml/xpath.h" #include "libxml/xpath.h"
@ -32,13 +35,16 @@ Page &Page::operator-=(int) {
std::string Page::str() const { std::string Page::str() const {
std::string ret; std::string ret;
for (const std::string &_pageText: subpages) {
std::string pageText = _pageText; for (const std::string &pageText: subpages) {
pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
std::istringstream stream(pageText); std::istringstream stream(pageText);
std::string line; std::string line;
while (std::getline(stream, line)) while (std::getline(stream, line))
ret += line + "\n"; ret += line + "\n";
string_utils::removeTabs(ret);
string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
string_utils::removeTrailingWhitespace(ret);
} }
return ret; return ret;
} }
@ -63,7 +69,7 @@ std::string Page::url() const {
} }
std::vector<std::string> Page::fetchSubpages() const { std::vector<std::string> Page::fetchSubpages() const {
const cpr::Response response = cpr::Get(url()); const cpr::Response response = cpr::Get(cpr::Url{url()});
if (response.status_code / 100 != 2) if (response.status_code / 100 != 2)
throw std::runtime_error("Page not found"); throw std::runtime_error("Page not found");
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr, const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,

View File

@ -11,6 +11,7 @@
#include <vector> #include <vector>
static constexpr uint_fast8_t DEFAULT_NUMBER = 100; static constexpr uint_fast8_t DEFAULT_NUMBER = 100;
static constexpr uint_fast8_t MAX_WHITESPACE = 2;
class Page { class Page {
private: private:

View File

@ -1,5 +1,8 @@
#include <iostream> #include <iostream>
#include "Page.hpp"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
std::cout << "Hello, world!" << std::endl; const auto page = Page();
std::cout << page.str() << std::endl;
} }

72
src/stringutil.cpp Normal file
View File

@ -0,0 +1,72 @@
#include "stringutil.hpp"
#include <algorithm>
#include <cctype>
#include <sstream>
namespace string_utils {
namespace {
/**
 * @brief Whitespace test for a single character.
 *
 * Taking the argument as unsigned char keeps the call to std::isspace
 * well-defined for characters with negative char values.
 */
bool isSpaceChar(const unsigned char c) {
    return std::isspace(c) != 0;
}
} // namespace

/**
 * @brief Checks whether every character of @p str is whitespace (std::isspace).
 *
 * @param str The string to check.
 * @return true if all characters are whitespace; an empty string also yields true.
 */
bool isAllWhitespace(const std::string &str) {
    return std::all_of(str.begin(), str.end(), isSpaceChar);
}

/**
 * @brief Removes trailing whitespace-only lines from @p str.
 *
 * Everything from the first newline that follows the last non-whitespace
 * character is dropped, so the result does not end with a newline. Trailing
 * blanks on the last line that contains text are kept. A string without any
 * non-whitespace character is cleared.
 *
 * @param str The string to trim in place.
 */
void removeTrailingWhitespace(std::string &str) {
    // Scan backwards once for the last character that is real text.
    const auto lastText = std::find_if(str.rbegin(), str.rend(),
                                       [](const unsigned char c) { return !isSpaceChar(c); });
    if (lastText == str.rend()) {
        str.clear(); // nothing but whitespace (or already empty)
        return;
    }
    // base() points one past the last text character, i.e. at the start of the whitespace tail.
    const auto tailStart = static_cast<std::size_t>(lastText.base() - str.begin());
    const std::size_t firstNewline = str.find('\n', tailStart);
    if (firstNewline != std::string::npos)
        str.erase(firstNewline);
}

/**
 * @brief Removes every tab character from @p str.
 *
 * @param str The string to modify in place.
 */
void removeTabs(std::string &str) {
    str.erase(std::remove(str.begin(), str.end(), '\t'), str.end());
}

/**
 * @brief Limits runs of whitespace-only lines in @p str.
 *
 * The string is split on '\n'. Whitespace-only lines before the first line
 * with text are dropped; afterwards at most @p maxWhitespace consecutive
 * whitespace-only lines are kept. Every kept line is written back followed by
 * '\n', so a non-empty result always ends with a newline.
 *
 * @param str The string to process in place.
 * @param maxWhitespace The maximum allowed consecutive whitespace-only lines.
 */
void limitConsecutiveWhitespace(std::string &str, const uint_fast8_t maxWhitespace) {
    const std::size_t blankLimit = maxWhitespace;
    std::istringstream input(str);
    std::string result;
    result.reserve(str.size() + 1); // at most one extra '\n' is appended
    std::string line;
    // Counted in std::size_t: a narrow counter would wrap on very long blank
    // runs and start emitting blank lines again.
    std::size_t blankRun = 0;
    bool seenText = false;
    while (std::getline(input, line)) {
        if (isAllWhitespace(line)) {
            if (!seenText)
                continue; // skip leading blank lines
            if (++blankRun > blankLimit)
                continue; // run already at the allowed length
        } else {
            seenText = true;
            blankRun = 0;
        }
        result += line;
        result += '\n';
    }
    str.swap(result);
}
}

46
src/stringutil.hpp Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <cstdint>
#include <string>
/**
 * @brief String utility functions.
 *
 * Helpers for cleaning up multi-line text: whitespace detection, tab removal
 * and trimming or limiting whitespace-only lines. The mutating helpers modify
 * their argument in place.
 */
namespace string_utils {
/**
 * @brief Checks if a given string consists solely of whitespace characters.
 *
 * Whitespace is decided per character with std::isspace. An empty string is
 * reported as all-whitespace.
 *
 * @param str The string to check.
 * @return true If all characters in the string are whitespace (or the string is empty).
 * @return false Otherwise.
 */
bool isAllWhitespace(const std::string& str);
/**
 * @brief Removes trailing whitespace lines from the given string.
 *
 * This function removes all consecutive empty or whitespace-only lines at the end
 * of the input string, together with the newline that precedes them, so the
 * result does not end with a newline. Trailing blanks on the last line that
 * contains text are kept. A string that contains only whitespace is cleared.
 *
 * @param str The string from which to remove trailing whitespace lines.
 */
void removeTrailingWhitespace(std::string& str);
/**
 * @brief Removes all tab characters from the given string.
 *
 * Only '\t' is removed; other whitespace is left untouched.
 *
 * @param str The string from which to remove tab characters.
 */
void removeTabs(std::string& str);
/**
 * @brief Limits the number of consecutive whitespace lines in the given string.
 *
 * This function ensures that no more than a specified number of consecutive empty
 * or whitespace-only lines exist in the string. Lines are split on '\n'.
 * Whitespace-only lines before the first line with text are removed entirely,
 * and every kept line is written back followed by '\n', so a non-empty result
 * always ends with a newline.
 *
 * @param str The string to process.
 * @param maxWhitespace The maximum allowed consecutive whitespace lines.
 */
void limitConsecutiveWhitespace(std::string& str, uint_fast8_t maxWhitespace);
}