fix print
This commit is contained in:
parent
baee064d3b
commit
478a21aedb
@ -5,6 +5,8 @@ add_executable(${PROJECT_NAME}
|
||||
main.cpp
|
||||
Page.hpp
|
||||
Page.cpp
|
||||
stringutil.hpp
|
||||
stringutil.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
|
||||
|
14
src/Page.cpp
14
src/Page.cpp
@ -8,6 +8,9 @@
|
||||
|
||||
#include "Page.hpp"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "stringutil.hpp"
|
||||
#include "cpr/cpr.h"
|
||||
#include "libxml/HTMLparser.h"
|
||||
#include "libxml/xpath.h"
|
||||
@ -32,13 +35,16 @@ Page &Page::operator-=(int) {
|
||||
|
||||
std::string Page::str() const {
|
||||
std::string ret;
|
||||
for (const std::string &_pageText: subpages) {
|
||||
std::string pageText = _pageText;
|
||||
pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
|
||||
|
||||
for (const std::string &pageText: subpages) {
|
||||
std::istringstream stream(pageText);
|
||||
std::string line;
|
||||
while (std::getline(stream, line))
|
||||
ret += line + "\n";
|
||||
|
||||
string_utils::removeTabs(ret);
|
||||
string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
|
||||
string_utils::removeTrailingWhitespace(ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -63,7 +69,7 @@ std::string Page::url() const {
|
||||
}
|
||||
|
||||
std::vector<std::string> Page::fetchSubpages() const {
|
||||
const cpr::Response response = cpr::Get(url());
|
||||
const cpr::Response response = cpr::Get(cpr::Url{url()});
|
||||
if (response.status_code / 100 != 2)
|
||||
throw std::runtime_error("Page not found");
|
||||
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <vector>
|
||||
|
||||
static constexpr uint_fast8_t DEFAULT_NUMBER = 100;
|
||||
static constexpr uint_fast8_t MAX_WHITESPACE = 2;
|
||||
|
||||
class Page {
|
||||
private:
|
||||
|
@ -1,5 +1,8 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "Page.hpp"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::cout << "Hello, world!" << std::endl;
|
||||
const auto page = Page();
|
||||
std::cout << page.str() << std::endl;
|
||||
}
|
||||
|
72
src/stringutil.cpp
Normal file
72
src/stringutil.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
#include "stringutil.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
namespace string_utils {
    /**
     * @brief Checks whether a string consists solely of whitespace characters.
     *
     * Whitespace is whatever std::isspace reports for the current C locale.
     * An empty string is considered all-whitespace.
     *
     * @param str The string to check.
     * @return true if every character is whitespace (or the string is empty),
     *         false otherwise.
     */
    bool isAllWhitespace(const std::string &str) {
        // The lambda takes unsigned char so that std::isspace never receives a
        // negative value, which would be undefined behaviour.
        return std::all_of(str.begin(), str.end(), [](const unsigned char c) -> bool {
            return std::isspace(c) != 0;
        });
    }

    /**
     * @brief Removes trailing empty or whitespace-only lines from a string.
     *
     * The string is cut at the first newline that follows the last
     * non-whitespace character, so the result ends with the last line that
     * holds real text (without a terminating newline; spaces on that line are
     * kept). A string without any non-whitespace character becomes empty.
     *
     * Runs in a single pass instead of re-scanning the last line repeatedly.
     *
     * @param str The string to trim in place.
     */
    void removeTrailingWhitespace(std::string &str) {
        const auto lastText = std::find_if(str.rbegin(), str.rend(), [](const unsigned char c) -> bool {
            return std::isspace(c) == 0;
        });

        if (lastText == str.rend()) {
            // Nothing but whitespace (or empty): every line would be stripped.
            str.clear();
            return;
        }

        // lastText.base() points one past the last non-whitespace character;
        // everything from the next newline onwards is whitespace-only lines.
        const std::size_t tailStart = static_cast<std::size_t>(lastText.base() - str.begin());
        const std::size_t cutAt = str.find('\n', tailStart);
        if (cutAt != std::string::npos)
            str.erase(cutAt);
    }

    /**
     * @brief Removes every tab character from a string.
     *
     * @param str The string to modify in place.
     */
    void removeTabs(std::string &str) {
        str.erase(std::remove(str.begin(), str.end(), '\t'), str.end());
    }

    /**
     * @brief Limits the number of consecutive whitespace-only lines.
     *
     * Whitespace-only lines before the first line with real text are dropped
     * entirely. After that, at most @p maxWhitespace consecutive empty or
     * whitespace-only lines are kept; further ones in the same run are dropped.
     * Every kept line is written back terminated by a newline, so a non-empty
     * result always ends with a newline.
     *
     * @param str           The string to process in place.
     * @param maxWhitespace The maximum allowed consecutive whitespace lines.
     */
    void limitConsecutiveWhitespace(std::string &str, const uint_fast8_t maxWhitespace) {
        std::istringstream stream(str);
        std::string line;

        std::string processed;
        processed.reserve(str.size() + 1);

        // Counted in std::size_t: a counter as narrow as maxWhitespace could
        // wrap around on very long blank runs and let blank lines through again.
        std::size_t whitespaceRow = 0;
        bool hasAddedRealTextYet = false;

        while (std::getline(stream, line)) {
            if (isAllWhitespace(line)) {
                if (!hasAddedRealTextYet)
                    continue; // Skip leading empty lines
                if (++whitespaceRow > maxWhitespace)
                    continue; // Skip lines exceeding maxWhitespace
            } else {
                hasAddedRealTextYet = true;
                whitespaceRow = 0;
            }

            processed += line;
            processed += '\n';
        }

        str.swap(processed);
    }
}
|
46
src/stringutil.hpp
Normal file
46
src/stringutil.hpp
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
#include <cstdint>
#include <string>
|
||||
|
||||
/**
|
||||
* @brief String utility functions.
|
||||
*/
|
||||
namespace string_utils {

    /**
     * @brief Tests whether a string contains nothing but whitespace.
     *
     * @param str String to inspect.
     * @return true when every character of @p str is whitespace (an empty
     *         string qualifies), false as soon as one character is not.
     */
    bool isAllWhitespace(const std::string &str);

    /**
     * @brief Strips empty and whitespace-only lines from the end of a string.
     *
     * Trailing lines are dropped one after another for as long as the last
     * line is empty or whitespace-only. A string made up entirely of
     * whitespace ends up empty.
     *
     * @param str String to trim; modified in place.
     */
    void removeTrailingWhitespace(std::string &str);

    /**
     * @brief Deletes every tab character found in a string.
     *
     * @param str String to clean; modified in place.
     */
    void removeTabs(std::string &str);

    /**
     * @brief Caps runs of consecutive empty or whitespace-only lines.
     *
     * Within each run of such lines only the first @p maxWhitespace are kept.
     * Whitespace-only lines that precede the first line of real text are
     * dropped altogether.
     *
     * @param str           String to rewrite; modified in place.
     * @param maxWhitespace Largest number of consecutive whitespace lines to keep.
     */
    void limitConsecutiveWhitespace(std::string &str, uint_fast8_t maxWhitespace);

}
|
Loading…
x
Reference in New Issue
Block a user