fix print
This commit is contained in:
parent
baee064d3b
commit
478a21aedb
@ -5,6 +5,8 @@ add_executable(${PROJECT_NAME}
|
|||||||
main.cpp
|
main.cpp
|
||||||
Page.hpp
|
Page.hpp
|
||||||
Page.cpp
|
Page.cpp
|
||||||
|
stringutil.hpp
|
||||||
|
stringutil.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
|
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
|
||||||
|
14
src/Page.cpp
14
src/Page.cpp
@ -8,6 +8,9 @@
|
|||||||
|
|
||||||
#include "Page.hpp"
|
#include "Page.hpp"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "stringutil.hpp"
|
||||||
#include "cpr/cpr.h"
|
#include "cpr/cpr.h"
|
||||||
#include "libxml/HTMLparser.h"
|
#include "libxml/HTMLparser.h"
|
||||||
#include "libxml/xpath.h"
|
#include "libxml/xpath.h"
|
||||||
@ -32,13 +35,16 @@ Page &Page::operator-=(int) {
|
|||||||
|
|
||||||
std::string Page::str() const {
|
std::string Page::str() const {
|
||||||
std::string ret;
|
std::string ret;
|
||||||
for (const std::string &_pageText: subpages) {
|
|
||||||
std::string pageText = _pageText;
|
for (const std::string &pageText: subpages) {
|
||||||
pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
|
|
||||||
std::istringstream stream(pageText);
|
std::istringstream stream(pageText);
|
||||||
std::string line;
|
std::string line;
|
||||||
while (std::getline(stream, line))
|
while (std::getline(stream, line))
|
||||||
ret += line + "\n";
|
ret += line + "\n";
|
||||||
|
|
||||||
|
string_utils::removeTabs(ret);
|
||||||
|
string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
|
||||||
|
string_utils::removeTrailingWhitespace(ret);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -63,7 +69,7 @@ std::string Page::url() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> Page::fetchSubpages() const {
|
std::vector<std::string> Page::fetchSubpages() const {
|
||||||
const cpr::Response response = cpr::Get(url());
|
const cpr::Response response = cpr::Get(cpr::Url{url()});
|
||||||
if (response.status_code / 100 != 2)
|
if (response.status_code / 100 != 2)
|
||||||
throw std::runtime_error("Page not found");
|
throw std::runtime_error("Page not found");
|
||||||
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
|
const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
static constexpr uint_fast8_t DEFAULT_NUMBER = 100;
|
static constexpr uint_fast8_t DEFAULT_NUMBER = 100;
|
||||||
|
static constexpr uint_fast8_t MAX_WHITESPACE = 2;
|
||||||
|
|
||||||
class Page {
|
class Page {
|
||||||
private:
|
private:
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "Page.hpp"
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
std::cout << "Hello, world!" << std::endl;
|
const auto page = Page();
|
||||||
|
std::cout << page.str() << std::endl;
|
||||||
}
|
}
|
||||||
|
72
src/stringutil.cpp
Normal file
72
src/stringutil.cpp
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#include "stringutil.hpp"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
|
||||||
|
namespace string_utils {

/**
 * @brief Tells whether @p str consists solely of whitespace characters.
 *
 * A character counts as whitespace when std::isspace accepts it. An empty
 * string has no non-whitespace character and therefore yields true.
 *
 * @param str The string to inspect.
 * @return true if every character is whitespace (or the string is empty), false otherwise.
 */
bool isAllWhitespace(const std::string &str) {
    // The lambda takes unsigned char on purpose: handing a negative char value
    // to std::isspace is undefined behaviour.
    return std::all_of(str.begin(), str.end(), [](const unsigned char c) -> bool {
        return std::isspace(c) != 0;
    });
}

/**
 * @brief Strips whitespace-only lines from the end of @p str.
 *
 * Everything from the first line break that follows the last non-whitespace
 * character is removed, so the result does not end with a line break.
 * Whitespace that sits on the same line as the last text is kept. A string
 * without any non-whitespace character is cleared.
 *
 * @param str The string to trim in place.
 */
void removeTrailingWhitespace(std::string &str) {
    // Single backward scan for the last character that is not whitespace.
    const auto lastText = std::find_if(str.rbegin(), str.rend(), [](const unsigned char c) -> bool {
        return std::isspace(c) == 0;
    });

    if (lastText == str.rend()) {
        str.clear();
        return;
    }

    // base() designates the position right after the last text character; every
    // line starting at or after the next '\n' is whitespace-only.
    const auto firstTrailingNewline = std::find(lastText.base(), str.end(), '\n');
    str.erase(firstTrailingNewline, str.end());
}

/**
 * @brief Removes every tab character from @p str.
 *
 * @param str The string to modify in place.
 */
void removeTabs(std::string &str) {
    str.erase(std::remove(str.begin(), str.end(), '\t'), str.end());
}

/**
 * @brief Caps runs of whitespace-only lines at @p maxWhitespace lines.
 *
 * Whitespace-only lines before the first line containing text are dropped
 * entirely. Every line that is kept is written back followed by '\n', also
 * when the input did not end with a line break.
 *
 * @param str           The string to process in place.
 * @param maxWhitespace The maximum number of consecutive whitespace-only lines to keep.
 */
void limitConsecutiveWhitespace(std::string &str, const std::uint_fast8_t maxWhitespace) {
    std::istringstream stream(str);
    std::string line;
    std::string processed;
    processed.reserve(str.size());

    // Counted in std::size_t: an 8-bit counter would wrap around on very long
    // runs of blank lines and let surplus blank lines through again.
    std::size_t whitespaceRow = 0;
    bool hasAddedRealTextYet = false;

    while (std::getline(stream, line)) {
        const bool onlySpace = isAllWhitespace(line);

        if (!hasAddedRealTextYet) {
            if (onlySpace)
                continue; // Skip leading empty lines
            hasAddedRealTextYet = true;
        } else if (onlySpace) {
            ++whitespaceRow;
        } else {
            whitespaceRow = 0;
        }

        if (whitespaceRow > maxWhitespace)
            continue; // Skip lines exceeding maxWhitespace

        processed += line;
        processed += '\n';
    }

    str.swap(processed);
}

}
|
46
src/stringutil.hpp
Normal file
46
src/stringutil.hpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <cstdint>
#include <string>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief String utility functions.
|
||||||
|
*/
|
||||||
|
namespace string_utils {

/**
 * @brief Checks if a given string consists solely of whitespace characters.
 *
 * An empty string is treated as consisting solely of whitespace.
 *
 * @param str The string to check.
 * @return true If all characters in the string are whitespace.
 * @return false Otherwise.
 */
bool isAllWhitespace(const std::string& str);

/**
 * @brief Removes trailing whitespace lines from the given string.
 *
 * This function removes all consecutive empty or whitespace-only lines at the end
 * of the input string, together with the line break in front of them, so the
 * result never ends with a line break. A string that contains nothing but
 * whitespace is cleared.
 *
 * @param str The string from which to remove trailing whitespace lines.
 */
void removeTrailingWhitespace(std::string& str);

/**
 * @brief Removes all tab characters from the given string.
 *
 * @param str The string from which to remove tab characters.
 */
void removeTabs(std::string& str);

/**
 * @brief Limits the number of consecutive whitespace lines in the given string.
 *
 * This function ensures that no more than a specified number of consecutive empty
 * or whitespace-only lines exist in the string. Empty or whitespace-only lines
 * before the first line with text are removed entirely, and every remaining line
 * is terminated with a line break.
 *
 * @param str The string to process.
 * @param maxWhitespace The maximum allowed consecutive whitespace lines.
 */
void limitConsecutiveWhitespace(std::string& str, std::uint_fast8_t maxWhitespace);

}
|
Loading…
x
Reference in New Issue
Block a user