fix print
This commit is contained in:
		@@ -5,6 +5,8 @@ add_executable(${PROJECT_NAME}
 | 
			
		||||
        main.cpp
 | 
			
		||||
        Page.hpp
 | 
			
		||||
        Page.cpp
 | 
			
		||||
        stringutil.hpp
 | 
			
		||||
        stringutil.cpp
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
target_link_libraries(${PROJECT_NAME} PRIVATE LibXml2::LibXml2 cpr::cpr)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										14
									
								
								src/Page.cpp
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/Page.cpp
									
									
									
									
									
								
							@@ -8,6 +8,9 @@
 | 
			
		||||
 | 
			
		||||
#include "Page.hpp"
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include "stringutil.hpp"
 | 
			
		||||
#include "cpr/cpr.h"
 | 
			
		||||
#include "libxml/HTMLparser.h"
 | 
			
		||||
#include "libxml/xpath.h"
 | 
			
		||||
@@ -32,13 +35,16 @@ Page &Page::operator-=(int) {
 | 
			
		||||
 | 
			
		||||
std::string Page::str() const {
 | 
			
		||||
    std::string ret;
 | 
			
		||||
    for (const std::string &_pageText: subpages) {
 | 
			
		||||
        std::string pageText = _pageText;
 | 
			
		||||
        pageText.erase(std::ranges::remove(pageText, '\t').begin(), pageText.end());
 | 
			
		||||
 | 
			
		||||
    for (const std::string &pageText: subpages) {
 | 
			
		||||
        std::istringstream stream(pageText);
 | 
			
		||||
        std::string line;
 | 
			
		||||
        while (std::getline(stream, line))
 | 
			
		||||
            ret += line + "\n";
 | 
			
		||||
 | 
			
		||||
        string_utils::removeTabs(ret);
 | 
			
		||||
        string_utils::limitConsecutiveWhitespace(ret, MAX_WHITESPACE);
 | 
			
		||||
        string_utils::removeTrailingWhitespace(ret);
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
@@ -63,7 +69,7 @@ std::string Page::url() const {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::vector<std::string> Page::fetchSubpages() const {
 | 
			
		||||
    const cpr::Response response = cpr::Get(url());
 | 
			
		||||
    const cpr::Response response = cpr::Get(cpr::Url{url()});
 | 
			
		||||
    if (response.status_code / 100 != 2)
 | 
			
		||||
        throw std::runtime_error("Page not found");
 | 
			
		||||
    const htmlDocPtr doc = htmlReadMemory(response.text.c_str(), response.text.size(), nullptr, nullptr,
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
static constexpr uint_fast8_t DEFAULT_NUMBER = 100;
 | 
			
		||||
static constexpr uint_fast8_t MAX_WHITESPACE = 2;
 | 
			
		||||
 | 
			
		||||
class Page {
 | 
			
		||||
private:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,8 @@
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include "Page.hpp"
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
    std::cout << "Hello, world!" << std::endl;
 | 
			
		||||
    const auto page = Page();
 | 
			
		||||
    std::cout << page.str() << std::endl;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										72
									
								
								src/stringutil.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								src/stringutil.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,72 @@
 | 
			
		||||
#include "stringutil.hpp"
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <cctype>
 | 
			
		||||
#include <sstream>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace string_utils {
    namespace {
        /// Classifies a single byte with std::isspace; taking it as unsigned char
        /// keeps negative char values from reaching std::isspace.
        bool isSpaceChar(const unsigned char c) {
            return std::isspace(c) != 0;
        }
    }

    /**
     * @brief Tells whether @p str contains nothing but whitespace characters.
     *
     * @param str The string to inspect.
     * @return true for an empty string or a string made only of characters for
     *         which std::isspace is non-zero; false otherwise.
     */
    bool isAllWhitespace(const std::string &str) {
        return std::all_of(str.begin(), str.end(), isSpaceChar);
    }

    /**
     * @brief Strips whitespace-only lines from the end of @p str, in place.
     *
     * Repeatedly cuts the string at its last newline for as long as the text
     * after that newline is empty or whitespace-only. The newline itself is
     * removed too, so the result never ends in '\n'. A string that contains no
     * newline is cleared when it is entirely whitespace and left alone otherwise.
     *
     * @param str The string to trim.
     */
    void removeTrailingWhitespace(std::string &str) {
        for (;;) {
            const std::size_t cut = str.rfind('\n');

            if (cut == std::string::npos) {
                // Single remaining line: either it is blank and goes away, or it stays.
                if (isAllWhitespace(str))
                    str.clear();
                return;
            }

            // Inspect the tail in place instead of copying it out first.
            const auto tailBegin = str.begin() + static_cast<std::string::difference_type>(cut) + 1;
            if (!std::all_of(tailBegin, str.end(), isSpaceChar))
                return;

            str.erase(cut);
        }
    }

    /**
     * @brief Deletes every tab character from @p str, in place.
     *
     * @param str The string to modify.
     */
    void removeTabs(std::string &str) {
        str.erase(std::remove(str.begin(), str.end(), '\t'), str.end());
    }

    /**
     * @brief Caps runs of whitespace-only lines at @p maxWhitespace lines, in place.
     *
     * The input is split on '\n'. Whitespace-only lines that appear before the
     * first line with real text are dropped entirely. After that, at most
     * @p maxWhitespace consecutive whitespace-only lines are kept per run. Every
     * kept line is written back followed by '\n', so a non-empty result always
     * ends with a newline.
     *
     * @param str The string to process.
     * @param maxWhitespace Maximum number of consecutive whitespace-only lines to keep.
     */
    void limitConsecutiveWhitespace(std::string &str, const std::uint_fast8_t maxWhitespace) {
        std::istringstream input(str);
        std::string result;
        result.reserve(str.size() + 1);

        // Counted in std::size_t on purpose: an 8-bit counter wraps to 0 on the
        // 256th consecutive blank line and would start emitting blank lines again.
        std::size_t blankRun = 0;
        bool seenText = false;

        for (std::string line; std::getline(input, line);) {
            if (isAllWhitespace(line)) {
                if (!seenText)
                    continue; // leading blank lines are never emitted
                if (++blankRun > maxWhitespace)
                    continue; // run is already at its limit
            } else {
                seenText = true;
                blankRun = 0;
            }

            result += line;
            result += '\n';
        }

        str.swap(result);
    }
}
 | 
			
		||||
							
								
								
									
										46
									
								
								src/stringutil.hpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								src/stringutil.hpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
#include <cstdint>
#include <string>
 | 
			
		||||
 | 
			
		||||
/**
 * @brief String utility functions.
 */
namespace string_utils {

    /**
     * @brief Checks if a given string consists solely of whitespace characters.
     *
     * An empty string counts as all-whitespace.
     *
     * @param str The string to check.
     * @return true If all characters in the string are whitespace.
     * @return false Otherwise.
     */
    bool isAllWhitespace(const std::string& str);

    /**
     * @brief Removes trailing whitespace lines from the given string.
     *
     * This function removes all consecutive empty or whitespace-only lines at the end
     * of the input string, together with the newline that separates them from the
     * preceding text, so the result does not end in a newline. A string that
     * consists only of whitespace becomes empty.
     *
     * @param str The string from which to remove trailing whitespace lines.
     */
    void removeTrailingWhitespace(std::string& str);

    /**
     * @brief Removes all tab characters from the given string.
     *
     * @param str The string from which to remove tab characters.
     */
    void removeTabs(std::string& str);

    /**
     * @brief Limits the number of consecutive whitespace lines in the given string.
     *
     * This function ensures that no more than a specified number of consecutive empty
     * or whitespace-only lines exist in the string. Whitespace-only lines before the
     * first line containing text are removed entirely, and every line that is kept
     * is terminated with a newline.
     *
     * @param str The string to process.
     * @param maxWhitespace The maximum allowed consecutive whitespace lines.
     */
    void limitConsecutiveWhitespace(std::string& str, std::uint_fast8_t maxWhitespace);

}
 | 
			
		||||
		Reference in New Issue
	
	Block a user