"""Generate the C++ word list sources ``src/words.hpp`` and ``src/words.cpp``.

The script downloads the lemmatization CSV from the peterdalle/svensktext
repository, lower-cases the ``word`` column, keeps only the entries made up
entirely of the letters ``a``-``z`` and writes them out as a C++ array of
string literals.

Run it as a script; importing the module performs no network or file I/O.
"""
import csv
from os import path
from typing import Iterable, Iterator, List

BASE_DIR = path.join(path.dirname(__file__), "src")

LEMMA_URL = (
    "https://raw.githubusercontent.com/peterdalle/svensktext"
    "/master/lemma/lemmatization.csv"
)

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# ``extern`` makes this a declaration only; without it the header would try to
# define an array of unknown size, which C++ rejects.
HEADER_TEXT = "#pragma once\n\nextern const char* words[];\n"


def filter_word(word: str) -> bool:
    """Return True when *word* is non-empty and contains only ``a``-``z``.

    Upper-case letters, digits, whitespace, punctuation and non-ASCII letters
    all cause the word to be rejected. The empty string is rejected as well
    so that blank CSV cells never end up in the generated array.
    """
    return bool(word) and all("a" <= c <= "z" for c in word)


def _download_lines(url: str, timeout: float) -> List[str]:
    """Fetch *url* and return its body as a list of UTF-8 decoded lines.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never mistaken for CSV data.
    """
    # Imported here so the pure helpers above stay importable without the
    # third-party ``requests`` package installed.
    import requests

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content.decode("utf-8").splitlines()


def _select_words(rows: Iterable[dict]) -> Iterator[str]:
    """Lazily yield the lower-cased ``word`` of each row that passes filter_word."""
    return filter(filter_word, (row["word"].lower() for row in rows))


def _write_header(file_path: str) -> None:
    """Write the C++ header declaring the ``words`` array to *file_path*."""
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(HEADER_TEXT)


def _write_source(file_path: str, words: Iterable[str]) -> None:
    """Write the C++ source defining ``words`` with one literal per entry."""
    with open(file_path, "w", encoding="utf-8") as file:
        file.write('#include "words.hpp"\n\n')
        file.write("const char* words[] = {\n")
        file.writelines(f'    "{word}",\n' for word in words)
        file.write("};\n\n")


def main() -> None:
    """Download the word table and regenerate both C++ files under BASE_DIR."""
    # Download first: if the request fails, the existing files stay untouched.
    table = csv.DictReader(_download_lines(LEMMA_URL, REQUEST_TIMEOUT))
    words = _select_words(table)

    _write_header(path.join(BASE_DIR, "words.hpp"))
    _write_source(path.join(BASE_DIR, "words.cpp"), words)


if __name__ == "__main__":
    main()