39 lines
1018 B
Python
39 lines
1018 B
Python
import requests
|
|
import csv
|
|
from os import path
|
|
|
|
# Directory that receives the generated C++ files: "src" next to this script.
BASE_DIR = path.join(path.dirname(__file__), "src")

# Download the lemmatization CSV. A timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() aborts on an HTTP
# error instead of parsing an error page as if it were CSV data.
response = requests.get(
    "https://raw.githubusercontent.com/peterdalle/svensktext/master/lemma/lemmatization.csv",
    timeout=30,
)
response.raise_for_status()

# Decode explicitly as UTF-8 and split into lines, which is the iterable of
# strings that the csv module expects.
content = response.content.decode("utf-8").splitlines()

# DictReader takes the first line as the header, so each row is a dict keyed
# by column name.
table = csv.DictReader(content)
|
|
|
|
|
|
def filter_word(word: str) -> bool:
    """Return True if *word* is non-empty and made only of the letters a-z.

    Only lowercase ASCII letters are accepted: uppercase letters, digits,
    punctuation, whitespace and non-ASCII letters (e.g. "å", "ä", "ö") all
    cause the word to be rejected. The empty string is rejected as well,
    because ``all()`` over no characters would otherwise accept it.
    """
    # Comparing one-character strings compares their code points, so this is
    # equivalent to checking ord("a") <= ord(c) <= ord("z").
    return bool(word) and all("a" <= c <= "z" for c in word)
|
|
|
|
|
|
# Lazily lower-case the "word" column of every row and keep only the entries
# that filter_word accepts. This is a one-shot iterator, not a list.
words = (
    candidate
    for candidate in (row["word"].lower() for row in table)
    if filter_word(candidate)
)
|
|
|
|
# Write the C++ header that declares the word table.
with open(path.join(BASE_DIR, "words.hpp"), "w", encoding="utf-8") as file:
    file.write("#pragma once\n\n")
    # `extern` makes this a declaration only. Without it the line would be a
    # definition of an array of unknown size, which does not compile and would
    # also clash with the real definition written to words.cpp.
    file.write("extern const char* words[];\n")
|
|
|
|
# Write the C++ source file that defines the word table, one quoted string
# literal per line.
with open(path.join(BASE_DIR, "words.cpp"), "w") as file:
    file.write('#include "words.hpp"\n\nconst char* words[] = {\n')
    file.writelines(f' "{word}",\n' for word in words)
    file.write("};\n\n")
|