2016-08-26 09:58:03 +00:00
|
|
|
// Hyperbolic Rogue language file generator
|
|
|
|
|
2018-02-08 23:40:26 +00:00
|
|
|
// Copyright (C) 2011-2018 Zeno Rogue, see 'hyper.cpp' for details
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
#include <cstdio>
#include <cstdlib>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
|
|
|
|
|
2018-06-27 05:41:56 +00:00
|
|
|
// Values for the `genus` field of a noun entry. They are not referenced by
// name in this file; presumably the included language-*.cpp files use them
// inside their N(...) entries -- TODO confirm.
#define GEN_M 0
#define GEN_F 1
#define GEN_N 2
#define GEN_O 3

// Bit flags that start above the two bits taken by the GEN_* values, so they
// can be OR-ed with one of them (presumably by the language files -- verify).
#define GENF_ELISION (1 << 3)
#define GENF_PLURALONLY (1 << 4)
#define GENF_PROPER (1 << 5)
|
|
|
|
|
2018-06-27 05:41:56 +00:00
|
|
|
// IF_MAC(y,z) expands to y when the MAC macro evaluates to non-zero at
// preprocessing time, and to z otherwise (including when MAC is undefined).
#if MAC
  #define IF_MAC(y,z) y
#else
  #define IF_MAC(y,z) z
#endif
|
|
|
|
|
2018-06-22 12:47:24 +00:00
|
|
|
// Returns x.size() as a signed int, so callers can compare it against int
// loop counters and print it with %d.
template<class T> int isize(const T& x) {
  return static_cast<int>(x.size());
  }
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2024-08-02 02:31:28 +00:00
|
|
|
// Number of language slots in d[], nouns[] and completeness[]. Slot 0 is
// filled in main() with each key mapped to itself; slots 1..NUMLAN-1 are
// filled by the langXX() loaders.
#define NUMLAN 9
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
// language generator
|
|
|
|
|
2021-02-04 16:20:49 +00:00
|
|
|
// Tag of the language file being loaded right now. Each langXX() loader sets
// it on entry and resets it to "-" on exit; dictionary::add prints it in the
// "Two translations" warning.
std::string current_language;
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
// Forward declaration: dictionary::add (below) calls escape() before the
// function is defined further down in this file.
const char *escape(std::string s, const std::string& dft);
|
2017-08-06 12:50:16 +00:00
|
|
|
|
2016-08-26 09:58:03 +00:00
|
|
|
template<class T> struct dictionary {
|
2018-06-24 01:09:34 +00:00
|
|
|
std::map<std::string, T> m;
|
2018-06-24 01:19:22 +00:00
|
|
|
void add(const std::string& s, T val) {
|
2020-03-31 18:56:45 +00:00
|
|
|
auto it = m.find(s);
|
|
|
|
if (it == m.end()) {
|
|
|
|
m.emplace(s, std::move(val));
|
|
|
|
}
|
|
|
|
else if (val != it->second) {
|
2021-02-04 16:20:49 +00:00
|
|
|
printf("// #warning Two translations for %s [%s]\n", escape(s, s), current_language.c_str());
|
2020-03-31 18:56:45 +00:00
|
|
|
}
|
2017-08-06 12:50:16 +00:00
|
|
|
}
|
2018-06-24 01:09:34 +00:00
|
|
|
T& operator [] (const std::string& s) { return m[s]; }
|
2018-06-27 05:54:40 +00:00
|
|
|
int count(const std::string& s) const { return m.count(s); }
|
2016-08-26 09:58:03 +00:00
|
|
|
};
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
// Sentence translations, one dictionary per language slot. d[i]["EN"] is
// read throughout this file as the display name of language i.
dictionary<std::string> d[NUMLAN];
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:19:22 +00:00
|
|
|
// Raw noun entry, brace-initialized by the N(...) macros inside the langXX()
// loaders. It holds plain C-string pointers so the static tables can be
// aggregate-initialized; the member order must match those initializers.
// The four strings are presumably nominative, nominative plural, accusative
// and ablative forms -- TODO confirm against the language files.
struct noun2 {
  int genus;
  const char *nom;
  const char *nomp;
  const char *acc;
  const char *abl;
  };

// Owning version of noun2, stored in the nouns[] dictionaries.
struct noun {
  // Initialized explicitly so that a default-initialized noun (e.g. a plain
  // `noun n;`) never carries an indeterminate genus.
  int genus = 0;
  std::string nom, nomp, acc, abl;

  noun() = default;

  // Intentionally implicit: dictionary<noun>::add is called with noun2
  // arguments and relies on this conversion. All four pointers must be
  // non-null.
  noun(const noun2& n) : genus(n.genus), nom(n.nom), nomp(n.nomp), acc(n.acc), abl(n.abl) {}

  // Two nouns are equal when the genus and all four forms are equal.
  friend bool operator==(const noun& a, const noun& b) {
    return std::tie(a.genus, a.nom, a.nomp, a.acc, a.abl) == std::tie(b.genus, b.nom, b.nomp, b.acc, b.abl);
    }

  // Defined through operator== so the two can never disagree.
  friend bool operator!=(const noun& a, const noun& b) {
    return !(a == b);
    }
  };
|
|
|
|
|
|
|
|
// Noun translations, one dictionary per language slot (same indexing as d[]).
dictionary<noun> nouns[NUMLAN];
|
|
|
|
|
2017-07-04 13:38:33 +00:00
|
|
|
// Length in bytes (1..4) of the UTF-8 sequence that starts with byte `c`,
// judged from the value of that byte alone. Bytes in 0x80..0xBF
// (continuation bytes) are not treated specially and yield 2.
int utfsize(char c) {
  const unsigned char lead = static_cast<unsigned char>(c);
  return lead < 0x80 ? 1
       : lead < 0xE0 ? 2
       : lead < 0xF0 ? 3
       : 4;
  }
|
|
|
|
|
2020-02-16 03:59:04 +00:00
|
|
|
void addutftoset(std::set<std::string>& s, const std::string& w) {
|
2018-06-24 01:09:34 +00:00
|
|
|
size_t i = 0;
|
|
|
|
while(i < w.size()) {
|
2017-07-04 13:38:33 +00:00
|
|
|
int siz = utfsize(w[i]);
|
|
|
|
s.insert(w.substr(i, siz));
|
|
|
|
i += siz;
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-16 03:59:04 +00:00
|
|
|
void addutftoset(std::set<std::string>& s, const noun& w) {
|
2016-08-26 09:58:03 +00:00
|
|
|
addutftoset(s, w.nom);
|
|
|
|
addutftoset(s, w.nomp);
|
|
|
|
addutftoset(s, w.acc);
|
|
|
|
addutftoset(s, w.abl);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2020-02-16 03:59:04 +00:00
|
|
|
void addutftoset(std::set<std::string>& s, const dictionary<T>& w) {
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : w.m)
|
|
|
|
addutftoset(s, elt.second);
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
// Every distinct UTF-8 character seen in the translations, plus the symbols
// main() inserts by hand; main() emits the multi-byte ones as NATCHARS.
std::set<std::string> allchars;
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
// Hash value type used to key the generated sentence and noun tables.
using hashcode = unsigned;

// Multiplier used by langhash(). main() keeps drawing new values until no two
// keys collide, then writes the winning value into the generated output.
hashcode hashval;

// Polynomial hash of `s`: r = hashval * r + byte, starting from 0, with
// unsigned wrap-around. Each byte is added as a plain `char`, so on platforms
// where char is signed, bytes >= 0x80 contribute as negative values. This is
// kept as-is because the result is written into the generated table and
// presumably has to match the hash its consumer computes -- verify before
// changing.
hashcode langhash(const std::string& s) {
  hashcode acc = 0;
  for (std::string::size_type pos = 0; pos < s.size(); ++pos) {
    acc = hashval * acc + s[pos];
    }
  return acc;
  }
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
std::map<hashcode, std::string> buildHashTable(std::set<std::string>& s) {
|
|
|
|
std::map<hashcode, std::string> res;
|
|
|
|
for(auto&& elt : s)
|
|
|
|
res[langhash(elt)] = elt;
|
2016-08-26 09:58:03 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
// Renders `s` as a double-quoted C string literal, escaping backslash,
// newline and double quote. If `s` is empty, "/*MISSING*/ " is printed to
// stdout first and `dft` is rendered instead.
//
// The result points into a function-local static buffer, so it is only valid
// until the next call: use at most one escape() result per printf.
const char *escape(std::string s, const std::string& dft) {
  if (s.empty()) {
    printf("/*MISSING*/ ");
    s = dft;
    }

  static std::string t;
  t = "\"";
  for (char ch : s) {
    switch (ch) {
      case '\\': t += "\\\\"; break;
      case '\n': t += "\\n"; break;
      case '\"': t += "\\\""; break;
      default:   t += ch; break;
      }
    }
  t += "\"";
  return t.c_str();
  }
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
// Noun keys for which main() sets bit 0 (value 1) in the flags column of the
// generated all_nouns table.
std::set<std::string> nothe;
|
|
|
|
// Noun keys for which main() sets bit 1 (value 2) in the flags column of the
// generated all_nouns table.
std::set<std::string> plural;
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2021-02-04 16:20:49 +00:00
|
|
|
|
2018-06-24 01:19:22 +00:00
|
|
|
// Loads language-pl.cpp (tag "PL") into slot 1 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langPL() {
  current_language = "PL";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e,f)
    #include "language-pl.cpp"
    #undef N
    #undef S
    };

  // Pass 2: N(key, genus, nom, nomp, acc, abl) becomes a table row,
  // S(...) expands to nothing.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
    #include "language-pl.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[1].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[1].add(row.first, row.second);

  current_language = "-";
  }
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2017-08-06 12:50:16 +00:00
|
|
|
// Loads language-tr.cpp (tag "TR") into slot 2 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langTR() {
  current_language = "TR";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e,f)
    #include "language-tr.cpp"
    #undef N
    #undef S
    };

  // Pass 2: N(key, genus, nom, nomp, acc, abl) becomes a table row,
  // S(...) expands to nothing.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
    #include "language-tr.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[2].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[2].add(row.first, row.second);

  current_language = "-";
  }
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2017-08-06 12:50:16 +00:00
|
|
|
// Loads language-cz.cpp (tag "CZ") into slot 3 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langCZ() {
  current_language = "CZ";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e,f)
    #include "language-cz.cpp"
    #undef N
    #undef S
    };

  // Pass 2: N(key, genus, nom, nomp, acc, abl) becomes a table row,
  // S(...) expands to nothing.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
    #include "language-cz.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[3].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[3].add(row.first, row.second);

  current_language = "-";
  }
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2017-08-06 12:50:16 +00:00
|
|
|
// Loads language-ru.cpp (tag "RU") into slot 4 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langRU() {
  current_language = "RU";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e,f)
    #include "language-ru.cpp"
    #undef N
    #undef S
    };

  // Pass 2: N(key, genus, nom, nomp, acc, abl) becomes a table row,
  // S(...) expands to nothing.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
    #include "language-ru.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[4].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[4].add(row.first, row.second);

  current_language = "-";
  }
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2017-08-06 12:50:16 +00:00
|
|
|
// Loads language-de.cpp (tag "DE") into slot 5 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langDE() {
  current_language = "DE";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e)
    #include "language-de.cpp"
    #undef N
    #undef S
    };

  // Pass 2: this language's N(...) takes only three forms after the genus;
  // the last one (e) is stored as both acc and abl.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e) { a, noun2{ b, c, d, e, e } },
    #include "language-de.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[5].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[5].add(row.first, row.second);

  current_language = "-";
  }
|
|
|
|
|
|
|
|
// Loads language-ptbr.cpp (tag "PT") into slot 6 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langPT() {
  current_language = "PT";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e)
    #include "language-ptbr.cpp"
    #undef N
    #undef S
    };

  // Pass 2: this language's N(...) takes only three forms after the genus;
  // acc is stored as an empty string and the last argument (e) becomes abl.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e) { a, noun2{ b, c, d, "", e } },
    #include "language-ptbr.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[6].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[6].add(row.first, row.second);

  current_language = "-";
  }
|
|
|
|
|
2022-07-01 18:06:57 +00:00
|
|
|
// Loads language-fr.cpp (tag "FR") into slot 7 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langFR() {
  current_language = "FR";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d,e)
    #include "language-fr.cpp"
    #undef N
    #undef S
    };

  // Pass 2: this language's N(...) takes only three forms after the genus;
  // the last one (e) is stored as both acc and abl.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d,e) { a, noun2{ b, c, d, e, e } },
    #include "language-fr.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[7].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[7].add(row.first, row.second);

  current_language = "-";
  }
|
|
|
|
|
2024-08-02 02:31:28 +00:00
|
|
|
// Loads language-zh.cpp (tag "ZH") into slot 8 of d[] and nouns[].
// The language file is included twice with different S/N macro definitions:
// once keeping only the S(...) sentence pairs, once keeping only the N(...)
// noun entries.
void langZH() {
  current_language = "ZH";

  // Pass 1: S(key, translation) becomes a table row, N(...) expands to nothing.
  static std::pair<const char *, const char *> sentence_table[] = {
    #define S(a,b) { a, b },
    #define N(a,b,c,d)
    #include "language-zh.cpp"
    #undef N
    #undef S
    };

  // Pass 2: this language's N(...) takes only two forms after the genus;
  // the first (c) is stored as nom, nomp and acc, the second (d) as abl.
  static std::pair<const char *, noun2> noun_table[] = {
    #define S(a,b)
    #define N(a,b,c,d) { a, noun2{ b, c, c, c, d } },
    #include "language-zh.cpp"
    #undef N
    #undef S
    };

  for (const auto& row : sentence_table) d[8].add(row.first, row.second);
  for (const auto& row : noun_table) nouns[8].add(row.first, row.second);

  current_language = "-";
  }
|
|
|
|
|
2017-08-06 12:50:16 +00:00
|
|
|
// Per-slot count of translated keys, accumulated by compute_completeness().
// Slot 0 holds the total number of distinct keys seen; main() prints the
// array as transcompleteness.
int completeness[NUMLAN];
|
|
|
|
|
2018-06-27 05:54:40 +00:00
|
|
|
template<class T>
|
|
|
|
void compute_completeness(const T& dict)
|
|
|
|
{
|
2018-06-24 01:09:34 +00:00
|
|
|
std::set<std::string> s;
|
2016-08-26 09:58:03 +00:00
|
|
|
for(int i=1; i<NUMLAN; i++)
|
2018-06-27 05:54:40 +00:00
|
|
|
for(auto&& elt : dict[i].m)
|
2018-06-24 01:09:34 +00:00
|
|
|
s.insert(elt.first);
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : s) {
|
2021-02-04 16:20:49 +00:00
|
|
|
std::string mis = "", mis1 = "", exist_in = "";
|
|
|
|
bool in_important = false;
|
2018-06-27 05:54:40 +00:00
|
|
|
for(int i=1; i<NUMLAN; i++) if(dict[i].count(elt) == 0) {
|
2018-06-24 01:09:34 +00:00
|
|
|
std::string which = d[i]["EN"];
|
2021-02-04 16:20:49 +00:00
|
|
|
if(which != "TR" && which != "DE" && which != "PT-BR" && which != "RU")
|
|
|
|
mis += which + " ";
|
2017-08-06 12:50:16 +00:00
|
|
|
else
|
2021-02-04 16:20:49 +00:00
|
|
|
mis1 += which + " ";
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
std::string which = d[i]["EN"];
|
|
|
|
if(which != "PT-BR" && which != "TR" && which != "DE")
|
|
|
|
in_important = true;
|
|
|
|
exist_in += which + " ";
|
2017-08-06 12:50:16 +00:00
|
|
|
}
|
2021-02-04 16:20:49 +00:00
|
|
|
if(mis != "") mis.pop_back();
|
|
|
|
if(mis1 != "") mis1.pop_back();
|
|
|
|
if(exist_in != "") exist_in.pop_back();
|
|
|
|
if(in_important && mis != "")
|
|
|
|
printf("// #warning Missing [%s : %s] from [%s]: %s\n", mis.c_str(), mis1.c_str(), exist_in.c_str(), escape(elt, "?"));
|
2017-08-06 12:50:16 +00:00
|
|
|
|
2020-03-31 18:56:45 +00:00
|
|
|
completeness[0]++;
|
|
|
|
for(int i=1; i<NUMLAN; i++) if(dict[i].count(elt)) completeness[i]++;
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
2018-06-27 05:54:40 +00:00
|
|
|
}
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-27 05:54:40 +00:00
|
|
|
int main() {
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-27 05:54:40 +00:00
|
|
|
printf("// DO NOT EDIT -- this file is generated automatically with langen\n\n");
|
2018-06-24 01:09:34 +00:00
|
|
|
|
2018-06-27 05:54:40 +00:00
|
|
|
nothe.insert("R'Lyeh");
|
|
|
|
nothe.insert("Camelot");
|
2022-04-26 11:29:04 +00:00
|
|
|
nothe.insert("Hell");
|
2018-06-27 05:54:40 +00:00
|
|
|
plural.insert("Crossroads");
|
|
|
|
plural.insert("Crossroads II");
|
|
|
|
plural.insert("Crossroads III");
|
|
|
|
plural.insert("Elemental Planes");
|
|
|
|
plural.insert("Crossroads IV");
|
|
|
|
plural.insert("Kraken Depths");
|
|
|
|
allchars.insert("ᵈ");
|
|
|
|
allchars.insert("δ");
|
|
|
|
allchars.insert("∞");
|
|
|
|
allchars.insert("½");
|
|
|
|
allchars.insert("²");
|
2018-07-23 21:37:36 +00:00
|
|
|
allchars.insert("π");
|
2018-09-13 18:38:06 +00:00
|
|
|
allchars.insert("Θ");
|
2019-04-13 11:04:17 +00:00
|
|
|
allchars.insert("λ");
|
2019-07-03 05:31:09 +00:00
|
|
|
allchars.insert("⌫");
|
|
|
|
allchars.insert("⏎");
|
|
|
|
allchars.insert("←");
|
|
|
|
allchars.insert("→");
|
2019-07-30 10:58:42 +00:00
|
|
|
allchars.insert("⁻");
|
|
|
|
allchars.insert("ᶻ");
|
2023-03-28 12:10:13 +00:00
|
|
|
allchars.insert("√");
|
2017-08-06 12:50:16 +00:00
|
|
|
|
2018-06-27 05:54:40 +00:00
|
|
|
langPL(); langCZ(); langRU();
|
|
|
|
langTR(); langDE(); langPT();
|
2024-08-02 02:31:28 +00:00
|
|
|
langFR(); langZH();
|
2018-06-27 05:54:40 +00:00
|
|
|
|
|
|
|
// verify
|
|
|
|
compute_completeness(d);
|
|
|
|
compute_completeness(nouns);
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
for(int i=1; i<NUMLAN; i++) {
|
2018-06-24 01:09:34 +00:00
|
|
|
addutftoset(allchars, d[i]);
|
|
|
|
addutftoset(allchars, nouns[i]);
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
std::string javastring;
|
|
|
|
std::vector<std::string> vchars;
|
|
|
|
for(auto&& elt : allchars) {
|
|
|
|
if(isize(elt) >= 2) { javastring += elt; vchars.push_back(elt); }
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
printf("\n");
|
2019-09-05 10:00:55 +00:00
|
|
|
printf("#if HDR\n");
|
2019-09-13 01:10:26 +00:00
|
|
|
printf("#if CAP_TRANS\n");
|
2019-08-22 10:40:39 +00:00
|
|
|
printf("#define NUMEXTRA %d\n", isize(vchars));
|
2017-08-13 18:49:38 +00:00
|
|
|
printf("#define NATCHARS {");
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : vchars) printf("\"%s\",", elt.c_str());
|
2020-02-23 01:51:27 +00:00
|
|
|
printf("}\n");
|
2019-09-06 07:19:07 +00:00
|
|
|
printf("extern const char* natchars[NUMEXTRA];\n");
|
2019-09-05 10:00:55 +00:00
|
|
|
printf("#endif\n");
|
2019-09-13 01:10:26 +00:00
|
|
|
printf("#endif\n");
|
2019-09-06 07:18:24 +00:00
|
|
|
printf("const char* natchars[NUMEXTRA] = NATCHARS;\n");
|
2016-08-26 09:58:03 +00:00
|
|
|
printf("//javastring = \"%s\";\n", javastring.c_str());
|
2017-08-06 12:50:16 +00:00
|
|
|
|
2019-09-05 10:00:55 +00:00
|
|
|
printf("\nEX int transcompleteness[NUMLAN] = {");
|
2017-08-06 12:50:16 +00:00
|
|
|
for(int i=0; i<NUMLAN; i++) printf("%d, ", completeness[i]);
|
|
|
|
printf("};\n");
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
printf("\n//statistics\n");
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : d[1].m)
|
|
|
|
d[0][elt.first] = elt.first;
|
|
|
|
for(auto&& elt : nouns[1].m) {
|
|
|
|
noun n = elt.second;
|
|
|
|
n.nom = n.nomp = n.acc = n.abl = elt.first;
|
|
|
|
nouns[0][elt.first] = n;
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
2018-06-24 01:09:34 +00:00
|
|
|
|
2019-08-22 10:40:39 +00:00
|
|
|
printf("// total: %5d nouns, %5d sentences\n", isize(nouns[1].m), isize(d[1].m));
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
for(int i=0; i<NUMLAN; i++) {
|
2018-06-24 01:09:34 +00:00
|
|
|
size_t bnouns = 0;
|
|
|
|
size_t bdict = 0;
|
|
|
|
|
|
|
|
for(auto&& elt : d[i].m)
|
|
|
|
bdict += elt.second.size();
|
|
|
|
for(auto&& elt : nouns[i].m) {
|
|
|
|
const noun& n = elt.second;
|
|
|
|
bnouns += n.nom.size();
|
|
|
|
bnouns += n.nomp.size();
|
|
|
|
bnouns += n.acc.size();
|
|
|
|
bnouns += n.abl.size();
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
|
2019-08-22 10:40:39 +00:00
|
|
|
printf("// %s: %5dB nouns, %5dB sentences\n",
|
|
|
|
d[i]["EN"].c_str(), int(bnouns), int(bdict));
|
2016-08-26 09:58:03 +00:00
|
|
|
}
|
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
std::set<std::string> allsent;
|
|
|
|
for(auto&& elt : d[1].m)
|
|
|
|
allsent.insert(elt.first);
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
std::set<std::string> allnouns;
|
|
|
|
for(auto&& elt : nouns[1].m)
|
|
|
|
allnouns.insert(elt.first);
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
std::map<hashcode, std::string> ms, mn;
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
do {
|
|
|
|
hashval = rand();
|
|
|
|
printf("// check hash: %x\n", hashval);
|
|
|
|
ms = buildHashTable(allsent);
|
|
|
|
mn = buildHashTable(allnouns);
|
|
|
|
}
|
2018-06-24 01:09:34 +00:00
|
|
|
while(ms.size() != allsent.size() || mn.size() != allnouns.size());
|
2016-08-26 09:58:03 +00:00
|
|
|
|
|
|
|
printf("hashcode hashval = 0x%x;\n\n", hashval);
|
|
|
|
|
2017-10-14 17:54:52 +00:00
|
|
|
printf("sentence all_sentences[] = {\n");
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : ms) {
|
|
|
|
const std::string& s = elt.second;
|
|
|
|
printf(" {0x%x, { // %s\n", elt.first, escape(s, s));
|
2016-08-26 09:58:03 +00:00
|
|
|
for(int i=1; i<NUMLAN; i++) printf(" %s,\n", escape(d[i][s], s));
|
|
|
|
printf(" }},\n");
|
|
|
|
}
|
|
|
|
printf(" };\n\n");
|
|
|
|
|
2017-10-14 17:54:52 +00:00
|
|
|
printf("fullnoun all_nouns[] = {\n");
|
2016-08-26 09:58:03 +00:00
|
|
|
|
2018-06-24 01:09:34 +00:00
|
|
|
for(auto&& elt : mn) {
|
|
|
|
const std::string& s = elt.second;
|
|
|
|
printf(" {0x%x, %d, { // \"%s\"\n", elt.first,
|
2016-08-26 09:58:03 +00:00
|
|
|
(nothe.count(s) ? 1:0) + (plural.count(s) ? 2:0),
|
|
|
|
escape(s, s));
|
|
|
|
|
|
|
|
for(int i=1; i<NUMLAN; i++) {
|
|
|
|
printf(" {%d", nouns[i][s].genus);
|
|
|
|
printf(", %s", escape(nouns[i][s].nom, s));
|
|
|
|
printf(", %s", escape(nouns[i][s].nomp, s));
|
|
|
|
printf(", %s", escape(nouns[i][s].acc, s));
|
|
|
|
printf(", %s},\n", escape(nouns[i][s].abl, s));
|
|
|
|
}
|
|
|
|
|
|
|
|
printf(" }},\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
printf(" };\n");
|
|
|
|
|
|
|
|
}
|