mirror of
				https://github.com/zenorogue/hyperrogue.git
				synced 2025-10-30 13:32:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			507 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			507 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Hyperbolic Rogue language file generator
 | ||
| 
 | ||
| // Copyright (C) 2011-2018 Zeno Rogue, see 'hyper.cpp' for details
 | ||
| 
 | ||
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>
 | ||
| 
 | ||
// Values for the `genus` field of a noun entry.
// NOTE(review): presumably masculine / feminine / neuter / other, and presumably
// referenced by the included language-*.cpp tables -- confirm there.
#define GEN_M 0
#define GEN_F 1
#define GEN_N 2
#define GEN_O 3

// Bit flags occupying bits 3..5, i.e. above the GEN_* values so that both can
// share one integer. NOTE(review): exact grammatical meaning is not visible in
// this file -- confirm against the language tables.
#define GENF_ELISION    (1 << 3)
#define GENF_PLURALONLY (1 << 4)
#define GENF_PROPER     (1 << 5)

// IF_MAC(y,z) expands to y when MAC is defined non-zero and to z otherwise.
#if MAC
 #define IF_MAC(y,z) y
#else
 #define IF_MAC(y,z) z
#endif
 | ||
| 
 | ||
// Element count of any container exposing size(), narrowed to int so that it
// can be compared against signed loop counters.
template<class T> int isize(const T& x) {
  return static_cast<int>(x.size());
  }
 | ||
| 
 | ||
// Number of language slots. main() fills slot 0 with the untranslated keys
// themselves; slots 1..NUMLAN-1 are filled by the lang*() loaders.
#define NUMLAN 9

// Stream receiving the generated file as well as all diagnostics. main() points
// it at stdout or at the file given with -o before anything is written.
FILE *f;

// language generator

// Code of the language being loaded (set by each lang*() function, "-" in
// between); only used in the text of warnings.
std::string current_language;

// Forward declaration: dictionary::add() calls escape() before its definition.
const char *escape(std::string s, const std::string& dft);
 | ||
| 
 | ||
| template<class T> struct dictionary {
 | ||
|   std::map<std::string, T> m;
 | ||
|   void add(const std::string& s, T val) {
 | ||
|     auto it = m.find(s);
 | ||
|     if (it == m.end()) {
 | ||
|       m.emplace(s, std::move(val));
 | ||
|       }
 | ||
|     else if (val != it->second) {
 | ||
|       fprintf(f, "// #warning Two translations for %s [%s]\n", escape(s, s), current_language.c_str());
 | ||
|       }
 | ||
|     }
 | ||
|   T& operator [] (const std::string& s) { return m[s]; }
 | ||
|   int count(const std::string& s) const { return m.count(s); }
 | ||
|   };
 | ||
| 
 | ||
| dictionary<std::string> d[NUMLAN];
 | ||
| 
 | ||
// Raw noun entry as it appears in the static language tables: an aggregate of
// plain C strings so it can be brace-initialized by the N(...) macros.
// NOTE(review): nom/nomp/acc/abl are presumably nominative, nominative plural,
// accusative and ablative forms -- confirm against the language files.
struct noun2 {
  int genus;
  const char *nom;
  const char *nomp;
  const char *acc;
  const char *abl;
  };

// Owning version of noun2 that is stored in the dictionaries.
struct noun {
  // Initialized so that a default-constructed noun never carries an
  // indeterminate value into comparisons or into the generated output.
  int genus = 0;
  std::string nom, nomp, acc, abl;

  noun() = default;

  // Deliberately implicit: dictionary<noun>::add() is called with noun2 values.
  // All four string pointers of `n` must be non-null.
  noun(const noun2& n) : genus(n.genus), nom(n.nom), nomp(n.nomp), acc(n.acc), abl(n.abl) {}

  // Two nouns are equal when the genus and all four forms match.
  friend bool operator==(const noun& a, const noun& b) {
    return std::tie(a.genus, a.nom, a.nomp, a.acc, a.abl) == std::tie(b.genus, b.nom, b.nomp, b.acc, b.abl);
    }

  // Defined through operator== so the two can never disagree.
  friend bool operator!=(const noun& a, const noun& b) {
    return !(a == b);
    }
  };
 | ||
| 
 | ||
// Noun translations, one dictionary per language slot (same indexing as `d`).
dictionary<noun> nouns[NUMLAN];
 | ||
| 
 | ||
// Number of bytes in the UTF-8 sequence that starts with byte `c`, decided
// purely from the value of that byte (no validation is performed).
int utfsize(char c) {
  const unsigned char lead = static_cast<unsigned char>(c);
  if(lead >= 0xF0) return 4;
  if(lead >= 0xE0) return 3;
  if(lead >= 0x80) return 2;
  return 1;
  }
 | ||
| 
 | ||
| void addutftoset(std::set<std::string>& s, const std::string& w) {
 | ||
|   size_t i = 0;
 | ||
|   while(i < w.size()) {
 | ||
|     int siz = utfsize(w[i]);
 | ||
|     s.insert(w.substr(i, siz));
 | ||
|     i += siz;
 | ||
|     }
 | ||
|   }
 | ||
| 
 | ||
| void addutftoset(std::set<std::string>& s, const noun& w) {
 | ||
|   addutftoset(s, w.nom);
 | ||
|   addutftoset(s, w.nomp);
 | ||
|   addutftoset(s, w.acc);
 | ||
|   addutftoset(s, w.abl);
 | ||
|   }
 | ||
| 
 | ||
| template<class T>
 | ||
| void addutftoset(std::set<std::string>& s, const dictionary<T>& w) {
 | ||
|   for(auto&& elt : w.m)
 | ||
|     addutftoset(s, elt.second);
 | ||
|   }
 | ||
| 
 | ||
// Every distinct UTF-8 sequence found in the translations, plus the extra
// symbols main() inserts up front; main() emits the multi-byte ones as NATCHARS.
std::set<std::string> allchars;
 | ||
| 
 | ||
// Hash value type used to index the generated tables.
using hashcode = unsigned;

// Multiplier of the polynomial hash; chosen by main() before hashing.
hashcode hashval;

// Polynomial hash of `s`: starting from 0, each byte is folded in as
// acc = acc * hashval + byte, in unsigned arithmetic (wrapping on overflow).
hashcode langhash(const std::string& s) {
  hashcode acc = 0;
  for(size_t pos = 0; pos < s.size(); pos++)
    acc = acc * hashval + s[pos];
  return acc;
  }
 | ||
| 
 | ||
| std::map<hashcode, std::string> buildHashTable(std::set<std::string>& s) {
 | ||
|   std::map<hashcode, std::string> res;
 | ||
|   for(auto&& elt : s)
 | ||
|     res[langhash(elt)] = elt;
 | ||
|   return res;
 | ||
|   }
 | ||
| 
 | ||
| const char *escape(std::string s, const std::string& dft) {
 | ||
|   if(s == "") {
 | ||
|     fprintf(f, "/*MISSING*/ ");
 | ||
|     s = dft;
 | ||
|     }
 | ||
|   static std::string t;
 | ||
|   t = "\"";
 | ||
|   for(int i=0; i<isize(s); i++)
 | ||
|     if(s[i] == '\\') t += "\\\\";
 | ||
|     else if(s[i] == '\n') t += "\\n";
 | ||
|     else if(s[i] == '\"') t += "\\\"";
 | ||
|     else t += s[i];
 | ||
|   t += "\"";
 | ||
|   return t.c_str();
 | ||
|   }
 | ||
| 
 | ||
// Noun keys that get flag bit 1 in the generated all_nouns table (filled in main()).
// NOTE(review): presumably names that take no definite article -- confirm in the game code.
std::set<std::string> nothe;
// Noun keys that get flag bit 2 in the generated all_nouns table (filled in main()).
// NOTE(review): presumably names that are grammatically plural -- confirm in the game code.
std::set<std::string> plural;
 | ||
| 
 | ||
| 
 | ||
|  void langPL() {
 | ||
|   current_language = "PL";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e,f)
 | ||
|     #include "language-pl.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
 | ||
|     #include "language-pl.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[1].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[1].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
| }
 | ||
| 
 | ||
| void langTR() {
 | ||
|   current_language = "TR";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e,f)
 | ||
|     #include "language-tr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
 | ||
|     #include "language-tr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[2].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[2].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langCZ() {
 | ||
|   current_language = "CZ";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e,f)
 | ||
|     #include "language-cz.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
 | ||
|     #include "language-cz.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[3].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[3].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langRU() {
 | ||
|   current_language = "RU";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e,f)
 | ||
|     #include "language-ru.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e,f) { a, noun2{ b, c, d, e, f } },
 | ||
|     #include "language-ru.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[4].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[4].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langDE() {
 | ||
|   current_language = "DE";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e)
 | ||
|     #include "language-de.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e) { a, noun2{ b, c, d, e, e } },
 | ||
|     #include "language-de.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[5].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[5].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langPT() {
 | ||
|   current_language = "PT";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e)
 | ||
|     #include "language-ptbr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e) { a, noun2{ b, c, d, "", e } },
 | ||
|     #include "language-ptbr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[6].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[6].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langFR() {
 | ||
|   current_language = "FR";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e)
 | ||
|     #include "language-fr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e) { a, noun2{ b, c, d, e, e } },
 | ||
|     #include "language-fr.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[7].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[7].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| void langZH() {
 | ||
|   current_language = "ZH";
 | ||
|   static std::pair<const char *, const char *> ds[] = {
 | ||
|     #define S(a,b) { a, b },
 | ||
|     #define N(a,b,c,d,e)
 | ||
|     #include "language-zh.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   static std::pair<const char *, noun2> ns[] = {
 | ||
|     #define S(a,b)
 | ||
|     #define N(a,b,c,d,e) { a, noun2{ b, c, c, d, e } },
 | ||
|     #include "language-zh.cpp"
 | ||
|     #undef N
 | ||
|     #undef S
 | ||
|     };
 | ||
|   for(auto&& elt : ds) d[8].add(elt.first, elt.second);
 | ||
|   for(auto&& elt : ns) nouns[8].add(elt.first, elt.second);
 | ||
|   current_language = "-";
 | ||
|   }
 | ||
| 
 | ||
| int completeness[NUMLAN];
 | ||
| 
 | ||
| template<class T>
 | ||
| void compute_completeness(const T& dict)
 | ||
| {
 | ||
|   std::set<std::string> s;
 | ||
|   for(int i=1; i<NUMLAN; i++) 
 | ||
|     for(auto&& elt : dict[i].m)
 | ||
|       s.insert(elt.first);
 | ||
|   
 | ||
|   for(auto&& elt : s) {
 | ||
|     std::string mis = "", mis1 = "", exist_in = "";
 | ||
|     bool in_important = false;
 | ||
|     for(int i=1; i<NUMLAN; i++) if(dict[i].count(elt) == 0) {
 | ||
|       std::string which = d[i]["EN"];
 | ||
|       if(which != "TR" && which != "DE" && which != "PT-BR" && which != "RU")
 | ||
|         mis += which + " ";
 | ||
|       else
 | ||
|         mis1 += which + " ";
 | ||
|       }
 | ||
|     else {
 | ||
|       std::string which = d[i]["EN"];
 | ||
|       if(which != "PT-BR" && which != "TR" && which != "DE") 
 | ||
|         in_important = true;
 | ||
|       exist_in += which + " ";
 | ||
|       }
 | ||
|     if(mis != "") mis.pop_back();
 | ||
|     if(mis1 != "") mis1.pop_back();
 | ||
|     if(exist_in != "") exist_in.pop_back();
 | ||
|     if(in_important && mis != "")
 | ||
|       fprintf(f, "// #warning Missing [%s : %s] from [%s]: %s\n", mis.c_str(), mis1.c_str(), exist_in.c_str(), escape(elt, "?"));
 | ||
| 
 | ||
|     completeness[0]++;
 | ||
|     for(int i=1; i<NUMLAN; i++) if(dict[i].count(elt)) completeness[i]++;
 | ||
|     }
 | ||
|   }
 | ||
|   
 | ||
| int main(int argc, char ** argv) {
 | ||
| 
 | ||
|   f = stdout;
 | ||
|   if(argc == 3 && strcmp(argv[1], "-o") == 0) f = fopen(argv[2], "wt");
 | ||
|   else if(argc != 1) {
 | ||
|     printf("Usage: langen -o <filename>, or without arguments to output to standard output\n");
 | ||
|     exit(1);
 | ||
|     }
 | ||
| 
 | ||
|   fprintf(f, "// DO NOT EDIT -- this file is generated automatically with langen\n\n");
 | ||
| 
 | ||
|   nothe.insert("R'Lyeh");
 | ||
|   nothe.insert("Camelot");
 | ||
|   nothe.insert("Hell");
 | ||
|   plural.insert("Crossroads");
 | ||
|   plural.insert("Crossroads II");
 | ||
|   plural.insert("Crossroads III");
 | ||
|   plural.insert("Elemental Planes");
 | ||
|   plural.insert("Crossroads IV");
 | ||
|   plural.insert("Kraken Depths");
 | ||
|   for(auto s: {
 | ||
|     // superscripts
 | ||
|     "¹", "²", "³", "⁻", "ᵈ", "ᶻ",
 | ||
|     // other math symbols
 | ||
|     "∞", "½", "√", "×",
 | ||
|     // Greek letters
 | ||
|     "α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι", "κ", "λ", "μ", "ν", "ξ", "ο", "π", "ρ", "σ", "τ", "υ", "φ", "χ", "ψ", "ω",
 | ||
|     // Greek capital letters
 | ||
|     "Δ", "Ω", "Θ", "Λ", "Γ", "Ξ", "Π", "Σ", "Φ", "Ψ",
 | ||
|     // other symbols
 | ||
|     "⌫", "⏎", "←", "→", "␣",
 | ||
|     // international letters
 | ||
|     "ñ", "Ñ", "ê", "Ç", "Ą", "Ć", "Ę", "Ł", "Ń", "Ó", "Ś", "Ź", "Ż"
 | ||
|     })
 | ||
|     allchars.insert(s);
 | ||
| 
 | ||
|   langPL(); langCZ(); langRU();
 | ||
|   langTR(); langDE(); langPT();
 | ||
|   langFR(); langZH();
 | ||
| 
 | ||
|   // verify
 | ||
|   compute_completeness(d);
 | ||
|   compute_completeness(nouns);
 | ||
| 
 | ||
|   for(int i=1; i<NUMLAN; i++) {
 | ||
|     addutftoset(allchars, d[i]);
 | ||
|     addutftoset(allchars, nouns[i]);
 | ||
|     }
 | ||
| 
 | ||
|   std::string javastring;
 | ||
|   std::vector<std::string> vchars;
 | ||
|   for(auto&& elt : allchars) {
 | ||
|     if(isize(elt) >= 2) { javastring += elt; vchars.push_back(elt); }
 | ||
|     }
 | ||
|   fprintf(f, "\n");
 | ||
|   fprintf(f, "#if HDR\n");
 | ||
|   fprintf(f, "#if CAP_TRANS\n");
 | ||
|   fprintf(f, "#define NUMEXTRA %d\n", isize(vchars));
 | ||
|   fprintf(f, "#define NATCHARS {");
 | ||
|   for(auto&& elt : vchars) fprintf(f, "\"%s\",", elt.c_str());
 | ||
|   fprintf(f, "}\n");
 | ||
|   fprintf(f, "extern const char* natchars[NUMEXTRA];\n");
 | ||
|   fprintf(f, "#endif\n");
 | ||
|   fprintf(f, "#endif\n");
 | ||
|   fprintf(f, "const char* natchars[NUMEXTRA] = NATCHARS;\n");
 | ||
|   fprintf(f, "//javastring = \"%s\";\n", javastring.c_str());
 | ||
|   
 | ||
|   fprintf(f, "\nEX int transcompleteness[NUMLAN] = {");
 | ||
|   for(int i=0; i<NUMLAN; i++) fprintf(f, "%d, ", completeness[i]);
 | ||
|   fprintf(f, "};\n");
 | ||
| 
 | ||
|   fprintf(f, "\n//statistics\n");
 | ||
|   for(auto&& elt : d[1].m)
 | ||
|     d[0][elt.first] = elt.first;
 | ||
|   for(auto&& elt : nouns[1].m) {
 | ||
|     noun n = elt.second;
 | ||
|     n.nom = n.nomp = n.acc = n.abl = elt.first;
 | ||
|     nouns[0][elt.first] = n;
 | ||
|     }
 | ||
| 
 | ||
|   fprintf(f, "// total: %5d nouns, %5d sentences\n", isize(nouns[1].m), isize(d[1].m));
 | ||
| 
 | ||
|   for(int i=0; i<NUMLAN; i++) {
 | ||
|     size_t bnouns = 0;
 | ||
|     size_t bdict = 0;
 | ||
| 
 | ||
|     for(auto&& elt : d[i].m)
 | ||
|       bdict += elt.second.size();
 | ||
|     for(auto&& elt : nouns[i].m) {
 | ||
|       const noun& n = elt.second;
 | ||
|       bnouns += n.nom.size();
 | ||
|       bnouns += n.nomp.size();
 | ||
|       bnouns += n.acc.size();
 | ||
|       bnouns += n.abl.size();
 | ||
|       }
 | ||
| 
 | ||
|     fprintf(f, "// %s: %5dB nouns, %5dB sentences\n",
 | ||
|       d[i]["EN"].c_str(), int(bnouns), int(bdict));
 | ||
|     }
 | ||
|   
 | ||
|   std::set<std::string> allsent;
 | ||
|   for(auto&& elt : d[1].m)
 | ||
|     allsent.insert(elt.first);
 | ||
| 
 | ||
|   std::set<std::string> allnouns;
 | ||
|   for(auto&& elt : nouns[1].m)
 | ||
|     allnouns.insert(elt.first);
 | ||
|   
 | ||
|   std::map<hashcode, std::string> ms, mn;
 | ||
|   
 | ||
|   do {
 | ||
|     hashval = rand();
 | ||
|     fprintf(f, "// check hash: %x\n", hashval);
 | ||
|     ms = buildHashTable(allsent);
 | ||
|     mn = buildHashTable(allnouns);
 | ||
|     }
 | ||
|   while(ms.size() != allsent.size() || mn.size() != allnouns.size());
 | ||
|   
 | ||
|   fprintf(f, "hashcode hashval = 0x%x;\n\n", hashval);
 | ||
|   
 | ||
|   fprintf(f, "sentence all_sentences[] = {\n");
 | ||
|   
 | ||
|   for(auto&& elt : ms) {
 | ||
|     const std::string& s = elt.second;
 | ||
|     fprintf(f, "  {0x%x, { // %s\n", elt.first, escape(s, s));
 | ||
|     for(int i=1; i<NUMLAN; i++) fprintf(f, "   %s,\n", escape(d[i][s], s));
 | ||
|     fprintf(f, "    }},\n");
 | ||
|     }
 | ||
|   fprintf(f, "  };\n\n");
 | ||
| 
 | ||
|   fprintf(f, "fullnoun all_nouns[] = {\n");
 | ||
|   
 | ||
|   for(auto&& elt : mn) {
 | ||
|     const std::string& s = elt.second;
 | ||
|     fprintf(f, "  {0x%x, %d, { // \"%s\"\n", elt.first,
 | ||
|       (nothe.count(s) ? 1:0) + (plural.count(s) ? 2:0),
 | ||
|       escape(s, s));
 | ||
|     
 | ||
|     for(int i=1; i<NUMLAN; i++) {
 | ||
|       fprintf(f, "    {%d", nouns[i][s].genus);
 | ||
|       fprintf(f, ", %s", escape(nouns[i][s].nom, s));
 | ||
|       fprintf(f, ", %s", escape(nouns[i][s].nomp, s));
 | ||
|       fprintf(f, ", %s", escape(nouns[i][s].acc, s));
 | ||
|       fprintf(f, ", %s},\n", escape(nouns[i][s].abl, s));
 | ||
|       }
 | ||
|     
 | ||
|     fprintf(f, "    }},\n");
 | ||
|     }
 | ||
| 
 | ||
|   fprintf(f, "  };\n");
 | ||
| 
 | ||
|   if(f != stdout) fclose(f);
 | ||
|   }
 | 
