244 lines
4.8 KiB
TypeScript
Executable file
244 lines
4.8 KiB
TypeScript
Executable file
#!/usr/bin/env -S deno --allow-read --allow-write
|
|
|
|
import { parse, stringify } from "jsr:@std/yaml";
|
|
import miwafu from "npm:@fedran/miwafu@0.2.1";
|
|
|
|
// Scan src/dictionary/<group>/<file> and upgrade the first file that is still
// in the legacy (version 0) layout to the version 1 layout. The scan stops
// after a single conversion, so each run rewrites at most one file.
scan: for await (const rootEntry of Deno.readDir("src/dictionary")) {
  // A plain file at the top level cannot be listed as a directory; skip it
  // instead of letting Deno.readDir throw.
  if (rootEntry.isFile) continue;

  const rootPath = "src/dictionary/" + rootEntry.name + "/";

  console.log("-", rootEntry.name);

  for await (const dirEntry of Deno.readDir(rootPath)) {
    // A nested directory cannot be read as a text file.
    if (dirEntry.isDirectory) continue;

    // Load the file and see if we've already converted it.
    const filePath = rootPath + dirEntry.name;

    console.log(" - path:", filePath);

    const text = await Deno.readTextFile(filePath);

    // The YAML is not validated: OldEntry is the layout this script expects
    // to find, not something the parser guarantees.
    const data = parse(text) as OldEntry | null;

    // An empty or scalar-only document has no fields to convert.
    if (data === null || typeof data !== "object") {
      console.log(" skipping: not a YAML mapping");
      continue;
    }

    // Files without a version marker are treated as legacy (version 0).
    data.version ??= 0;

    console.log(" version:", data.version);

    if (data.version !== 0) {
      continue;
    }

    console.log(" converting");
    console.log(JSON.stringify(data, null, " "));

    // Convert the object. Everything is stored under the `qmw` language key.
    const newLanguage: NewLanguage = { parts: {} };
    const newEntry: NewEntry = {
      entry: data.base,
      languages: { qmw: newLanguage },
    };
    const newData: NewFile = {
      version: 1,
      entries: [newEntry],
    };

    // Nouns and verbs are split by gender; the remaining parts are flat lists.
    addGender(newLanguage, data.pos.noun, "noun", data.base);
    addGender(newLanguage, data.pos.verb, "verb", data.base);
    addList(newLanguage, data.pos.adv, "adv", data.base);
    addList(newLanguage, data.pos.adj, "adj", data.base);
    addList(newLanguage, data.pos.num, "num", data.base);
    addList(newLanguage, data.pos.pro, "pro", data.base);
    addList(newLanguage, data.pos.part, "particle", data.base);

    // Write out the results. The write is awaited so that a failure is raised
    // here instead of surfacing later as an unhandled promise rejection.
    const output = stringify(newData);
    await Deno.writeTextFile(filePath, output);

    console.log(" ---");
    console.log(output);

    // We only want to convert one at a time.
    break scan;
  }
}
|
|
|
|
/**
 * Converts a gender-split part of speech into up to three entries on `lang`,
 * one per gender list present in `gender`.
 *
 * Each entry is keyed by `part` and stored under the form that the matching
 * `miwafu.inflect*` call returns for `word` (presumably the gender-inflected
 * spelling; confirm against the miwafu package).
 *
 * @param lang   Language record that receives the converted entries.
 * @param gender Legacy gender lists; nothing is added when it is undefined.
 * @param part   Part-of-speech key to file the entries under.
 * @param word   Base word that is passed to the miwafu inflection helpers.
 */
function addGender(
  lang: NewLanguage,
  gender: OldGender | undefined,
  part: string,
  word: string,
): void {
  // Inflect once per gender; the same values feed both the trace line and
  // the converted entries.
  const masculine = miwafu.inflectMasculine(word);
  const syllables = miwafu.splitSyllables(word);
  const feminine = miwafu.inflectFeminine(word);
  const neuter = miwafu.inflectNeuter(word);

  // Trace output is emitted even when there is nothing to convert.
  console.log("------", word, syllables, masculine, feminine, neuter);

  if (!gender) return;

  addList(lang, gender.masculine, part, masculine);
  addList(lang, gender.feminine, part, feminine);
  addList(lang, gender.neuter, part, neuter);
}
|
|
|
|
/**
 * Adds one entry for `word` under `lang.parts[part]`, filled from `list`.
 *
 * This has the same contract as `addList` and simply forwards to it, so the
 * conversion logic lives in one place. No caller is visible in this file.
 *
 * @param lang Language record that receives the entry.
 * @param list Legacy definitions; nothing is added when it is undefined.
 * @param part Part-of-speech key to file the entry under.
 * @param word Word form recorded on the entry.
 */
function addPartList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  addList(lang, list, part, word);
}
|
|
|
|
/**
 * Adds one entry for `word` under `lang.parts[part]` and converts every
 * legacy definition in `list` into it.
 *
 * An empty `list` still registers an entry (with no definitions); only an
 * undefined `list` is skipped.
 *
 * @param lang Language record that receives the entry.
 * @param list Legacy definitions; nothing is added when it is undefined.
 * @param part Part-of-speech key to file the entry under.
 * @param word Word form recorded on the entry.
 */
function addList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) return;

  const entry: NewPart = { word, definitions: [] };

  // Create the bucket for this part of speech on first use, then register
  // the entry before its definitions are filled in.
  (lang.parts[part] ??= []).push(entry);

  for (const def of list) {
    add(entry, def, part, word);
  }
}
|
|
|
|
/**
 * Converts one legacy definition and appends the result to
 * `dest.definitions`.
 *
 * - `def.def` becomes `definition`.
 * - A truthy `def.oow` adds the tag `"reality"`.
 * - Each legacy reference becomes `{ ref, excerpt }`: `ref` is the URL with
 *   the `https://fedran.com/` prefix removed and `/chapter-0N/` shortened to
 *   `/N`; `excerpt` has trailing whitespace trimmed.
 *
 * Keys whose source value is missing are left out rather than stored as
 * `undefined`, because YAML has no representation for `undefined` and a
 * serializer may reject or drop such keys.
 *
 * @param dest Entry that receives the converted definition.
 * @param def  Legacy definition to convert.
 * @param part Part-of-speech key; used only in the trace line.
 * @param word Word form; used only in the trace line.
 */
function add(dest: NewPart, def: OldDefinition, part: string, word: string): void {
  // Create the top-level reference.
  console.log("**** def", part, word, def);

  // Declared inline so that the fields written below are part of the type.
  const converted: {
    definition?: string;
    tags?: string[];
    references?: { ref?: string; excerpt?: string }[];
  } = {};

  if (def.def !== undefined) {
    converted.definition = def.def;
  }

  dest.definitions.push(converted);

  // Add in the various tags.
  if (def.oow) {
    converted.tags = ["reality"];
  }

  // See if we have references.
  if (def.reference) {
    converted.references = def.reference.map((old) => {
      // Optional chaining short-circuits the whole chain when `url` is absent.
      const ref = old.url
        ?.replace("https://fedran.com/", "")
        .replace(/\/chapter-0*(\d+)\//, "/$1");
      const excerpt = old.excerpt?.trimEnd();

      const reference: { ref?: string; excerpt?: string } = {};
      if (ref !== undefined) reference.ref = ref;
      if (excerpt !== undefined) reference.excerpt = excerpt;
      return reference;
    });
  }
}
|
|
|
|
// Define the old data format.
|
|
/** Top-level shape of a legacy (version 0) dictionary file. */
interface OldEntry {
  /** Format version; the converter treats a missing value as 0. */
  version?: number;
  /** Base word; the converter reads this as the entry name and inflection input. */
  base: string;
  /** Not read by the converter; kept optional for files that may carry it. */
  name?: string;
  /** Definitions grouped by part of speech (a mapping, not a list). */
  pos: OldPartOfSpeech;
}
|
|
|
|
/**
 * Legacy definitions keyed by part of speech. Nouns and verbs are split by
 * gender; every other part is a flat list of definitions.
 */
interface OldPartOfSpeech {
  noun?: OldGender;
  verb?: OldGender;
  adv?: OldDefinition[];
  adj?: OldDefinition[];
  /** Read by the converter and written out under the `num` key. */
  num?: OldDefinition[];
  /** Read by the converter and written out under the `pro` key. */
  pro?: OldDefinition[];
  /** Read by the converter and written out under the `particle` key. */
  part?: OldDefinition[];
}
|
|
|
|
/**
 * Legacy definitions of one part of speech, split by grammatical gender.
 * Every list is optional; the converter skips the ones that are absent.
 */
interface OldGender {
  /** Definitions filed under the masculine form. */
  masculine?: Array<OldDefinition>;
  /** Definitions filed under the feminine form. */
  feminine?: Array<OldDefinition>;
  /** Definitions filed under the neuter form. */
  neuter?: Array<OldDefinition>;
}
|
|
|
|
/** One definition in the legacy (version 0) layout. */
interface OldDefinition {
  /** Definition text; copied to `definition` by the converter. */
  def?: string;
  /** Not read by the converter. */
  example?: OldExample;
  /** When truthy, the converter tags the definition with `"reality"`. */
  oow?: boolean;
  /** Source references; each is converted to a `{ ref, excerpt }` pair. */
  reference?: OldReference[];
}
|
|
|
|
/**
 * Usage example attached to a legacy definition. The converter in this file
 * does not read it.
 */
interface OldExample {
  /** Presumably the example in the dictionary's own language; confirm against the data. */
  miw: string;
  /** Presumably the English rendering of `miw`; confirm against the data. */
  en: string;
}
|
|
|
|
/** Source reference attached to a legacy definition. */
interface OldReference {
  /** Not read by the converter. */
  identifier?: string;
  /** Not read by the converter. */
  title?: string;
  /**
   * Link to the source; the converter strips the `https://fedran.com/`
   * prefix and shortens `/chapter-0N/` to `/N`.
   */
  url?: string;
  /** Quoted text; the converter trims trailing whitespace. */
  excerpt?: string;
}
|
|
|
|
// Define the new data format.
|
|
/** Root of a converted dictionary file. */
interface NewFile {
  /** Layout marker; the literal type pins converted files to version 1. */
  version: 1;
  /** Entries stored in the file; the converter always writes exactly one. */
  entries: Array<NewEntry>;
}
|
|
|
|
/** One dictionary entry in the version 1 layout. */
interface NewEntry {
  /** Entry name; the converter fills it from the legacy `base` field. */
  entry: string;
  /** Per-language data keyed by language code (the converter writes `qmw`). */
  languages: { [language: string]: NewLanguage };
}
|
|
|
|
/** Data recorded for a single language of an entry. */
interface NewLanguage {
  /**
   * Word forms keyed by part of speech. The converter writes the keys `noun`,
   * `verb`, `adv`, `adj`, `num`, `pro` and `particle`.
   */
  parts: { [pos: string]: NewPart[] };
}
|
|
|
|
/** One word form filed under a part of speech. */
interface NewPart {
  /** Word form the definitions belong to. */
  word: string;
  /** Definitions of `word`; may be empty. */
  definitions: NewDef[];
}
|
|
|
|
/** Reference attached to a converted definition. */
interface NewReference {
  /** Site-relative location derived from the legacy reference URL. */
  ref?: string;
  /** Quoted text with trailing whitespace removed. */
  excerpt?: string;
}

/** One definition in the version 1 layout. */
interface NewDef {
  /** Definition text; optional because the legacy `def` field is optional. */
  definition?: string;
  /** Free-form tags; the converter adds `"reality"` for legacy `oow` entries. */
  tags?: string[];
  /** References carried over from the legacy `reference` list. */
  references?: NewReference[];
}
|
|
|
|
/*
Example of a legacy (version 0) file, as read by the converter above:

pos:
  noun:
    masculine:
      - def: A long or a full night's sleep.
    feminine:
      - def: A nap.
  verb:
    masculine:
      - def: To sleep for a long time.
    feminine:
      - def: To take a nap.
base: zushi
*/
|