miwafu/convert

195 lines
3.8 KiB
TypeScript
Executable file

#!/usr/bin/env -S deno --allow-read
import { parse, stringify } from "jsr:@std/yaml";
import miwafu from "npm:fedran-miwafu@0.1.6";
// Debug output: dump the imported inflection module to the log.
console.log(miwafu);

// Go through all the directories and parse each one.
// `done` is set after the first conversion so that both loops stop.
let done = false;

for await (const rootEntry of Deno.readDir("src/dictionary")) {
  const rootPath = "src/dictionary/" + rootEntry.name + "/";
  console.log("-", rootEntry.name);

  for await (const dirEntry of Deno.readDir(rootPath)) {
    // Load the file and see if we've already converted it.
    const filePath = rootPath + dirEntry.name;
    console.log(" - path:", filePath);

    const text = await Deno.readTextFile(filePath);
    const data = parse(text);

    // A file without a `version` key is treated as version 0 (not yet
    // converted); anything else is skipped.
    data.version ??= 0;
    console.log(" version:", data.version);

    if (data.version !== 0) {
      continue;
    }

    console.log(" converting");
    console.log(JSON.stringify(data, null, " "));

    // Convert the object. `newLanguage` is filled in place by the add*
    // helpers below, after it has been linked into `newData`.
    const newLanguage: NewLanguage = {
      parts: {},
    };
    const newEntry: NewEntry = {
      entry: data.base,
      languages: { qmw: newLanguage },
    };
    const newData: NewFile = {
      version: 1,
      entries: [newEntry],
    };

    addGender(newLanguage, data.pos.noun, "noun", data.base);
    addGender(newLanguage, data.pos.verb, "verb", data.base);
    addList(newLanguage, data.pos.adv, "adv", data.base);
    addList(newLanguage, data.pos.adj, "adj", data.base);

    // Write out the results. The write is awaited so that a failure surfaces
    // here instead of as an unhandled rejection, and so the log below is only
    // printed once the file has actually been replaced.
    // NOTE(review): writing needs Deno's write permission, but the shebang at
    // the top of this file passes only --allow-read; confirm how the script
    // is invoked.
    await Deno.writeTextFile(filePath, stringify(newData));
    console.log(" ---");
    console.log(stringify(newData));

    // We only want to convert one at a time.
    done = true;
    break;
  }

  if (done) break;
}
/**
 * Converts the gendered definition lists of one part of speech.
 *
 * Each of the masculine, feminine and neuter lists is added under `part`,
 * paired with the value returned by the matching `miwafu.inflect*` function
 * for `word`. Does nothing when `gender` is undefined.
 *
 * @param lang - New-format language object to append to (mutated in place).
 * @param gender - Old-format gendered definitions, if the entry has any.
 * @param part - Part-of-speech key to file the definitions under.
 * @param word - Base word handed to the inflection functions.
 */
function addGender(
  lang: NewLanguage,
  gender: OldGender | undefined,
  part: string,
  word: string,
): void {
  if (!gender) {
    return;
  }

  addList(lang, gender.masculine, part, miwafu.inflectMasculine(word));
  addList(lang, gender.feminine, part, miwafu.inflectFeminine(word));
  addList(lang, gender.neuter, part, miwafu.inflectNeuter(word));
}

/**
 * Alias of {@link addList}, kept so existing callers keep working.
 *
 * @param lang - New-format language object to append to (mutated in place).
 * @param list - Old-format definitions; nothing happens when undefined.
 * @param part - Part-of-speech key to file the definitions under.
 * @param word - Word recorded on every part that is created.
 */
function addPartList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  addList(lang, list, part, word);
}

/**
 * Appends one new-format part per old definition to `lang.parts[part]`.
 *
 * The list for `part` is created on first use, so a defined but empty `list`
 * still leaves an empty array under that key. An undefined `list` is a no-op.
 *
 * @param lang - New-format language object to append to (mutated in place).
 * @param list - Old-format definitions; nothing happens when undefined.
 * @param part - Part-of-speech key to file the definitions under.
 * @param word - Word recorded on every part that is created.
 */
function addList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) {
    return;
  }

  const dest = (lang.parts[part] ??= []);
  for (const def of list) {
    add(dest, def, part, word);
  }
}

/**
 * Builds a new-format part from a single old definition and pushes it onto
 * `dest`.
 *
 * Only `def.def` is carried over; the `example`, `oow` and `reference` fields
 * of the old definition are not copied.
 *
 * @param dest - Array that receives the new part.
 * @param def - Old-format definition to convert.
 * @param part - Part-of-speech key; used only in the log line.
 * @param word - Word recorded on the new part.
 */
function add(
  dest: NewPart[],
  def: OldDefinition,
  part: string,
  word: string,
): void {
  console.log("**** def", part, word, def);

  const newPart: NewPart = {
    word,
    definitions: [{ definition: def.def }],
  };
  dest.push(newPart);
}
// Define the old data format.

/** Top-level shape of an old-format dictionary file. */
interface OldEntry {
  /** Format version; the conversion loop treats a missing value as 0. */
  version?: number;
  /**
   * Kept from the earlier declaration. The conversion code in this file does
   * not read it. TODO confirm whether old files actually carry it.
   */
  name?: string;
  /** Base word; the converter uses it as the entry and inflects it. */
  base: string;
  /** Definitions grouped by part of speech (a mapping, not a list). */
  pos: OldPartOfSpeech;
}

/** Definitions of one entry, keyed by part of speech. */
interface OldPartOfSpeech {
  noun?: OldGender;
  verb?: OldGender;
  adv?: OldDefinition[];
  adj?: OldDefinition[];
}

/** Definition lists split by grammatical gender. */
interface OldGender {
  masculine?: OldDefinition[];
  feminine?: OldDefinition[];
  neuter?: OldDefinition[];
}

/** A single old-format definition. */
interface OldDefinition {
  /** Definition text; the only field the converter copies. */
  def?: string;
  example?: OldExample;
  oow?: boolean;
  reference?: OldReference[];
}

/** Example attached to a definition, as a pair of `miw` and `en` strings. */
interface OldExample {
  miw: string;
  en: string;
}

/** Source reference attached to a definition. */
interface OldReference {
  identifier?: string;
  title?: string;
  url?: string;
  excerpt?: string;
}
// Define the new data format.

/** Top-level shape of a converted file. */
interface NewFile {
  /** Format version written by the converter. */
  version: 1;
  entries: NewEntry[];
}

/** One dictionary entry with its per-language data. */
interface NewEntry {
  /** The entry's word; the converter fills it from the old file's `base`. */
  entry: string;
  /** Language data keyed by language code (the converter writes "qmw"). */
  languages: { [language: string]: NewLanguage };
}

/** Data of one language within an entry. */
interface NewLanguage {
  /** Parts keyed by part of speech ("noun", "verb", "adv", "adj"). */
  parts: { [pos: string]: NewPart[] };
}

/** A word form together with its definitions. */
interface NewPart {
  word: string;
  /** Definitions of `word`; `definition` mirrors the optional old `def`. */
  definitions: { definition?: string }[];
}
/*
Sample input in the old format (no `version` key):

pos:
  noun:
    masculine:
      - def: A long or a full night's sleep.
    feminine:
      - def: A nap.
  verb:
    masculine:
      - def: To sleep for a long time.
    feminine:
      - def: To take a nap.
base: zushi
*/