miwafu/convert

244 lines
4.8 KiB
TypeScript
Executable file

#!/usr/bin/env -S deno --allow-read --allow-write
import { parse, stringify } from "jsr:@std/yaml";
import miwafu from "npm:@fedran/miwafu@0.2.1";
// Go through all the dictionary directories and convert the first file that is
// still in the old (version 0) format. Only one file is converted per run.
let done = false;

for await (const rootEntry of Deno.readDir("src/dictionary")) {
  const rootPath = "src/dictionary/" + rootEntry.name + "/";

  console.log("-", rootEntry.name);

  for await (const dirEntry of Deno.readDir(rootPath)) {
    // Load the file and see if we've already converted it.
    const filePath = rootPath + dirEntry.name;

    console.log(" - path:", filePath);

    const text = await Deno.readTextFile(filePath);
    const data = parse(text);

    // A file without a version field is treated as version 0.
    data.version ??= 0;
    console.log(" version:", data.version);

    if (data.version !== 0) {
      continue;
    }

    console.log(" converting");
    console.log(JSON.stringify(data, null, " "));

    // Convert the object. The language and entry are wired into the new file
    // first; the add* helpers below then fill `newLanguage.parts` in place.
    const newLanguage: NewLanguage = {
      parts: {},
    };
    const newEntry: NewEntry = {
      entry: data.base,
      languages: { qmw: newLanguage },
    };
    const newData: NewFile = {
      version: 1,
      entries: [newEntry],
    };

    addGender(newLanguage, data.pos.noun, "noun", data.base);
    addGender(newLanguage, data.pos.verb, "verb", data.base);
    addList(newLanguage, data.pos.adv, "adv", data.base);
    addList(newLanguage, data.pos.adj, "adj", data.base);
    addList(newLanguage, data.pos.num, "num", data.base);
    addList(newLanguage, data.pos.pro, "pro", data.base);
    addList(newLanguage, data.pos.part, "particle", data.base);

    // Write out the results. The write is awaited so that a failure rejects
    // here (instead of as an unhandled rejection) and the output below is only
    // printed once the file has actually been written.
    await Deno.writeTextFile(filePath, stringify(newData));
    console.log(" ---");
    console.log(stringify(newData));

    // We only want to convert one at a time.
    done = true;
    break;
  }

  if (done) break;
}
/**
 * Adds the masculine, feminine and neuter definition lists of a gendered part
 * of speech to `lang`, each under the correspondingly inflected form of `word`.
 *
 * A trace line with the syllable split and all three inflections is always
 * logged, even when `gender` is undefined and nothing is added.
 *
 * @param lang Language object whose `parts` map receives the new entries.
 * @param gender Old gendered definitions, or undefined when the part of speech
 *   is absent from the old file.
 * @param part Key under `lang.parts` to file the entries under.
 * @param word Word that is passed to the miwafu inflection functions.
 */
function addGender(
  lang: NewLanguage,
  gender: OldGender | undefined,
  part: string,
  word: string,
): void {
  // Inflect once and reuse the results for both the trace and the lists
  // (assumes the miwafu inflection functions are deterministic).
  const masculine = miwafu.inflectMasculine(word);
  const syllables = miwafu.splitSyllables(word);
  const feminine = miwafu.inflectFeminine(word);
  const neuter = miwafu.inflectNeuter(word);

  console.log("------", word, syllables, masculine, feminine, neuter);

  if (!gender) {
    return;
  }

  addList(lang, gender.masculine, part, masculine);
  addList(lang, gender.feminine, part, feminine);
  addList(lang, gender.neuter, part, neuter);
}
/**
 * Appends a new part for `word` under `lang.parts[part]` and converts every
 * old definition in `list` into it. Does nothing when `list` is undefined.
 *
 * This performs the same steps as `addList`; no caller of this function is
 * visible in this script.
 *
 * @param lang Language object whose `parts` map receives the new part.
 * @param list Old definitions to convert, or undefined when there are none.
 * @param part Key under `lang.parts` to file the new part under.
 * @param word Word the new part is recorded for.
 */
function addPartList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) {
    return;
  }

  const entry: NewPart = { word, definitions: [] };

  // Create the bucket for this part of speech on first use, then append.
  (lang.parts[part] ??= []).push(entry);

  for (const def of list) {
    add(entry, def, part, word);
  }
}
/**
 * Appends a new part for `word` under `lang.parts[part]` and converts every
 * old definition in `list` into it. Does nothing when `list` is undefined; an
 * empty list still appends a part with no definitions.
 *
 * @param lang Language object whose `parts` map receives the new part.
 * @param list Old definitions to convert, or undefined when there are none.
 * @param part Key under `lang.parts` to file the new part under.
 * @param word Word the new part is recorded for.
 */
function addList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) {
    return;
  }

  const newPart: NewPart = {
    word,
    definitions: [],
  };

  // Create the bucket for this part of speech on first use.
  lang.parts[part] ??= [];
  lang.parts[part].push(newPart);

  for (const def of list) {
    add(newPart, def, part, word);
  }
}
/**
 * Converts one old definition and appends the result to `dest.definitions`.
 *
 * - `def.def` is copied to `definition`.
 * - A truthy `def.oow` adds the tag "reality".
 * - Each old reference becomes `{ ref, excerpt }`: `ref` is the url with the
 *   "https://fedran.com/" prefix removed and a "/chapter-NNN/" segment
 *   shortened to "/N" (leading zeros dropped); `excerpt` has trailing
 *   whitespace trimmed. A key is left out when the old reference has no value
 *   for it, so the converted object never carries `undefined` values into
 *   serialisation.
 *
 * @param dest New part that receives the converted definition.
 * @param def Old definition to convert.
 * @param part Part-of-speech key; only used for the trace output.
 * @param word Word being converted; only used for the trace output.
 */
function add(
  dest: NewPart,
  def: OldDefinition,
  part: string,
  word: string,
): void {
  type ConvertedReference = { ref?: string; excerpt?: string };

  // Create the top-level reference.
  console.log("**** def", part, word, def);

  // Declared with the optional fields up front so they can be filled in below.
  const newPart: {
    definition?: string;
    tags?: string[];
    references?: ConvertedReference[];
  } = { definition: def.def };

  dest.definitions.push(newPart);

  // Add in the various tags.
  if (def.oow) {
    newPart.tags = ["reality"];
  }

  // See if we have references.
  if (def.reference) {
    newPart.references = def.reference.map((o) => {
      const ref = o.url
        ?.replace("https://fedran.com/", "")
        .replace(/\/chapter-0*(\d+)\//, "/$1");
      const excerpt = o.excerpt?.trimEnd();

      // Only set the keys that actually have a value.
      const converted: ConvertedReference = {};
      if (ref !== undefined) converted.ref = ref;
      if (excerpt !== undefined) converted.excerpt = excerpt;
      return converted;
    });
  }
}
// Define the old data format (the files this script treats as version 0).

/** Root object of an old-format dictionary file. */
interface OldEntry {
  /** Missing in unconverted files; the converter treats a missing value as 0. */
  version?: number;
  /** The word itself; the converter reads it as `base` and inflects it. */
  base: string;
  /** Definitions grouped by part of speech. */
  pos: OldPartOfSpeech;
}

/** Definitions per part of speech; every part is optional. */
interface OldPartOfSpeech {
  noun?: OldGender;
  verb?: OldGender;
  adv?: OldDefinition[];
  adj?: OldDefinition[];
  num?: OldDefinition[];
  pro?: OldDefinition[];
  /** Written out under the key "particle" in the new format. */
  part?: OldDefinition[];
}

/** Definitions of a gendered part of speech, split by grammatical gender. */
interface OldGender {
  masculine?: OldDefinition[];
  feminine?: OldDefinition[];
  neuter?: OldDefinition[];
}

/** A single old-format definition. */
interface OldDefinition {
  def?: string;
  example?: OldExample;
  /** When truthy, the converted definition is tagged "reality". */
  oow?: boolean;
  reference?: OldReference[];
}

interface OldExample {
  miw: string;
  en: string;
}

/** Source reference attached to an old definition. */
interface OldReference {
  identifier?: string;
  title?: string;
  url?: string;
  excerpt?: string;
}
// Define the new data format (version 1).

/** Root object of a converted dictionary file. */
interface NewFile {
  version: 1;
  entries: NewEntry[];
}

/** One dictionary entry with its per-language data. */
interface NewEntry {
  entry: string;
  /** Keyed by language code; the converter writes a single "qmw" key. */
  languages: { [language: string]: NewLanguage };
}

interface NewLanguage {
  /** Keyed by part of speech, e.g. "noun", "verb", "particle". */
  parts: { [pos: string]: NewPart[] };
}

/** A word form together with its definitions. */
interface NewPart {
  word: string;
  definitions: NewDef[];
}

/** Converted source reference of a definition. */
interface NewReference {
  ref?: string;
  excerpt?: string;
}

interface NewDef {
  /** Optional because it is copied from the old, optional `def` field. */
  definition?: string;
  tags?: string[];
  references?: NewReference[];
}
/*
Sample of an old-format (version 0) file:

pos:
  noun:
    masculine:
      - def: A long or a full night's sleep.
    feminine:
      - def: A nap.
  verb:
    masculine:
      - def: To sleep for a long time.
    feminine:
      - def: To take a nap.
base: zushi
*/