chore: converting dictionaries

This commit is contained in:
D. Moonfire 2024-12-07 23:30:38 -06:00
parent c662876f3d
commit 7e00cb5b4f
8 changed files with 2920 additions and 84 deletions

195
convert Executable file
View file

@ -0,0 +1,195 @@
#!/usr/bin/env -S deno --allow-read
import { parse, stringify } from "jsr:@std/yaml";
import miwafu from "npm:fedran-miwafu@0.1.6";
// Debug output: print the imported miwafu module.
console.log(miwafu);

// Go through all the directories and convert the first dictionary file that
// is still in the old (version 0) layout. Only one file is converted per run.
//
// NOTE: the converted file is written back in place, which needs write access.
// The shebang only passes --allow-read, so run with --allow-write as well (or
// answer Deno's permission prompt).
let done = false;

for await (const rootEntry of Deno.readDir("src/dictionary")) {
  // A plain file at this level has no children; listing it would throw.
  if (rootEntry.isFile) continue;

  const rootPath = "src/dictionary/" + rootEntry.name + "/";
  console.log("-", rootEntry.name);

  for await (const dirEntry of Deno.readDir(rootPath)) {
    // A nested directory cannot be read as a text file.
    if (dirEntry.isDirectory) continue;

    // Load the file and see if we've already converted it.
    const filePath = rootPath + dirEntry.name;
    console.log(" - path:", filePath);

    const text = await Deno.readTextFile(filePath);
    const parsed: unknown = parse(text);

    // An empty document parses to null; fail with the path instead of an
    // anonymous TypeError on the first property access.
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error(`${filePath}: expected a YAML mapping at the top level`);
    }

    // The file is unvalidated input, so every field is treated as optional.
    const data = parsed as {
      version?: number;
      base?: string;
      pos?: OldPartOfSpeech | null;
    };

    data.version ??= 0;
    console.log(" version:", data.version);
    if (data.version !== 0) {
      continue;
    }

    console.log(" converting");
    console.log(JSON.stringify(data, null, " "));

    // Refuse to overwrite a file that does not have the old layout; writing
    // an empty entry over it would destroy its contents.
    const { base, pos } = data;
    if (base == null || pos == null) {
      throw new Error(
        `${filePath}: old-format entry needs both "base" and "pos"`,
      );
    }

    // Convert the object.
    const newLanguage: NewLanguage = { parts: {} };
    const newEntry: NewEntry = {
      entry: base,
      languages: { qmw: newLanguage },
    };
    const newData: NewFile = {
      version: 1,
      entries: [newEntry],
    };

    addGender(newLanguage, pos.noun, "noun", base);
    addGender(newLanguage, pos.verb, "verb", base);
    addList(newLanguage, pos.adv, "adv", base);
    addList(newLanguage, pos.adj, "adj", base);

    // Write out the results. Awaited so a failed write surfaces here rather
    // than as an unhandled rejection.
    const yaml = stringify(newData);
    await Deno.writeTextFile(filePath, yaml);
    console.log(" ---");
    console.log(yaml);

    // We only want to convert one at a time.
    done = true;
    break;
  }

  if (done) break;
}
/**
 * Copies the definitions of each grammatical gender into `lang` under `part`.
 *
 * Each gender's definitions are stored against the form of `word` returned by
 * the matching miwafu inflect function. Nothing happens when `gender` is
 * missing.
 *
 * @param lang Language record that receives the converted parts.
 * @param gender Old-format gender grouping, if the entry has one.
 * @param part Part-of-speech key to file the definitions under.
 * @param word Base word to inflect.
 */
function addGender(
  lang: NewLanguage,
  gender: OldGender | undefined,
  part: string,
  word: string,
): void {
  if (!gender) {
    return;
  }

  addList(lang, gender.masculine, part, miwafu.inflectMasculine(word));
  addList(lang, gender.feminine, part, miwafu.inflectFeminine(word));
  addList(lang, gender.neuter, part, miwafu.inflectNeuter(word));
}
/**
 * Appends every definition in `list` to `lang.parts[part]` for `word`.
 *
 * This has the same contract as `addList` and simply forwards to it, so the
 * two cannot drift apart.
 *
 * @param lang Language record that receives the converted parts.
 * @param list Old-format definitions; nothing happens when it is missing.
 * @param part Part-of-speech key to file the definitions under.
 * @param word Word the definitions belong to.
 */
function addPartList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  addList(lang, list, part, word);
}
/**
 * Appends every definition in `list` to `lang.parts[part]` for `word`.
 *
 * The part's array is created on first use, including when `list` is present
 * but empty. Nothing happens when `list` is missing.
 *
 * @param lang Language record that receives the converted parts.
 * @param list Old-format definitions, if any.
 * @param part Part-of-speech key to file the definitions under.
 * @param word Word the definitions belong to.
 */
function addList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) {
    return;
  }

  // Look the bucket up once instead of on every iteration.
  const bucket = (lang.parts[part] ??= []);
  for (const def of list) {
    add(bucket, def, part, word);
  }
}
/**
 * Appends one new-format part for `word`, holding a single definition taken
 * from `def.def`, to `dest`.
 *
 * @param dest Array that receives the new part.
 * @param def Old-format definition to convert.
 * @param part Part-of-speech key; only used in the debug log line.
 * @param word Word the definition belongs to.
 */
function add(
  dest: NewPart[],
  def: OldDefinition,
  part: string,
  word: string,
): void {
  console.log("**** def", part, word, def);

  // NOTE(review): only `def.def` is carried over; `example`, `oow` and
  // `reference` are not copied into the new format. Confirm this is intended.
  // NOTE(review): each definition becomes its own part even when the word
  // repeats. Confirm whether definitions of one word should be grouped.
  const newPart: NewPart = {
    word,
    definitions: [{ definition: def.def }],
  };
  dest.push(newPart);
}
// Define the old data format.

/** Top-level shape of an old (version 0) dictionary file. */
interface OldEntry {
  /** Missing in old files; the converter treats a missing value as 0. */
  version?: number;
  /** Base word of the entry; the converter reads this as `data.base`. */
  base: string;
  /** NOTE(review): kept from the earlier declaration; the converter never reads it. */
  name?: string;
  /** Definitions grouped by part of speech; a mapping, not a list. */
  pos: OldPartOfSpeech;
}

/** Definitions grouped by part of speech. */
interface OldPartOfSpeech {
  noun?: OldGender;
  verb?: OldGender;
  adv?: OldDefinition[];
  adj?: OldDefinition[];
}

/** Definitions of a noun or verb, split by grammatical gender. */
interface OldGender {
  masculine?: OldDefinition[];
  feminine?: OldDefinition[];
  neuter?: OldDefinition[];
}

/** A single old-format definition. */
interface OldDefinition {
  def?: string;
  example?: OldExample;
  oow?: boolean;
  reference?: OldReference[];
}

/** Example attached to a definition, as a `miw`/`en` pair. */
interface OldExample {
  miw: string;
  en: string;
}

/** Reference attached to a definition. */
interface OldReference {
  identifier?: string;
  title?: string;
  url?: string;
  excerpt?: string;
}
// Define the new data format.

/** Top-level shape of a converted (version 1) dictionary file. */
interface NewFile {
  version: 1;
  entries: NewEntry[];
}

/** One dictionary entry with its per-language data. */
interface NewEntry {
  entry: string;
  /** Keyed by language code; the converter writes the key `qmw`. */
  languages: Record<string, NewLanguage>;
}

/** Per-language data of an entry. */
interface NewLanguage {
  /** Keyed by part of speech, e.g. `noun`, `verb`, `adv`, `adj`. */
  parts: Record<string, NewPart[]>;
}

/** A word form together with its definitions. */
interface NewPart {
  word: string;
  definitions: NewDefinition[];
}

/** A single new-format definition. */
interface NewDefinition {
  /** Optional because the old-format `def` it is copied from is optional. */
  definition?: string;
}
/*
Sample file in the old format:

pos:
  noun:
    masculine:
      - def: A long or a full night's sleep.
    feminine:
      - def: A nap.
  verb:
    masculine:
      - def: To sleep for a long time.
    feminine:
      - def: To take a nap.
base: zushi
*/

27
deno.lock Normal file
View file

@ -0,0 +1,27 @@
{
"version": "4",
"specifiers": {
"jsr:@std/yaml@*": "1.0.5"
},
"jsr": {
"@std/yaml@1.0.5": {
"integrity": "71ba3d334305ee2149391931508b2c293a8490f94a337eef3a09cade1a2a2742"
}
},
"workspace": {
"packageJson": {
"dependencies": [
"npm:@commitlint/cli@^7.2.1",
"npm:@commitlint/config-conventional@^7.1.2",
"npm:commitizen@^3.0.5",
"npm:cz-conventional-changelog@^2.1.0",
"npm:fedran-miwafu@~0.1.6",
"npm:fs-extra@^7.0.1",
"npm:husky@^1.2.1",
"npm:js-yaml@^3.13.1",
"npm:json-stable-stringify@^1.0.1",
"npm:pajv@^1.2.0"
]
}
}
}

View file

@ -25,6 +25,7 @@
packages = [ packages = [
pkgs.nodejs_20 pkgs.nodejs_20
pkgs.python3 pkgs.python3
pkgs.deno
] ]
++ config.packages; ++ config.packages;

2745
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -34,5 +34,7 @@
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS" "commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
} }
}, },
"dependencies": {} "dependencies": {
"fedran-miwafu": "^0.1.6"
}
} }

View file

@ -1,5 +1,10 @@
pos: version: 1
entries:
- entry: kadu
languages:
qmw:
parts:
noun: noun:
feminine: - word: kàdu
- def: Hand. definitions:
base: kadu - definition: Hand.

View file

@ -1,5 +1,5 @@
--- ---
version: 0 version: 1
languages: languages:
qmw: qmw:

View file

@ -1,5 +1,5 @@
--- ---
version: 0 version: 1
entries: entries:
- entry: asanogi - entry: asanogi
@ -9,11 +9,10 @@ entries:
ipa: "/a.sa.noː.ɡi/" ipa: "/a.sa.noː.ɡi/"
languages: languages:
- lang: qmw qmw:
parts: parts:
- part: noun noun:
word: asanōgi - word: asanōgi
definitions: definitions:
- definition: A drink made from infusing roasted tea leaves in hot water for a long period of time. - definition: A drink made from infusing roasted tea leaves in hot water for a long period of time.