chore: converting dictionaries
This commit is contained in:
parent
c662876f3d
commit
7e00cb5b4f
8 changed files with 2920 additions and 84 deletions
195
convert
Executable file
195
convert
Executable file
|
@ -0,0 +1,195 @@
|
|||
#!/usr/bin/env -S deno --allow-read
|
||||
|
||||
import { parse, stringify } from "jsr:@std/yaml";
|
||||
import miwafu from "npm:fedran-miwafu@0.1.6";
|
||||
|
||||
// Debug output: print the imported miwafu module.
console.log(miwafu);

// Go through all the directories and parse each one. Only the first file that
// is still in the version 0 layout is converted; the run stops after it.
//
// NOTE(review): the converted file is written back in place, but the shebang
// only passes --allow-read to Deno. Confirm write permission is granted some
// other way.
let done = false;

for await (const rootEntry of Deno.readDir("src/dictionary")) {
  const rootPath = "src/dictionary/" + rootEntry.name + "/";

  console.log("-", rootEntry.name);

  for await (const dirEntry of Deno.readDir(rootPath)) {
    // Load the file and see if we've already converted it.
    const filePath = rootPath + dirEntry.name;

    console.log(" - path:", filePath);

    const text = await Deno.readTextFile(filePath);
    const parsed: unknown = parse(text);

    // An empty or scalar document cannot be a dictionary entry. Fail with the
    // offending path instead of an anonymous TypeError further down.
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error(`${filePath}: expected a YAML mapping at the top level`);
    }

    const data = parsed as {
      version?: number;
      base?: unknown;
      pos?: OldPartOfSpeech | null;
    };

    // Files without a version field are treated as version 0.
    data.version ??= 0;

    console.log(" version:", data.version);

    if (data.version !== 0) {
      continue;
    }

    console.log(" converting");
    console.log(JSON.stringify(data, null, " "));

    // Refuse to convert (and overwrite) a file that lacks the old-format
    // fields the conversion reads.
    const { base, pos } = data;

    if (typeof base !== "string" || typeof pos !== "object" || pos === null) {
      throw new Error(`${filePath}: version 0 file needs "base" and "pos"`);
    }

    // Convert the object.
    const newLanguage: NewLanguage = {
      parts: {},
    };

    const newEntry: NewEntry = {
      entry: base,
      languages: { qmw: newLanguage },
    };

    const newData: NewFile = {
      version: 1,
      entries: [newEntry],
    };

    addGender(newLanguage, pos.noun, "noun", base);
    addGender(newLanguage, pos.verb, "verb", base);
    addList(newLanguage, pos.adv, "adv", base);
    addList(newLanguage, pos.adj, "adj", base);

    // Write out the results. Awaited so that a failed write is reported here
    // rather than as an unhandled promise rejection.
    await Deno.writeTextFile(filePath, stringify(newData));

    console.log(" ---");
    console.log(stringify(newData));

    // We only want to convert one at a time.
    done = true;
    break;
  }

  if (done) break;
}
|
||||
|
||||
/**
 * Adds the masculine, feminine and neuter definition lists of one gendered
 * part of speech to `lang`.
 *
 * Each list is handed to `addList` together with the result of the matching
 * `miwafu.inflect*` call on `word`. Nothing happens when `gender` is
 * undefined.
 *
 * @param lang - Language record that receives the converted parts.
 * @param gender - Old-format gender block, or `undefined` when absent.
 * @param part - Key under `lang.parts` that the definitions are filed under.
 * @param word - Base word passed to the miwafu inflection helpers.
 */
function addGender(
  lang: NewLanguage,
  gender: OldGender | undefined,
  part: string,
  word: string,
): void {
  if (!gender) {
    return;
  }

  addList(lang, gender.masculine, part, miwafu.inflectMasculine(word));
  addList(lang, gender.feminine, part, miwafu.inflectFeminine(word));
  addList(lang, gender.neuter, part, miwafu.inflectNeuter(word));
}
|
||||
|
||||
/**
 * Appends every definition in `list` to `lang.parts[part]`.
 *
 * The previous body was a line-for-line copy of `addList`, so this now simply
 * delegates to it to keep the two from drifting apart.
 *
 * NOTE(review): no call to this function is visible in this script; it may be
 * a leftover that can be removed.
 *
 * @param lang - Language record that receives the converted parts.
 * @param list - Old-format definitions, or `undefined` when absent.
 * @param part - Key under `lang.parts` that the definitions are filed under.
 * @param word - Word recorded on every converted part.
 */
function addPartList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  addList(lang, list, part, word);
}
|
||||
|
||||
/**
 * Appends every definition in `list` to `lang.parts[part]`, creating that
 * array first when it does not exist yet.
 *
 * An `undefined` list is ignored. An empty list still creates the (empty)
 * array for `part`.
 *
 * @param lang - Language record that receives the converted parts.
 * @param list - Old-format definitions, or `undefined` when absent.
 * @param part - Key under `lang.parts` that the definitions are filed under.
 * @param word - Word recorded on every converted part.
 */
function addList(
  lang: NewLanguage,
  list: OldDefinition[] | undefined,
  part: string,
  word: string,
): void {
  if (!list) {
    return;
  }

  // Look the destination up once; `??=` yields the existing or the new array.
  const dest = (lang.parts[part] ??= []);

  for (const def of list) {
    add(dest, def, part, word);
  }
}
|
||||
|
||||
/**
 * Converts one old-format definition into a new-format part and appends it to
 * `dest`.
 *
 * Only `def.def` is carried over; the `example`, `oow` and `reference` fields
 * of the old definition are not copied.
 *
 * @param dest - Array that receives the new part.
 * @param def - Old-format definition to convert.
 * @param part - Part-of-speech key; used only in the debug log line.
 * @param word - Word recorded on the new part.
 */
function add(
  dest: NewPart[],
  def: OldDefinition,
  part: string,
  word: string,
): void {
  console.log("**** def", part, word, def);

  const newPart: NewPart = {
    word,
    definitions: [{ definition: def.def }],
  };

  dest.push(newPart);
}
|
||||
|
||||
// Define the old data format.
|
||||
/**
 * Top-level shape of a dictionary file in the old (version 0) layout, as the
 * conversion loop reads it.
 */
interface OldEntry {
  /** Format version; the conversion treats a missing value as 0. */
  version?: number;

  /** Base word; the conversion reads it as `data.base`. */
  base: string;

  /**
   * NOTE(review): the conversion never reads `name`. It is kept as optional
   * in case some files carry it; confirm and drop if not.
   */
  name?: string;

  /** Parts of speech as a single mapping (read as `data.pos.noun` etc.). */
  pos: OldPartOfSpeech;
}
|
||||
|
||||
/**
 * Parts of speech of an old-format entry. Nouns and verbs are grouped by
 * gender; adverbs and adjectives are plain definition lists.
 */
interface OldPartOfSpeech {
  /** Noun definitions, grouped by gender. */
  noun?: OldGender;

  /** Verb definitions, grouped by gender. */
  verb?: OldGender;

  /** Adverb definitions. */
  adv?: Array<OldDefinition>;

  /** Adjective definitions. */
  adj?: Array<OldDefinition>;
}
|
||||
|
||||
/** Definition lists of one old-format part of speech, one list per gender. */
interface OldGender {
  /** Definitions for the masculine form. */
  masculine?: Array<OldDefinition>;

  /** Definitions for the feminine form. */
  feminine?: Array<OldDefinition>;

  /** Definitions for the neuter form. */
  neuter?: Array<OldDefinition>;
}
|
||||
|
||||
/** A single definition in the old (version 0) layout. */
interface OldDefinition {
  /** Definition text; the only field the conversion copies over. */
  def?: string;

  /** Example attached to the definition. */
  example?: OldExample;

  /** NOTE(review): the meaning of `oow` is not shown in this file; confirm. */
  oow?: boolean;

  /** References attached to the definition. */
  reference?: OldReference[];
}
|
||||
|
||||
/**
 * Example attached to an old-format definition.
 *
 * NOTE(review): presumably `miw` is the example in the dictionary's language
 * and `en` its English rendering; confirm against the data files.
 */
interface OldExample {
  miw: string;
  en: string;
}
|
||||
|
||||
/** Reference attached to an old-format definition; every field is optional. */
interface OldReference {
  identifier?: string;
  title?: string;
  url?: string;
  excerpt?: string;
}
|
||||
|
||||
// Define the new data format.
|
||||
/** Top-level shape of a dictionary file in the new (version 1) layout. */
interface NewFile {
  /** Format marker; always the literal 1 for this layout. */
  version: 1;

  /** Entries stored in the file. */
  entries: Array<NewEntry>;
}
|
||||
|
||||
/** One entry of a new-format dictionary file. */
interface NewEntry {
  /** The entry word; the conversion fills it from the old file's `base`. */
  entry: string;

  /** Per-language data, keyed by language code (the conversion uses `qmw`). */
  languages: { [language: string]: NewLanguage };
}
|
||||
|
||||
/** Data of one language inside a new-format entry. */
interface NewLanguage {
  /** Converted parts, keyed by part of speech (`noun`, `verb`, `adv`, `adj`). */
  parts: { [pos: string]: NewPart[] };
}
|
||||
|
||||
/** One word form filed under a part of speech in the new layout. */
interface NewPart {
  /** The word form this part describes. */
  word: string;

  /**
   * Definitions of the word. `definition` is optional because the old
   * format's `def` field it is copied from is optional too.
   */
  definitions: { definition?: string }[];
}
|
||||
|
||||
/*
Example of the old (version 0) format:

pos:
  noun:
    masculine:
      - def: A long or a full night's sleep.
    feminine:
      - def: A nap.
  verb:
    masculine:
      - def: To sleep for a long time.
    feminine:
      - def: To take a nap.
base: zushi
*/
|
27
deno.lock
Normal file
27
deno.lock
Normal file
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"version": "4",
|
||||
"specifiers": {
|
||||
"jsr:@std/yaml@*": "1.0.5"
|
||||
},
|
||||
"jsr": {
|
||||
"@std/yaml@1.0.5": {
|
||||
"integrity": "71ba3d334305ee2149391931508b2c293a8490f94a337eef3a09cade1a2a2742"
|
||||
}
|
||||
},
|
||||
"workspace": {
|
||||
"packageJson": {
|
||||
"dependencies": [
|
||||
"npm:@commitlint/cli@^7.2.1",
|
||||
"npm:@commitlint/config-conventional@^7.1.2",
|
||||
"npm:commitizen@^3.0.5",
|
||||
"npm:cz-conventional-changelog@^2.1.0",
|
||||
"npm:fedran-miwafu@~0.1.6",
|
||||
"npm:fs-extra@^7.0.1",
|
||||
"npm:husky@^1.2.1",
|
||||
"npm:js-yaml@^3.13.1",
|
||||
"npm:json-stable-stringify@^1.0.1",
|
||||
"npm:pajv@^1.2.0"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
|
@ -25,6 +25,7 @@
|
|||
packages = [
|
||||
pkgs.nodejs_20
|
||||
pkgs.python3
|
||||
pkgs.deno
|
||||
]
|
||||
++ config.packages;
|
||||
|
||||
|
|
2745
package-lock.json
generated
2745
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
@ -34,5 +34,7 @@
|
|||
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS"
|
||||
}
|
||||
},
|
||||
"dependencies": {}
|
||||
"dependencies": {
|
||||
"fedran-miwafu": "^0.1.6"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
pos:
|
||||
version: 1
|
||||
entries:
|
||||
- entry: kadu
|
||||
languages:
|
||||
qmw:
|
||||
parts:
|
||||
noun:
|
||||
feminine:
|
||||
- def: Hand.
|
||||
base: kadu
|
||||
- word: kàdu
|
||||
definitions:
|
||||
- definition: Hand.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
version: 0
|
||||
version: 1
|
||||
|
||||
languages:
|
||||
qmw:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
version: 0
|
||||
version: 1
|
||||
|
||||
entries:
|
||||
- entry: asanogi
|
||||
|
@ -9,11 +9,10 @@ entries:
|
|||
ipa: "/a.sa.noː.ɡi/"
|
||||
|
||||
languages:
|
||||
- lang: qmw
|
||||
|
||||
qmw:
|
||||
parts:
|
||||
- part: noun
|
||||
word: asanōgi
|
||||
noun:
|
||||
- word: asanōgi
|
||||
|
||||
definitions:
|
||||
- definition: A drink made from infusing roasted tea leaves in hot water for a long period of time.
|
||||
|
|
Loading…
Reference in a new issue