work on downloading some more.

This commit is contained in:
Jordan
2025-03-11 07:26:49 -07:00
parent 8f67d0421b
commit dca3987e18
14 changed files with 480 additions and 414 deletions

View File

@ -1,123 +1,155 @@
import { Cache } from "react-native-cache";
import { LIBRETRANSLATE_BASE_URL } from "@/constants/api";
import AsyncStorage from '@react-native-async-storage/async-storage';
import AsyncStorage from "@react-native-async-storage/async-storage";
import { Settings } from "../lib/settings";
type language_t = string;
const cache = new Cache({
namespace: "translation_terrace",
policy: {
maxEntries: 50000, // if unspecified, it can have unlimited entries
stdTTL: 0 // the standard ttl as number in seconds, default: 0 (unlimited)
},
backend: AsyncStorage
namespace: "translation_terrace",
policy: {
maxEntries: 50000, // if unspecified, it can have unlimited entries
stdTTL: 0, // the standard ttl as number in seconds, default: 0 (unlimited)
},
backend: AsyncStorage,
});
export type language_matrix_entry = {
code: string,
name: string,
targets: string []
}
code: string;
name: string;
targets: string[];
};
export type language_matrix = {
[key:string] : language_matrix_entry
}
[key: string]: language_matrix_entry;
};
export async function fetchWithTimeout(url : string, options : RequestInit, timeout = 5000) : Promise<Response> {
return Promise.race([
fetch(url, options),
new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), timeout))
]);
/**
 * Fetch `url`, giving up if no response arrives within `timeout` milliseconds.
 *
 * @param url - Address to request.
 * @param options - Forwarded to `fetch`. A caller-supplied `signal` is kept
 *   as is; otherwise an internal abort signal is attached so the request is
 *   cancelled when the deadline passes.
 * @param timeout - Milliseconds to wait before giving up (default 5000).
 * @returns The response produced by `fetch`.
 * @throws Error with message "timeout" when the deadline passes first; a
 *   rejection from `fetch` itself is propagated unchanged.
 */
export async function fetchWithTimeout(
  url: string,
  options: RequestInit,
  timeout = 5000
): Promise<Response> {
  const controller = new AbortController();
  let timer: ReturnType<typeof setTimeout> | undefined;

  // Typed as Promise<never> so the race resolves to Response, not unknown.
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      // Reject first so the race settles with "timeout" rather than with the
      // abort error that fetch raises once its signal fires.
      reject(new Error("timeout"));
      controller.abort();
    }, timeout);
  });

  try {
    return await Promise.race([
      fetch(url, { ...options, signal: options?.signal ?? controller.signal }),
      deadline,
    ]);
  } finally {
    // Without this the timer stays scheduled for the full timeout even after
    // the response has already arrived.
    clearTimeout(timer);
  }
}
export class LanguageServer {
constructor(public baseUrl : string) {}
constructor(public baseUrl: string) {}
async fetchLanguages(timeout = 500) : Promise<language_matrix> {
let data = {};
const res = await fetchWithTimeout(this.baseUrl + "/languages", {
headers: {
"Content-Type": "application/json"
}
}, timeout);
try {
data = await res.json();
} catch (e) {
throw new Error(`Parsing data from ${await res.text()}: ${e}`)
}
try {
return Object.fromEntries(
Object.values(data as language_matrix_entry []).map((obj : language_matrix_entry) => {
return [
obj["code"],
obj,
]
})
)
} catch(e) {
throw new Error(`Can't extract values from data: ${JSON.stringify(data)}`)
}
/**
 * Request `<baseUrl>/languages` and index the returned entries by their
 * `code` field.
 *
 * @param timeout - Milliseconds allowed for the request (default 500).
 * @returns An object mapping each language code to its entry.
 * @throws Error when the body is not valid JSON (the message includes the raw
 *   body) or when the parsed value cannot be turned into entries.
 */
async fetchLanguages(timeout = 500): Promise<language_matrix> {
  const res = await fetchWithTimeout(
    this.baseUrl + "/languages",
    {
      headers: {
        "Content-Type": "application/json",
      },
    },
    timeout
  );

  // Read the body exactly once, as text. A Response body can only be
  // consumed a single time, so calling res.text() after a failed res.json()
  // would throw while building the error message.
  const raw = await res.text();
  let data: unknown;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    throw new Error(`Parsing data from ${raw}: ${e}`);
  }

  try {
    return Object.fromEntries(
      Object.values(data as language_matrix_entry[]).map(
        (obj: language_matrix_entry) => [obj["code"], obj]
      )
    );
  } catch (e) {
    throw new Error(`Can't extract values from data: ${JSON.stringify(data)}`);
  }
}
static async getDefault() {
const settings = await Settings.getDefault();
return new LanguageServer(await settings.getLibretranslateBaseUrl() || LIBRETRANSLATE_BASE_URL);
}
/**
 * Build a LanguageServer from the stored settings, using
 * LIBRETRANSLATE_BASE_URL when the stored base URL is empty or unset.
 */
static async getDefault() {
  const settings = await Settings.getDefault();
  const configuredUrl = await settings.getLibretranslateBaseUrl();
  const baseUrl = configuredUrl || LIBRETRANSLATE_BASE_URL;
  return new LanguageServer(baseUrl);
}
}
export class Translator {
constructor(public source : language_t, public defaultTarget : string = "en", private _languageServer : LanguageServer) {
}
/**
 * @param source - Language code sent as `source` in translate requests.
 * @param defaultTarget - Target language used when `translate` is called
 *   without one; defaults to "en". Because this parameter precedes a required
 *   one, callers must pass `undefined` explicitly to get the default.
 * @param _languageServer - Server whose `baseUrl` the requests are sent to.
 */
constructor(
public source: language_t,
public defaultTarget: string = "en",
private _languageServer: LanguageServer
) {}
get languageServer() {
return this._languageServer;
}
/** Read-only access to the LanguageServer supplied at construction. */
get languageServer() {
return this._languageServer;
}
async translate(text : string, target : string|undefined = undefined) {
const url = this._languageServer.baseUrl + `/translate`;
const res = await fetch(url, {
method: "POST",
body: JSON.stringify({
q: text,
source: this.source,
target: target || this.defaultTarget,
format: "text",
alternatives: 3,
api_key: ""
}),
headers: { "Content-Type": "application/json" }
});
async translate(text: string, target: string | undefined = undefined) {
const url = this._languageServer.baseUrl + `/translate`;
console.log(url);
const postData = {
method: "POST",
body: JSON.stringify({
q: text,
source: this.source,
target: target || this.defaultTarget,
format: "text",
alternatives: 3,
api_key: "",
}),
headers: { "Content-Type": "application/json" },
};
const data = await res.json();
console.log(data)
return data.translatedText
}
console.debug("Requesting %s with %o", url, postData);
static async getDefault(defaultTarget: string | undefined = undefined) {
const settings = await Settings.getDefault();
const source = await settings.getHostLanguage();
return new Translator(source, defaultTarget, await LanguageServer.getDefault())
const res = await fetch(url, postData);
const data = await res.json();
if (res.status === 200) {
console.log(data);
return data.translatedText;
} else {
console.error(data);
}
}
/**
 * Build a Translator from the stored settings.
 *
 * @param defaultTarget - Target language for translations; when omitted the
 *   constructor default applies.
 * @returns A Translator whose source is the stored host language, or "en"
 *   when none is stored (the same fallback CachedTranslator.getDefault uses).
 */
static async getDefault(defaultTarget: string | undefined = undefined) {
  const settings = await Settings.getDefault();
  // Without a fallback an unset host language would reach the constructor as
  // undefined and be sent to the server as the source language.
  const source = (await settings.getHostLanguage()) || "en";
  return new Translator(
    source,
    defaultTarget,
    await LanguageServer.getDefault()
  );
}
}
export class CachedTranslator extends Translator {
async translate (text : string, target : string|undefined = undefined) {
const targetKey = target || this.defaultTarget;
// console.debug(`Translating from ${this.source} -> ${targetKey}`)
const key1 = `${this.source}::${targetKey}::${text}`
const tr1 = await cache.get(key1);
if (tr1) return tr1;
const tr2 = await super.translate(text, target);
const key2 = `${this.source}::${targetKey}::${text}`
await cache.set(key2, tr2);
return tr2;
}
/**
 * Translate `text`, serving repeated requests from the cache.
 *
 * The cache key combines source language, target language and the text.
 *
 * @param text - Text to translate.
 * @param target - Target language; falls back to `defaultTarget`.
 * @returns The cached or freshly fetched translation, or whatever
 *   `super.translate` resolved to when it produced no translation.
 */
async translate(text: string, target: string | undefined = undefined) {
  const targetKey = target || this.defaultTarget;
  const cacheKey = `${this.source}::${targetKey}::${text}`;

  const cached = await cache.get(cacheKey);
  if (cached) return cached;

  const translated = await super.translate(text, target);
  // super.translate resolves to undefined when the server answers with a
  // non-200 status; storing that would write a useless entry, so only real
  // results are cached.
  if (translated != null) {
    await cache.set(cacheKey, translated);
  }
  return translated;
}
static async getDefault(defaultTarget: string | undefined = undefined) {
const settings = await Settings.getDefault();
const source = await settings.getHostLanguage();
return new CachedTranslator(source, defaultTarget, await LanguageServer.getDefault())
}
}
/**
 * Build a CachedTranslator from the stored settings.
 *
 * The source language is the stored host language, or "en" when that is
 * empty or unset.
 */
static async getDefault(defaultTarget: string | undefined = undefined) {
  const settings = await Settings.getDefault();
  const hostLanguage = await settings.getHostLanguage();
  const server = await LanguageServer.getDefault();
  return new CachedTranslator(hostLanguage || "en", defaultTarget, server);
}
}

View File

@ -8,11 +8,12 @@ describe('Settings', () => {
beforeEach(async () => {
db = await getDb("development");
await migrateDb("development");
settings = new Settings(db);
});
afterEach(async () => {
await migrateDb("development");
await migrateDb("development", "down");
});
it('should set the host language in the database', async () => {

View File

@ -1,101 +1,170 @@
// components/ui/__tests__/WhisperFile.spec.tsx
// app/lib/__tests__/whisper.spec.tsx
import React from "react";
import { render, act } from "@testing-library/react-native";
import { WhisperFile } from "@/app/lib/whisper"; // Adjust the import path as necessary
import { getDb } from "@/app/lib/db";
import { WhisperFile, WhisperModelTag } from "@/app/lib/whisper"; // Corrected to use WhisperFile and WhisperModelTag instead of WhisperDownloader
import { Settings } from "@/app/lib/settings";
import { File } from "expo-file-system/next";
jest.mock('expo-file-system');
import * as FileSystem from 'expo-file-system';
jest.mock("@/app/lib/db", () => ({
getDb: jest.fn().mockResolvedValue({
runAsync: jest.fn(),
upsert: jest.fn(), // Mock the upsert method used in addToDatabase
}),
}));
jest.mock("@/app/lib/settings", () => ({
Settings: {
getDefault: jest.fn(() => ({
getValue: jest.fn((key) => {
switch (key) {
case "whisper_model":
return "base";
default:
throw new Error(`Invalid setting: '${key}'`);
}
}),
})),
},
}));
jest.mock("expo-file-system/next", () => {
const _next = jest.requireActual("expo-file-system/next");
return {
..._next,
File: jest.fn().mockImplementation(() => ({
..._next.File,
text: jest.fn(() => {
return new String("text");
}),
})),
};
});
describe("WhisperFile", () => {
// Corrected to use WhisperFile instead of WhisperDownloader
let whisperFile: WhisperFile;
beforeEach(() => {
beforeEach(async () => {
whisperFile = new WhisperFile("small");
});
it("should initialize correctly", () => {
expect(whisperFile).toBeInstanceOf(WhisperFile);
it("should create a download resumable with existing data if available", async () => {
const mockExistingData = "mockExistingData";
jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(true);
await whisperFile.createDownloadResumable();
// expect(whisperFile.targetFileName).toEqual("small.bin");
expect(whisperFile.targetPath).toContain("small.bin");
expect(FileSystem.createDownloadResumable).toHaveBeenCalledWith(
"https://huggingface.co/openai/whisper-small/resolve/main/pytorch_model.bin",
"file:///whisper/small.bin",
{},
expect.any(Function),
expect.anything(),
);
});
describe("getModelFileSize", () => {
it("should return the correct model file size", async () => {
expect(whisperFile.size).toBeUndefined();
await whisperFile.updateMetadata();
expect(whisperFile.size).toBeGreaterThan(1000);
});
});
// it("should create a download resumable without existing data if not available", async () => {
// jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(false);
describe("getWhisperDownloadStatus", () => {
it("should return the correct download status", async () => {
const mockStatus = {
doesTargetExist: true,
isDownloadComplete: false,
hasDownloadStarted: true,
progress: {
current: 100,
total: 200,
remaining: 100,
percentRemaining: 50.0,
},
};
jest
.spyOn(whisperFile, "getDownloadStatus")
.mockResolvedValue(mockStatus);
// await whisperFile.createDownloadResumable(); // Updated to use createDownloadResumable instead of download
const result = await whisperFile.getDownloadStatus();
// expect(FileSystem.createDownloadResumable).toHaveBeenCalledWith(
// "http://mock.model.com/model",
// "mockTargetPath",
// {},
// expect.any(Function),
// undefined
// );
// });
expect(result).toEqual(mockStatus);
});
});
// it("should update the download status in the database", async () => {
// const mockRunAsync = jest.fn();
// (getDb as jest.Mock).mockResolvedValue({ runAsync: mockRunAsync });
describe("initiateWhisperDownload", () => {
it("should initiate the download with default options", async () => {
const mockModelLabel = "small";
jest
.spyOn(whisperFile, "createDownloadResumable")
.mockResolvedValue(true);
// const downloadable = await whisperFile.createDownloadResumable(); // Updated to use createDownloadResumable instead of download
// await downloadable.resumeAsync();
await whisperFile.initiateWhisperDownload(mockModelLabel);
// jest.advanceTimersByTime(1000);
expect(whisperFile.createDownloadResumable).toHaveBeenCalledWith(
mockModelLabel
);
});
// expect(mockRunAsync).toHaveBeenCalled();
// });
it("should initiate the download with custom options", async () => {
const mockModelLabel = "small";
const mockOptions = { force_redownload: true };
jest
.spyOn(whisperFile, "createDownloadResumable")
.mockResolvedValue(true);
// it("should record the latest target hash after downloading", async () => {
// const mockRecordLatestTargetHash = jest.spyOn(
// whisperFile,
// "recordLatestTargetHash"
// );
await whisperFile.initiateWhisperDownload(mockModelLabel, mockOptions);
// await whisperFile.createDownloadResumable(); // Updated to use createDownloadResumable instead of download
expect(whisperFile.createDownloadResumable).toHaveBeenCalledWith(
mockModelLabel,
mockOptions
);
});
// expect(mockRecordLatestTargetHash).toHaveBeenCalled();
// });
it("should return the correct download status when target exists and is complete", async () => {
jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(true);
jest.spyOn(whisperFile, "isDownloadComplete").mockResolvedValue(true);
// it("should call the onData callback if provided", async () => {
// const mockOnData = jest.fn();
// const options = { onData: mockOnData };
expect(await whisperFile.doesTargetExist()).toEqual(true);
expect(await whisperFile.isDownloadComplete()).toEqual(true);
});
// await whisperFile.createDownloadResumable(options); // Updated to use createDownloadResumable instead of download
it("should return the correct download status when target does not exist", async () => {
jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(false);
// expect(mockOnData).toHaveBeenCalledWith(expect.any(Object));
// });
const result = await whisperFile.getDownloadStatus();
// describe("getDownloadStatus", () => {
// it("should return the correct download status when model size is known and download has started", async () => {
// whisperFile.size = 1024;
// jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(true);
// jest.spyOn(whisperFile, "isDownloadComplete").mockResolvedValue(false);
// jest.spyOn(whisperFile, "targetFile").mockReturnValue({
// size: 512,
// });
expect(result).toEqual({
doesTargetExist: false,
isDownloadComplete: false,
hasDownloadStarted: false,
progress: undefined,
});
});
});
// const status = await whisperFile.getDownloadStatus();
// Add more tests as needed for other methods in WhisperFile
// expect(status).toEqual({
// doesTargetExist: true,
// isDownloadComplete: false,
// hasDownloadStarted: true,
// progress: {
// current: 512,
// total: 1024,
// remaining: 512,
// percentRemaining: 50.0,
// },
// });
// });
// it("should return the correct download status when model size is known and download is complete", async () => {
// whisperFile.size = 1024;
// jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(true);
// jest.spyOn(whisperFile, "isDownloadComplete").mockResolvedValue(true);
// const status = await whisperFile.getDownloadStatus();
// expect(status).toEqual({
// doesTargetExist: true,
// isDownloadComplete: true,
// hasDownloadStarted: false,
// progress: undefined,
// });
// });
// it("should return the correct download status when model size is unknown", async () => {
// jest.spyOn(whisperFile, "doesTargetExist").mockResolvedValue(false);
// const status = await whisperFile.getDownloadStatus();
// expect(status).toEqual({
// doesTargetExist: false,
// isDownloadComplete: false,
// hasDownloadStarted: false,
// progress: undefined,
// });
// });
// });
});

View File

@ -2,15 +2,16 @@
export const MIGRATE_UP = {
1: [
`CREATE TABLE IF NOT EXISTS settings (
host_language TEXT,
libretranslate_base_url TEXT,
ui_direction INTEGER,
whisper_model TEXT
)`,
key TEXT PRIMARY KEY,
value TEXT
)`,
],
2: [
`CREATE TABLE IF NOT EXISTS whisper_models (
model TEXT PRIMARY KEY,
download_status STRING(255),
expected_size INTEGER,
last_hash STRING(1024),
bytes_done INTEGER,
bytes_total INTEGER
)`,

View File

@ -1,5 +1,6 @@
import { SQLiteDatabase } from "expo-sqlite";
import { getDb } from "./db";
import { WhisperFile, whisper_model_tag_t } from "./whisper";
export class Settings {
@ -20,10 +21,9 @@ export class Settings {
throw new Error(`Invalid setting: '${key}'`)
}
const row: { [key: string]: string } | null = this.db.getFirstSync(`SELECT ${key} from settings LIMIT 1`)
const row: { value: string } | null = this.db.getFirstSync(`SELECT value FROM settings WHERE key = ?`, key)
if (!(row && row[key])) return undefined;
return row[key];
return row?.value;
}
@ -33,13 +33,11 @@ export class Settings {
}
// Check if the key already exists
this.db.runSync(`INSERT INTO OR UPDATE
this.db.runSync(`INSERT OR REPLACE INTO
settings
(${key})
(key, value)
VALUES
(?)
WHERE
${key} IS NOT NULL`, value);
(?, ?)`, key, value);
}
async setHostLanguage(value: string) {
@ -63,11 +61,10 @@ export class Settings {
}
async getWhisperModel() {
return await this.getValue("whisper_model");
return await this.getValue("whisper_model") as whisper_model_tag_t;
}
/** Create a Settings instance backed by the database returned by getDb(). */
static async getDefault() {
  const db = await getDb();
  return new Settings(db);
}
}

5
app/lib/util.ts Normal file
View File

@ -0,0 +1,5 @@
import { TextDecoder } from "util";
/**
 * Decode the bytes of an ArrayBuffer into a string using TextDecoder's
 * default encoding.
 *
 * @param arrayBuffer - Buffer holding the encoded text.
 * @returns The decoded string.
 */
export async function arrbufToStr(arrayBuffer: ArrayBuffer) {
  const decoder = new TextDecoder();
  const bytes = new Uint8Array(arrayBuffer);
  return decoder.decode(bytes);
}

View File

@ -3,6 +3,7 @@ import * as FileSystem from "expo-file-system";
import { File, Paths } from "expo-file-system/next";
import { getDb } from "./db";
import * as Crypto from "expo-crypto";
import { arrbufToStr } from "./util";
export const WHISPER_MODEL_PATH = Paths.join(
FileSystem.documentDirectory || "file:///",
@ -114,6 +115,12 @@ export type download_status_t = {
};
export class WhisperFile {
hf_metadata: hf_metadata_t | undefined;
target_hash: string | undefined;
does_target_exist: boolean = false;
download_data: FileSystem.DownloadProgressData | undefined;
constructor(
public tag: whisper_model_tag_t,
private targetFileName?: string,
@ -122,11 +129,11 @@ export class WhisperFile {
) {
this.targetFileName = this.targetFileName || `${tag}.bin`;
this.label =
this.label || `${tag[0].toUpperCase}${tag.substring(1).toLowerCase()}`;
this.label || `${tag[0].toUpperCase()}${tag.substring(1).toLowerCase()}`;
}
get targetPath() {
return Paths.join(WHISPER_MODEL_DIR, this.targetFileName as string);
return Paths.join(WHISPER_MODEL_PATH, this.targetFileName as string);
}
get targetFile() {
@ -137,79 +144,30 @@ export class WhisperFile {
return await FileSystem.getInfoAsync(this.targetPath);
}
async doesTargetExist() {
return (await this.getTargetInfo()).exists;
/** Refresh `does_target_exist` from the file info of the target path. */
async updateTargetExistence() {
  const info = await this.getTargetInfo();
  this.does_target_exist = info.exists;
}
public async recordLatestTargetHash() {
if (!(await this.doesTargetExist())) {
console.debug("%s does not exist", this.targetPath);
}
if (!this.label) {
throw new Error("No label");
}
const digest1Str = await this.getActualTargetHash();
if (!digest1Str) {
return;
}
const db = await getDb();
db.runSync(`INSERT OR UPDATE
INTO whisper_models
(model, last_hash)
VALUES (?, ?)
WHERE
model = ?`, this.label, digest1Str, this.label);
}
public async getRecordedTargetHash(): Promise<string> {
const db = await getDb();
const row = db.getFirstSync("SELECT last_hash FROM whisper_models WHERE model = ?", this.tag);
return (row as {last_hash: string}).last_hash
}
public async getActualTargetHash(): Promise<string | undefined> {
if (!(await this.doesTargetExist())) {
public async getTargetSha() {
await this.updateTargetExistence();
if (!this.does_target_exist) {
console.debug("%s does not exist", this.targetPath);
return undefined;
}
const digest1 = await Crypto.digest(
return await Crypto.digest(
Crypto.CryptoDigestAlgorithm.SHA256,
this.targetFile.bytes()
);
const digest1Str = new TextDecoder().decode(new Uint8Array(digest1));
return digest1Str;
}
async isTargetCorrupted() {
const recordedTargetHash = await this.getRecordedTargetHash();
const actualTargetHash = await this.getActualTargetHash();
if (!(actualTargetHash || recordedTargetHash)) return false;
return actualTargetHash !== recordedTargetHash;
/**
 * Recompute `target_hash` from the file currently on disk.
 *
 * The digest is stored as lowercase hex with a `sha256:` prefix. Decoding
 * the raw digest bytes as UTF-8 text is lossy and can never equal a textual
 * object id.
 * NOTE(review): the prefix assumes `hf_metadata.oid` uses the Git LFS pointer
 * form `sha256:<hex>` - confirm against the metadata the server returns.
 *
 * When no digest is available the stored hash is cleared, so a deleted
 * target does not keep a stale hash.
 */
public async updateTargetHash() {
  const targetSha = await this.getTargetSha();
  if (!targetSha) {
    this.target_hash = undefined;
    return;
  }
  const hex = Array.from(new Uint8Array(targetSha), (byte) =>
    byte.toString(16).padStart(2, "0")
  ).join("");
  this.target_hash = `sha256:${hex}`;
}
async isDownloadComplete() {
if (!(await this.doesTargetExist())) {
console.debug("%s does not exist", this.targetPath);
return false;
}
const data = this.targetFile.bytes();
const meta = await this.fetchMetadata();
const expectedHash = meta.oid;
const digest1: ArrayBuffer = await Crypto.digest(
Crypto.CryptoDigestAlgorithm.SHA256,
data
);
const digest1Str = new TextDecoder().decode(new Uint8Array(digest1));
const doesMatch = digest1Str === expectedHash;
if (!doesMatch) {
console.debug(
"sha256 of '%s' does not match expected '%s'",
digest1Str,
expectedHash
);
return false;
}
return true;
/**
 * True only when a target hash has been computed and it equals the `oid`
 * of the loaded metadata.
 *
 * The explicit undefined check matters: before either value is loaded both
 * sides are undefined, and a bare `===` would report the hash as valid.
 */
get isHashValid() {
  return (
    this.target_hash !== undefined &&
    this.target_hash === this.hf_metadata?.oid
  );
}
delete(ignoreErrors = true) {
@ -232,7 +190,21 @@ export class WhisperFile {
return create_hf_url(this.tag, "raw");
}
private async fetchMetadata(): Promise<hf_metadata_t> {
/**
 * Percentage of the download written so far, from the latest progress data.
 *
 * Returns 0 when no progress has been recorded yet, and also when the
 * expected total is not a positive number (dividing by 0 or by a negative
 * "unknown size" marker would yield NaN, Infinity or a negative percentage).
 */
get percentDone() {
  if (!this.download_data) return 0;
  const { totalBytesWritten, totalBytesExpectedToWrite } = this.download_data;
  if (!(totalBytesExpectedToWrite > 0)) return 0;
  return (totalBytesWritten / totalBytesExpectedToWrite) * 100;
}
/**
 * Percentage of the download still outstanding: 100 minus `percentDone`.
 *
 * NOTE(review): with no progress data this returns 0, so `percentDone` and
 * `percentLeft` are both 0 rather than summing to 100 - confirm intended.
 */
get percentLeft() {
  return this.download_data ? 100 - this.percentDone : 0;
}
public async syncHfMetadata() {
try {
const resp = await fetch(this.metadataUrl, {
credentials: "include",
@ -254,7 +226,7 @@ export class WhisperFile {
mode: "cors",
});
const text = await resp.text();
return Object.fromEntries(
this.hf_metadata = Object.fromEntries(
text.split("\n").map((line) => line.split(" "))
) as hf_metadata_t;
} catch (err) {
@ -263,95 +235,50 @@ export class WhisperFile {
}
}
async updateMetadata() {
const metadata = await this.fetchMetadata();
this.size = Number.parseInt(metadata.size);
}
async addToDatabase() {
const db = await getDb();
if (!(this.size && this.tag)) {
throw new Error();
}
db.runSync(`INSERT OR UPDATE
INTO whisper_models
(model, expected_size)
VALUES
(?, ?)
WHERE
model = ?`, this.tag, this.size.valueOf(), this.tag);
}
async createDownloadResumable(
options: {
onData?: DownloadCallback | undefined;
} = {
onData: undefined,
}
onData: undefined,
}
) {
const existingData = (await this.doesTargetExist())
await this.syncHfMetadata();
// If the whisper model dir doesn't exist, create it.
if (!WHISPER_MODEL_DIR.exists) {
FileSystem.makeDirectoryAsync(WHISPER_MODEL_PATH, {
intermediates: true,
});
}
// Check for the existence of the target file
// If it exists, load the existing data.
await this.updateTargetExistence();
const existingData = this.does_target_exist
? this.targetFile.text()
: undefined;
if (await this.doesTargetExist()) {
}
// Create the resumable.
return FileSystem.createDownloadResumable(
this.modelUrl,
this.targetPath,
{},
async (data: FileSystem.DownloadProgressData) => {
const db = await getDb();
db.runAsync(`INSERT INTO OR UPDATE
whisper_models
(model, download_status)
VALUES
(?, ?)
WHERE
model = ?
`, this.tag, "active", this.tag);
await this.recordLatestTargetHash();
if (options.onData) await options.onData(data);
this.download_data = data;
await this.syncHfMetadata();
await this.updateTargetHash();
await this.updateTargetExistence();
if (options.onData) await options.onData(this);
},
existingData ? existingData : undefined
);
}
async getDownloadStatus(): Promise<download_status_t> {
const doesTargetExist = await this.doesTargetExist();
const isDownloadComplete = await this.isDownloadComplete();
const hasDownloadStarted = doesTargetExist && !isDownloadComplete;
if (!this.size) {
return {
doesTargetExist: false,
isDownloadComplete: false,
hasDownloadStarted: false,
progress: undefined,
}
}
const remaining = hasDownloadStarted
? this.size - (this.targetFile.size as number)
: 0;
const progress = hasDownloadStarted
? {
current: this.targetFile.size || 0,
total: this.size,
remaining: this.size - (this.targetFile.size as number),
percentRemaining: (remaining / this.size) * 100.0,
}
: undefined;
return {
doesTargetExist,
isDownloadComplete,
hasDownloadStarted,
progress,
};
}
}
export type DownloadCallback = (arg0: WhisperFile) => any;
export type DownloadCallback = (arg0: FileSystem.DownloadProgressData) => any;
/** One shared WhisperFile instance per model tag: "small", "medium", "large". */
export const WHISPER_FILES = {
small: new WhisperFile("small"),
medium: new WhisperFile("medium"),
large: new WhisperFile("large"),
};