import { Platform } from "react-native";
import * as FileSystem from "expo-file-system";
import { File, Paths } from "expo-file-system/next";
import { getDb } from "./db";
import * as Crypto from "expo-crypto";
import { arrbufToStr, strToArrBuf } from "./util";
import { createReadStream } from "./readstream";

/** Path (under the app's document directory) where Whisper model files are kept. */
export const WHISPER_MODEL_PATH = Paths.join(
  FileSystem.documentDirectory || "file:///",
  "whisper"
);

/** `File` handle wrapping {@link WHISPER_MODEL_PATH}. */
export const WHISPER_MODEL_DIR = new File(WHISPER_MODEL_PATH);

// Thanks to https://medium.com/@fabi.mofar/downloading-and-saving-files-in-react-native-expo-5b3499adda84
/**
 * Saves the file at `uri` to a user-chosen location.
 *
 * On Android the user is asked for a directory through the Storage Access
 * Framework; if access is granted the file content is copied (as Base64) into
 * a newly created file named `filename`. On other platforms, or when
 * permission is denied, the call is delegated to `shareAsync`.
 *
 * Failures while creating or writing the destination file are logged and
 * swallowed (best effort); a failure to read the source file propagates.
 *
 * @param uri - Location of the file to save.
 * @param filename - Name for the file created in the chosen directory.
 * @param mimetype - MIME type passed to the Storage Access Framework.
 */
export async function saveFile(
  uri: string,
  filename: string,
  mimetype: string
) {
  if (Platform.OS !== "android") {
    shareAsync(uri);
    return;
  }

  const permissions =
    await FileSystem.StorageAccessFramework.requestDirectoryPermissionsAsync();
  if (!permissions.granted) {
    shareAsync(uri);
    return;
  }

  const base64 = await FileSystem.readAsStringAsync(uri, {
    encoding: FileSystem.EncodingType.Base64,
  });

  try {
    const destinationUri =
      await FileSystem.StorageAccessFramework.createFileAsync(
        permissions.directoryUri,
        filename,
        mimetype
      );
    await FileSystem.writeAsStringAsync(destinationUri, base64, {
      encoding: FileSystem.EncodingType.Base64,
    });
  } catch (e) {
    console.log(e);
  }
}

/**
 * Placeholder for a share-sheet implementation.
 *
 * NOTE(review): this is still a stub and always throws, so every `saveFile`
 * path that falls back to sharing currently rejects.
 *
 * @throws Error always.
 */
function shareAsync(uri: string) {
  throw new Error("Function not implemented.");
}

export const WHISPER_MODEL_TAGS = ["small", "medium", "large"];

export type whisper_model_tag_t = "small" | "medium" | "large";

/**
 * Static description of each downloadable model: where it comes from, the
 * file name it is stored under, a display label and a byte size.
 */
export const WHISPER_MODELS: Record<
  whisper_model_tag_t,
  {
    source: string;
    target: string;
    label: string;
    size: number;
  }
> = {
  small: {
    // Uses the same ".../resolve/main/..." layout as the other entries and
    // as create_hf_url(); the previous ".../blob/resolve/..." path did not.
    source:
      "https://huggingface.co/openai/whisper-small/resolve/main/pytorch_model.bin",
    target: "small.bin",
    label: "Small",
    size: 967092419,
  },
  medium: {
    source:
      "https://huggingface.co/openai/whisper-medium/resolve/main/pytorch_model.bin",
    target: "medium.bin",
    label: "Medium",
    size: 3055735323,
  },
  large: {
    source:
      "https://huggingface.co/openai/whisper-large/resolve/main/pytorch_model.bin",
    target: "large.bin",
    label: "Large",
    size: 6173629930,
  },
};

/** Same set of tags as {@link whisper_model_tag_t}; kept for existing callers. */
export type whisper_tag_t = whisper_model_tag_t;

export type hf_channel_t = "raw" | "resolve";

export const HF_URL_BASE = "https://huggingface.co/openai/whisper-";
export const HF_URL_RAW = "raw";
export const HF_URL_RESOLVE = "resolve";
export const HF_URL_END = "/main/pytorch_model.bin";

/**
 * Builds the Hugging Face URL of a model's `pytorch_model.bin`.
 *
 * @param tag - Model size tag.
 * @param channel - URL channel segment, `"raw"` or `"resolve"`.
 * @returns `HF_URL_BASE + tag + "/" + channel + HF_URL_END`.
 */
export function create_hf_url(tag: whisper_tag_t, channel: hf_channel_t) {
  return `${HF_URL_BASE}${tag}/${channel}${HF_URL_END}`;
}

/** Key/value fields read from the text served on the "raw" channel. */
export type hf_metadata_t = {
  version: string;
  oid: string;
  size: string;
};

export type download_status_t = {
  doesTargetExist: boolean;
  isDownloadComplete: boolean;
  hasDownloadStarted: boolean;
  progress?: {
    current: number;
    total: number;
    remaining: number;
    percentRemaining: number;
  };
};

/**
 * Parses "key value" lines into an {@link hf_metadata_t}.
 *
 * Blank lines and lines without a space separator are ignored. The value is
 * everything after the first space on the line.
 *
 * @throws Error when `version`, `oid` or `size` is missing.
 */
function parseHfMetadata(text: string): hf_metadata_t {
  const fields: Record<string, string> = {};
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    const separator = line.indexOf(" ");
    if (separator <= 0) continue;
    fields[line.slice(0, separator)] = line.slice(separator + 1);
  }

  const { version, oid, size } = fields;
  if (version === undefined || oid === undefined || size === undefined) {
    throw new Error("Metadata response is missing version, oid or size");
  }
  return { version, oid, size };
}

/**
 * One Whisper model file: where it lives on disk, where it is downloaded
 * from, and the state of an in-progress download.
 */
export class WhisperFile {
  /** Metadata from the last successful {@link syncHfMetadata} call. */
  hf_metadata: hf_metadata_t | undefined;
  /** Hash string set by {@link updateTargetHash}. */
  target_hash: string | undefined;
  /** Cached result of the last existence check for the final file. */
  does_target_exist: boolean = false;
  /** Cached result of the last existence check for the `.part` file. */
  does_part_target_exist: boolean = false;
  /** Most recent progress event received from the download. */
  download_data: FileSystem.DownloadProgressData | undefined;

  /**
   * @param tag - Model size tag.
   * @param targetFileName - File name on disk; defaults to `<tag>.bin`.
   * @param label - Display label; defaults to the capitalized tag.
   * @param size - Optional size of the model.
   */
  constructor(
    public tag: whisper_model_tag_t,
    private targetFileName?: string,
    public label?: string,
    public size?: number
  ) {
    this.targetFileName = this.targetFileName || `${tag}.bin`;
    this.label =
      this.label ||
      `${tag[0].toUpperCase()}${tag.substring(1).toLowerCase()}`;
  }

  /** Path of the finished model file. */
  get targetPath() {
    return Paths.join(WHISPER_MODEL_PATH, this.targetFileName as string);
  }

  /** Path of the temporary file the download is written to. */
  get targetPartPath() {
    return this.targetPath + ".part";
  }

  get targetFile() {
    return new File(this.targetPath);
  }

  get targetPartFile() {
    return new File(this.targetPartPath);
  }

  async getTargetInfo() {
    return await FileSystem.getInfoAsync(this.targetPath);
  }

  async getTargetPartInfo() {
    return await FileSystem.getInfoAsync(this.targetPartPath);
  }

  /** Refreshes `does_target_exist` and `does_part_target_exist` from disk. */
  async updateTargetExistence() {
    this.does_target_exist = (await this.getTargetInfo()).exists;
    console.log(
      "Determining if %s exists: %s",
      this.targetPath,
      this.does_target_exist
    );
    this.does_part_target_exist = (await this.getTargetPartInfo()).exists;
    console.log(
      "Determining if %s exists: %s",
      this.targetPartPath,
      this.does_part_target_exist
    );
  }

  /**
   * Computes a SHA-256 digest for the target file.
   *
   * NOTE(review): the whole file is read into memory as a Base64 string
   * before hashing, which is unlikely to work for multi-gigabyte models.
   * It also looks like the digest is taken over whatever `strToArrBuf`
   * produces from the Base64 text - confirm that this is the raw file bytes.
   *
   * @returns The digest, or `undefined` when the target file does not exist.
   */
  public async getTargetSha() {
    await this.updateTargetExistence();
    if (!this.does_target_exist) {
      console.debug("%s does not exist", this.targetPath);
      return undefined;
    }
    const strData = await FileSystem.readAsStringAsync(this.targetPath, {
      encoding: FileSystem.EncodingType.Base64,
    });
    const data = strToArrBuf(strData);
    const digest = await Crypto.digest(
      Crypto.CryptoDigestAlgorithm.SHA256,
      data
    );
    return digest;
  }

  /** Stores the target file's digest in `target_hash`, if the file exists. */
  public async updateTargetHash() {
    const targetSha = await this.getTargetSha();
    if (!targetSha) return;
    this.target_hash = arrbufToStr(targetSha);
  }

  /**
   * Whether `target_hash` equals the `oid` from the fetched metadata.
   *
   * NOTE(review): Git LFS pointer files write the oid as `sha256:<hex>`;
   * confirm `arrbufToStr` yields that exact format, otherwise this can
   * never be true.
   */
  get isHashValid() {
    return this.target_hash === this.hf_metadata?.oid;
  }

  /**
   * Deletes the target file and its `.part` file, based on the cached
   * existence flags (call {@link updateTargetExistence} first to refresh
   * them). The flags are cleared as each file is removed.
   *
   * @param ignoreErrors - When `true` (default) failures are only logged;
   *   when `false` they are logged and rethrown.
   */
  delete(ignoreErrors = true) {
    try {
      if (this.does_target_exist) {
        this.targetFile.delete();
        this.does_target_exist = false;
      }
      if (this.does_part_target_exist) {
        this.targetPartFile.delete();
        this.does_part_target_exist = false;
      }
    } catch (err) {
      console.error(err);
      if (!ignoreErrors) {
        throw err;
      }
      // The deletion failed, so do not report success below.
      return;
    }
    console.debug(
      "Successfully deleted %s and %s",
      this.targetPartPath,
      this.targetPath
    );
  }

  /** URL the model binary is downloaded from. */
  get modelUrl() {
    return create_hf_url(this.tag, "resolve");
  }

  /** URL the metadata text is fetched from. */
  get metadataUrl() {
    return create_hf_url(this.tag, "raw");
  }

  /**
   * Download progress as a percentage. Returns 0 when no progress has been
   * reported yet or the expected total is not a positive number.
   */
  get percentDone() {
    if (!this.download_data) return 0;
    const { totalBytesWritten, totalBytesExpectedToWrite } =
      this.download_data;
    if (totalBytesExpectedToWrite <= 0) return 0;
    return (totalBytesWritten / totalBytesExpectedToWrite) * 100;
  }

  /** `100 - percentDone`, or 0 when no progress has been reported yet. */
  get percentLeft() {
    if (!this.download_data) return 0;
    return 100 - this.percentDone;
  }

  /**
   * Fetches {@link metadataUrl} and stores the parsed result in
   * `hf_metadata`.
   *
   * @throws Rethrows (after logging) any network error, non-success HTTP
   *   status, or response that lacks the expected fields.
   */
  public async syncHfMetadata() {
    try {
      const resp = await fetch(this.metadataUrl, {
        credentials: "include",
        // No conditional-request header is sent: a hard-coded ETag would let
        // the server reply "304 Not Modified" with an empty body, leaving
        // nothing to parse.
        headers: {
          "User-Agent":
            "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0",
          Accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.5",
          "Sec-GPC": "1",
          "Upgrade-Insecure-Requests": "1",
          "Sec-Fetch-Dest": "document",
          "Sec-Fetch-Mode": "navigate",
          "Sec-Fetch-Site": "cross-site",
          Priority: "u=0, i",
        },
        method: "GET",
        mode: "cors",
      });
      if (!resp.ok) {
        throw new Error(`Unexpected HTTP status ${resp.status}`);
      }
      const text = await resp.text();
      this.hf_metadata = parseHfMetadata(text);
    } catch (err) {
      console.error("Failed to fetch %s: %s", this.metadataUrl, err);
      throw err;
    }
  }

  /**
   * Prepares a resumable download of the model into `targetPartPath`.
   *
   * Before the resumable is created the metadata is synced, the model
   * directory is created if needed, and the existence flags are refreshed.
   * On every progress event `download_data` and the existence flags are
   * updated and `options.onData` is awaited. Once all expected bytes are
   * written, the `.part` file is moved to `targetPath` and
   * `options.onComplete` is awaited.
   *
   * @param options - Optional progress and completion callbacks; each
   *   receives this `WhisperFile`.
   * @returns The download resumable, or `undefined` if creating it threw
   *   (the error is logged).
   * @throws Whatever {@link syncHfMetadata}, directory creation or the
   *   existence check throws.
   */
  async createDownloadResumable(
    options: {
      onData?: DownloadCallback | undefined;
      onComplete?: CompletionCallback | undefined;
    } = {
      onData: undefined,
      onComplete: undefined,
    }
  ) {
    await this.syncHfMetadata();

    // If the whisper model dir doesn't exist, create it. This is awaited so
    // that the download cannot start writing before the directory is there.
    // NOTE(review): WHISPER_MODEL_DIR is a `File` handle - confirm that
    // `.exists` reports true for an existing directory.
    if (!WHISPER_MODEL_DIR.exists) {
      await FileSystem.makeDirectoryAsync(WHISPER_MODEL_PATH, {
        intermediates: true,
      });
    }

    // Check for the existence of the target file
    // If it exists, load the existing data.
    await this.updateTargetExistence();

    try {
      // const existingData = this.does_target_exist
      //   ? await FileSystem.readAsStringAsync(this.targetPath, {
      //       encoding: FileSystem.EncodingType.Base64,
      //     })
      //   : undefined;

      // Create the resumable.
      return FileSystem.createDownloadResumable(
        this.modelUrl,
        this.targetPartPath,
        {},
        async (data: FileSystem.DownloadProgressData) => {
          console.log(
            "Downloading %s: %d of %d",
            this.targetPartPath,
            data.totalBytesWritten,
            data.totalBytesExpectedToWrite
          );
          // console.debug("yes, I'm still downloading");

          this.download_data = data;

          // The metadata was already synced before the download was created,
          // so it is not fetched again on every progress event.

          // try {
          //   await this.updateTargetHash();
          // } catch (er) {
          //   console.error("Failed to update target hash: %s", er);
          // }

          try {
            await this.updateTargetExistence();
          } catch (err) {
            console.error("Failed to update target existence: %s", err);
          }

          if (options.onData) await options.onData(this);

          if (data.totalBytesExpectedToWrite === data.totalBytesWritten) {
            console.debug(
              "Finalizing; copying from %s -> %s",
              this.targetPartPath,
              this.targetPath
            );
            await FileSystem.moveAsync({
              from: this.targetPartPath,
              to: this.targetPath,
            });
            await this.updateTargetExistence();
            if (options.onComplete) await options.onComplete(this);
          }
        }
        // existingData ? existingData : undefined
      );
    } catch (err) {
      console.error("Could not read %s: %s", this.targetPath, err);
    }
  }
}

/** Invoked on every download progress event. */
export type DownloadCallback = (arg0: WhisperFile) => any;

/** Invoked once the downloaded file has been moved into place. */
export type CompletionCallback = (arg0: WhisperFile) => any;

/** One shared `WhisperFile` instance per model tag. */
export const WHISPER_FILES = {
  small: new WhisperFile("small"),
  medium: new WhisperFile("medium"),
  large: new WhisperFile("large"),
};