/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define C_LUCY_COMPOUNDFILEWRITER #include "Lucy/Util/ToolSet.h" #include "Lucy/Store/CompoundFileWriter.h" #include "Lucy/Store/Folder.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" #include "Lucy/Util/IndexFileNames.h" #include "Lucy/Util/Json.h" int32_t CFWriter_current_file_format = 2; // Helper which does the heavy lifting for CFWriter_consolidate. static void S_do_consolidate(CompoundFileWriter *self); // Clean up files which may be left over from previous merge attempts. static void S_clean_up_old_temp_files(CompoundFileWriter *self); CompoundFileWriter* CFWriter_new(Folder *folder) { CompoundFileWriter *self = (CompoundFileWriter*)VTable_Make_Obj(COMPOUNDFILEWRITER); return CFWriter_init(self, folder); } CompoundFileWriter* CFWriter_init(CompoundFileWriter *self, Folder *folder) { self->folder = (Folder*)INCREF(folder); return self; } void CFWriter_destroy(CompoundFileWriter *self) { DECREF(self->folder); SUPER_DESTROY(self, COMPOUNDFILEWRITER); } void CFWriter_consolidate(CompoundFileWriter *self) { CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11); if (Folder_Exists(self->folder, cfmeta_file)) { THROW(ERR, "Merge already performed for %o", Folder_Get_Path(self->folder)); } else { S_clean_up_old_temp_files(self); S_do_consolidate(self); } } static void S_clean_up_old_temp_files(CompoundFileWriter *self) { Folder *folder = self->folder; CharBuf *cfmeta_temp = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16); CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6); if (Folder_Exists(folder, cf_file)) { if (!Folder_Delete(folder, cf_file)) { THROW(ERR, "Can't delete '%o'", cf_file); } } if (Folder_Exists(folder, cfmeta_temp)) { if (!Folder_Delete(folder, cfmeta_temp)) { THROW(ERR, "Can't delete '%o'", cfmeta_temp); } } } static void S_do_consolidate(CompoundFileWriter *self) { Folder *folder = self->folder; Hash *metadata = Hash_new(0); Hash *sub_files = Hash_new(0); VArray *files = Folder_List(folder, NULL); VArray *merged = VA_new(VA_Get_Size(files)); CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6); OutStream *outstream = Folder_Open_Out(folder, (CharBuf*)cf_file); uint32_t i, max; bool_t rename_success; if (!outstream) { RETHROW(INCREF(Err_get_error())); } // Start metadata. Hash_Store_Str(metadata, "files", 5, INCREF(sub_files)); Hash_Store_Str(metadata, "format", 6, (Obj*)CB_newf("%i32", CFWriter_current_file_format)); CharBuf *infilepath = CB_new(30); size_t base_len = 0; VA_Sort(files, NULL, NULL); for (i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *infilename = (CharBuf*)VA_Fetch(files, i); if (!CB_Ends_With_Str(infilename, ".json", 5)) { InStream *instream = Folder_Open_In(folder, infilename); Hash *file_data = Hash_new(2); int64_t offset, len; if (!instream) { RETHROW(INCREF(Err_get_error())); } // Absorb the file. offset = OutStream_Tell(outstream); OutStream_Absorb(outstream, instream); len = OutStream_Tell(outstream) - offset; // Record offset and length. Hash_Store_Str(file_data, "offset", 6, (Obj*)CB_newf("%i64", offset)); Hash_Store_Str(file_data, "length", 6, (Obj*)CB_newf("%i64", len)); CB_Set_Size(infilepath, base_len); CB_Cat(infilepath, infilename); Hash_Store(sub_files, (Obj*)infilepath, (Obj*)file_data); VA_Push(merged, INCREF(infilename)); // Add filler NULL bytes so that every sub-file begins on a file // position multiple of 8. OutStream_Align(outstream, 8); InStream_Close(instream); DECREF(instream); } } DECREF(infilepath); // Write metadata to cfmeta file. CharBuf *cfmeta_temp = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16); CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11); Json_spew_json((Obj*)metadata, (Folder*)self->folder, cfmeta_temp); rename_success = Folder_Rename(self->folder, cfmeta_temp, cfmeta_file); if (!rename_success) { RETHROW(INCREF(Err_get_error())); } // Clean up. OutStream_Close(outstream); DECREF(outstream); DECREF(files); DECREF(metadata); /* CharBuf *merged_file; Obj *ignore; Hash_Iterate(sub_files); while (Hash_Next(sub_files, (Obj**)&merged_file, &ignore)) { if (!Folder_Delete(folder, merged_file)) { CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file); DECREF(sub_files); Err_throw_mess(ERR, mess); } } */ DECREF(sub_files); for (uint32_t i = 0, max = VA_Get_Size(merged); i < max; i++) { CharBuf *merged_file = (CharBuf*)VA_Fetch(merged, i); if (!Folder_Delete(folder, merged_file)) { CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file); DECREF(merged); Err_throw_mess(ERR, mess); } } DECREF(merged); }