diff --git a/include/swift/Localization/LocalizationFormat.h b/include/swift/Localization/LocalizationFormat.h index 63b6e7d85f2ef..50ec3927a8e21 100644 --- a/include/swift/Localization/LocalizationFormat.h +++ b/include/swift/Localization/LocalizationFormat.h @@ -64,6 +64,20 @@ class DefToYAMLConverter { void convert(llvm::raw_ostream &out); }; +class DefToStringsConverter { + llvm::ArrayRef IDs; + llvm::ArrayRef Messages; + +public: + DefToStringsConverter(llvm::ArrayRef ids, + llvm::ArrayRef messages) + : IDs(ids), Messages(messages) { + assert(IDs.size() == Messages.size()); + } + + void convert(llvm::raw_ostream &out); +}; + class LocalizationWriterInfo { public: using key_type = uint32_t; @@ -225,6 +239,31 @@ class YAMLLocalizationProducer final : public LocalizationProducer { llvm::StringRef getMessage(swift::DiagID id) const override; }; +class StringsLocalizationProducer final : public LocalizationProducer { + std::string filePath; + + std::vector diagnostics; + +public: + explicit StringsLocalizationProducer(llvm::StringRef filePath, + bool printDiagnosticNames = false) + : LocalizationProducer(printDiagnosticNames), filePath(filePath) {} + + /// Iterate over all of the available (non-empty) translations + /// maintained by this producer, callback gets each translation + /// with its unique identifier. + void forEachAvailable( + llvm::function_ref callback); + +protected: + bool initializeImpl() override; + llvm::StringRef getMessage(swift::DiagID id) const override; + +private: + static void readStringsFile(llvm::MemoryBuffer *in, + std::vector &diagnostics); +}; + class SerializedLocalizationProducer final : public LocalizationProducer { using SerializedLocalizationTable = llvm::OnDiskIterableChainedHashTable; diff --git a/lib/Localization/LocalizationFormat.cpp b/lib/Localization/LocalizationFormat.cpp index 53c47c880d4d4..40368b6b5fac6 100644 --- a/lib/Localization/LocalizationFormat.cpp +++ b/lib/Localization/LocalizationFormat.cpp @@ -184,7 +184,6 @@ void YAMLLocalizationProducer::forEachAvailable( std::unique_ptr LocalizationProducer::producerFor(llvm::StringRef locale, llvm::StringRef path, bool printDiagnosticNames) { - std::unique_ptr producer; llvm::SmallString<128> filePath(path); llvm::sys::path::append(filePath, locale); llvm::sys::path::replace_extension(filePath, ".db"); @@ -193,7 +192,7 @@ LocalizationProducer::producerFor(llvm::StringRef locale, llvm::StringRef path, // fallback to the `YAML` file. if (llvm::sys::fs::exists(filePath)) { if (auto file = llvm::MemoryBuffer::getFile(filePath)) { - producer = std::make_unique( + return std::make_unique( std::move(file.get()), printDiagnosticNames); } } else { @@ -201,12 +200,18 @@ LocalizationProducer::producerFor(llvm::StringRef locale, llvm::StringRef path, // In case of missing localization files, we should fallback to messages // from `.def` files. if (llvm::sys::fs::exists(filePath)) { - producer = std::make_unique( + return std::make_unique( + filePath.str(), printDiagnosticNames); + } + + llvm::sys::path::replace_extension(filePath, ".strings"); + if (llvm::sys::fs::exists(filePath)) { + return std::make_unique( filePath.str(), printDiagnosticNames); } } - return producer; + return std::unique_ptr(); } llvm::Optional LocalizationInput::readID(llvm::yaml::IO &io) { @@ -289,5 +294,161 @@ void DefToYAMLConverter::convert(llvm::raw_ostream &out) { } } +void DefToStringsConverter::convert(llvm::raw_ostream &out) { + // "" = ""; + for (auto i : swift::indices(IDs)) { + out << "\"" << IDs[i] << "\""; + out << " = "; + + const std::string &msg = Messages[i]; + + out << "\""; + for (unsigned j = 0; j < msg.length(); ++j) { + // Escape '"' found in the message. + if (msg[j] == '"') + out << '\\'; + + out << msg[j]; + } + + out << "\";\r\n"; + } +} + +bool StringsLocalizationProducer::initializeImpl() { + auto FileBufOrErr = llvm::MemoryBuffer::getFileOrSTDIN(filePath); + llvm::MemoryBuffer *document = FileBufOrErr->get(); + readStringsFile(document, diagnostics); + return true; +} + +llvm::StringRef +StringsLocalizationProducer::getMessage(swift::DiagID id) const { + return diagnostics[(unsigned)id]; +} + +void StringsLocalizationProducer::forEachAvailable( + llvm::function_ref callback) { + initializeIfNeeded(); + if (getState() == FailedInitialization) { + return; + } + + for (uint32_t i = 0, n = diagnostics.size(); i != n; ++i) { + auto translation = diagnostics[i]; + if (!translation.empty()) + callback(static_cast(i), translation); + } +} + +void StringsLocalizationProducer::readStringsFile( + llvm::MemoryBuffer *in, std::vector &diagnostics) { + std::map diagLocs; +#define DIAG(KIND, ID, Options, Text, Signature) \ + diagLocs[#ID] = static_cast(LocalDiagID::ID); +#include "swift/AST/DiagnosticsAll.def" +#undef DIAG + + // Allocate enough slots to fit all the possible diagnostics + // this helps to identify which diagnostics are missing. + diagnostics.resize(LocalDiagID::NumDiags); + + // The format is as follows: + // + // - comment: /* ... */ + // - translation: "" = ""; + auto buffer = in->getBuffer(); + while (!buffer.empty()) { + // consume comment. + if (buffer.startswith("/*")) { + auto endOfComment = buffer.find("*/"); + assert(endOfComment != std::string::npos); + // Consume the comment and trailing `*/` + buffer = buffer.drop_front(endOfComment + 2).ltrim(); + continue; + } + + assert(buffer.startswith("\"") && "malformed diagnostics file"); + + // Consume leading `"` + buffer = buffer.drop_front(); + + // Valid diagnostic id cannot have any `"` in it. + auto idSize = buffer.find_first_of('\"'); + assert(idSize != std::string::npos); + + std::string id(buffer.data(), idSize); + + // consume id and `" = "`. There could be a variable number of + // spaces on each side of `=`. + { + // Consume id, trailing `"`, and all spaces before `=` + buffer = buffer.drop_front(idSize + 1).ltrim(' '); + + // Consume `=` and all trailing spaces until `"` + { + assert(!buffer.empty() && buffer.front() == '='); + buffer = buffer.drop_front().ltrim(' '); + } + + // Consume `"` at the beginning of the diagnostic message. + { + assert(!buffer.empty() && buffer.front() == '\"'); + buffer = buffer.drop_front(); + } + } + + llvm::SmallString<64> msg; + { + bool isValid = false; + // Look for `";` which denotes the end of message + for (unsigned i = 0, n = buffer.size(); i != n; ++i) { + if (buffer[i] != '\"') { + msg.push_back(buffer[i]); + continue; + } + + // Leading `"` has been comsumed. + assert(i > 0); + + // Let's check whether this `"` is escaped, and if so - continue + // because `"` is part of the message. + if (buffer[i - 1] == '\\') { + // Drop `\` added for escaping. + msg.pop_back(); + msg.push_back(buffer[i]); + continue; + } + + // If current `"` was not escaped and it's followed by `;` - + // we have reached the end of the message, otherwise + // the input is malformed. + if (i + 1 < n && buffer[i + 1] == ';') { + // Consume the message and its trailing info. + buffer = buffer.drop_front(i + 2).ltrim(); + // Mark message as valid. + isValid = true; + break; + } else { + llvm_unreachable("malformed diagnostics file"); + } + } + + assert(isValid && "malformed diagnostic message"); + } + + // Check whether extracted diagnostic still exists in the + // system and if not - record as unknown. + { + auto existing = diagLocs.find(id); + if (existing != diagLocs.end()) { + diagnostics[existing->second] = std::string(msg); + } else { + llvm::errs() << "[!] Unknown diagnostic: " << id << '\n'; + } + } + } +} + } // namespace diag } // namespace swift