/*
 * Copyright 2017-2022 Product Mog LLC, 2022-2026 Revetware LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.lokalized;

import org.jspecify.annotations.NonNull;

import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Represents the phonetic onset category of a word, used to select
 * context-appropriate word forms in localized strings.
 * <p>
 * Many languages require different word forms based on the sound that
 * follows. For example, English uses "a" before consonant sounds and
 * "an" before vowel sounds. Italian has more complex rules requiring
 * different articles before vowels, s+consonant clusters, and certain
 * other onsets.
 * <p>
 * The phonetic category of a word is determined at runtime by a
 * user-supplied resolver function, since correct classification often
 * requires language-specific knowledge and exception handling.
 *
 * @author <a href="https://revetkn.com">Mark Allen</a>
 * @since 1.2.0
 */
public enum Phonetic implements LanguageForm {
  /**
   * Word begins with a vowel sound.
   * <p>
   * Applies to most languages. In English, triggers "an" instead of "a".
   * In French, triggers elision (le/la → l'). In Italian, triggers
   * l' for singular articles.
   * <p>
   * Note: Classification is by <em>sound</em>, not spelling. English
   * "hour" is phonetically vowel-initial; "university" is not.
   */
  VOWEL,

  /**
   * Word begins with a typical consonant sound.
   * <p>
   * This is the default category for words not matching any other
   * phonetic pattern. In English, triggers "a" instead of "an".
   * In Italian, triggers "il" for masculine singular nouns.
   */
  CONSONANT,

  /**
   * Word begins with a silent H, making it phonetically vowel-initial.
   * <p>
   * Primarily applies to <strong>English</strong> and <strong>French</strong>.
   * <p>
   * English examples: "hour", "honor", "heir" → "an hour"
   * <p>
   * French examples (h muet): "homme", "heure" → "l'homme"
   */
  H_SILENT,

  /**
   * Word begins with an aspirated (pronounced) H.
   * <p>
   * Primarily applies to <strong>English</strong> and <strong>French</strong>.
   * <p>
   * English examples: "house", "happy" → "a house"
   * <p>
   * French examples (h aspiré): "héros", "haricot" → "le héros" (no elision)
   */
  H_ASPIRATED,

  /**
   * Word begins with s + consonant cluster (s impura).
   * <p>
   * Primarily applies to <strong>Italian</strong>. Triggers "lo/gli"
   * instead of "il/i" for masculine nouns, and "uno" instead of "un".
   * <p>
   * Examples: "studente", "spaghetti", "sbaglio", "scuola"
   * → "lo studente", "gli spaghetti"
   */
  S_IMPURE,

  /**
   * Word begins with Z sound or affricates /ts/, /dz/.
   * <p>
   * Primarily applies to <strong>Italian</strong>. Triggers "lo/gli"
   * instead of "il/i" for masculine nouns.
   * <p>
   * Examples: "zio", "zero", "zucchero" → "lo zio", "gli zii"
   */
  Z,

  /**
   * Word begins with the palatal nasal cluster GN.
   * <p>
   * Primarily applies to <strong>Italian</strong>. Triggers "lo/gli"
   * instead of "il/i" for masculine nouns.
   * <p>
   * Examples: "gnomo", "gnocco" → "lo gnomo", "gli gnocchi"
   */
  GN,

  /**
   * Word begins with the PS cluster.
   * <p>
   * Primarily applies to <strong>Italian</strong>, typically in words
   * of Greek origin. Triggers "lo/gli" instead of "il/i".
   * <p>
   * Examples: "psicologo", "pseudonimo" → "lo psicologo"
   */
  PS,

  /**
   * Word begins with the PN cluster.
   * <p>
   * Primarily applies to <strong>Italian</strong>, typically in words
   * of Greek origin. Triggers "lo/gli" instead of "il/i".
   * <p>
   * Examples: "pneumatico", "pneumologo" → "lo pneumatico"
   */
  PN,

  /**
   * Word begins with X (/ks/ sound).
   * <p>
   * Primarily applies to <strong>Italian</strong>. Triggers "lo/gli"
   * instead of "il/i". Rare in Italian vocabulary.
   * <p>
   * Examples: "xilofono", "xenofobo" → "lo xilofono"
   */
  X,

  /**
   * Word begins with Y functioning as a consonantal glide /j/.
   * <p>
   * Primarily applies to <strong>Italian</strong>, where loanwords
   * starting with Y take "lo/gli" instead of "il/i".
   * <p>
   * Examples: "yogurt", "yacht" → "lo yogurt"
   * <p>
   * Note: In English, Y is typically treated as {@link #CONSONANT}.
   */
  GLIDE_Y,

  /**
   * Word begins with W functioning as a consonantal glide /w/.
   * <p>
   * May apply to <strong>Italian</strong> and other languages where
   * W-initial words (typically loanwords) require special handling.
   * <p>
   * Examples: "whisky", "weekend"
   */
  GLIDE_W,

  /**
   * Word begins with a stressed A or HA sound.
   * <p>
   * Primarily applies to <strong>Spanish</strong> and <strong>Catalan</strong>.
   * Feminine nouns with stressed initial A take masculine singular articles
   * for euphonic reasons, while remaining grammatically feminine.
   * <p>
   * Examples: "agua", "águila", "hacha", "alma"
   * → "el agua" (not "la agua"), but "las aguas" in plural
   * <p>
   * Note: The noun remains feminine—adjectives still agree femininely:
   * "el agua fría" (the cold water).
   */
  STRESSED_A,

  /**
   * Arabic sun letter (الحروف الشمسية).
   * <p>
   * Applies to <strong>Arabic</strong>. When the definite article "al-"
   * precedes a sun letter, the L assimilates to the following consonant.
   * <p>
   * Sun letters: ت ث د ذ ر ز س ش ص ض ط ظ ل ن
   * <p>
   * Example: "al-shams" is pronounced "ash-shams" (the sun)
   */
  SOLAR,

  /**
   * Arabic moon letter (الحروف القمرية).
   * <p>
   * Applies to <strong>Arabic</strong>. When the definite article "al-"
   * precedes a moon letter, the L is pronounced normally without assimilation.
   * <p>
   * Moon letters: ب ج ح خ ع غ ف ق ك م هـ و ي ء
   * <p>
   * Example: "al-qamar" is pronounced as written (the moon)
   */
  LUNAR,

  /**
   * Fallback category for edge cases not covered by other values.
   * <p>
   * Use this when a word doesn't fit cleanly into any other phonetic
   * category, allowing graceful degradation in localized string selection.
   */
  OTHER;

  /**
   * Unmodifiable lookup table from each constant's {@link #name()} to the constant itself.
   * <p>
   * Built once during class initialization and never changed afterwards.
   */
  @NonNull
  private static final Map<@NonNull String, @NonNull Phonetic> PHONETICS_BY_NAME;

  static {
    // Enum names are unique, so the two-argument toMap (which rejects duplicate keys) is safe here.
    PHONETICS_BY_NAME = Collections.unmodifiableMap(
        Arrays.stream(Phonetic.values())
            .collect(Collectors.toMap(Phonetic::name, phonetic -> phonetic)));
  }

  /**
   * Gets the mapping of phonetic names to phonetic values.
   * <p>
   * The returned map is an unmodifiable view; attempts to mutate it throw
   * {@link UnsupportedOperationException}.
   *
   * @return the mapping of phonetic names to phonetic values, not null
   */
  @NonNull
  static Map<@NonNull String, @NonNull Phonetic> getPhoneticsByName() {
    return PHONETICS_BY_NAME;
  }
}