P粉885035114 2023-08-26 00:50:59
我需要在 C# 專案中使用這個功能,所以這裡提供上面提到的 Python 程式碼的 C# 移植版本。請確保在原始檔中包含 using System.Text.RegularExpressions;。
/// <summary>
/// Chooses the English indefinite article ("a" or "an") for a noun phrase,
/// looking only at the first run of word characters in the phrase.
/// </summary>
/// <param name="noun_phrase">The phrase the article will be placed in front of.</param>
/// <returns>
/// <c>"an"</c> or <c>"a"</c>. A phrase that contains no word characters yields <c>"an"</c>.
/// </returns>
private string GetIndefiniteArticle(string noun_phrase)
{
    Match firstWord = Regex.Match(noun_phrase, @"\w+");
    if (!firstWord.Success)
    {
        return "an";
    }

    string word = firstWord.Value;
    // Invariant casing keeps the rules stable regardless of the current culture
    // (e.g. Turkish lower-cases 'I' to a dotless 'ı', which would defeat every check below).
    string wordi = word.ToLowerInvariant();

    // Words with a silent 'h' (and "euler", pronounced "oiler") take "an".
    foreach (string prefix in new[] { "euler", "heir", "honest", "hono" })
    {
        if (wordi.StartsWith(prefix, System.StringComparison.Ordinal))
        {
            return "an";
        }
    }

    // "hour..." has a silent 'h', except for "houri".
    if (wordi.StartsWith("hour", System.StringComparison.Ordinal)
        && !wordi.StartsWith("houri", System.StringComparison.Ordinal))
    {
        return "an";
    }

    // Letters whose spoken NAME begins with a vowel sound ("eff", "aitch", "ess", ...).
    bool letterNameStartsWithVowelSound = "aefhilmnorsx".IndexOf(wordi[0]) >= 0;

    // A single letter is read out by name.
    if (wordi.Length == 1)
    {
        return letterNameStartsWithVowelSound ? "an" : "a";
    }

    // Capitalised abbreviations that START with a vowel-sound consonant followed by another
    // capital, and that are unlikely to be pronounceable words. The pattern must be anchored:
    // unanchored it would fire on any such pair inside the word (e.g. the "NS" in "DNS").
    if (Regex.IsMatch(word, @"^(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]"))
    {
        return "an";
    }

    // Vowel-initial words that are pronounced with a leading consonant sound
    // ("european", "one", "unicorn", "user"). Verbatim strings are required so that
    // \b reaches the regex engine as a word boundary instead of a backspace character.
    foreach (string pattern in new[] { @"^e[uw]", @"^onc?e\b", @"^uni([^nmd]|mo)", @"^u[bcfhjkqrst][aeiou]" })
    {
        if (Regex.IsMatch(wordi, pattern))
        {
            return "a";
        }
    }

    // Capitalised "UN.."/"UK.." forms (e.g. "UNESCO") are pronounced "yoo-".
    if (Regex.IsMatch(word, @"^U[NK][AIEO]"))
    {
        return "a";
    }

    // Any other word without lower-case letters is treated as spelled out letter by letter.
    if (word == word.ToUpperInvariant())
    {
        return letterNameStartsWithVowelSound ? "an" : "a";
    }

    // Ordinary vowel-initial words.
    if ("aeiou".IndexOf(wordi[0]) >= 0)
    {
        return "an";
    }

    // 'y' before certain consonants has an "i" sound ("yttrium", "ypsilon").
    if (Regex.IsMatch(wordi, @"^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)"))
    {
        return "an";
    }

    return "a";
}
P粉933003350 2023-08-26 00:45:17
您想要的是確定適當的不定冠詞。Lingua::EN::Inflect 是一個表現出色的 Perl 模組。我已經提取了相關程式碼並將其貼在下面。這只是一堆案例和一些正規表示式,所以移植到 PHP 應該不難。一位朋友已將其移植到 Python,如果有人有興趣,請點擊這裡。
# 2. INDEFINITE ARTICLES
#
# NOTE: this is an excerpt. It relies on names that are not defined here:
# enclose(), ud_match(), $PL_count_one, $persistent_count and @A_a_user_defined.

# THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
# CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
# TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
# The pattern contains layout whitespace, so it is only valid under the /x modifier.

my $A_abbrev = q{
(?! FJO | [HLMNS]Y. | RY[EO] | SQU
  | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
[FHLMNRSX][A-Z]
};

# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINNING WITH A
# 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
# IMPLIES AN ABBREVIATION.

my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';

# EXCEPTIONS TO EXCEPTIONS
# (enclose() is defined elsewhere; presumably it wraps the alternation in a
# group -- confirm against the full module.)

my $A_explicit_an = enclose join '|',
(
    "euler",
    "hour(?!i)",
    "heir",
    "honest",
    "hono",
);

my $A_ordinal_an = enclose join '|',
(
    "[aefhilmnorsx]-?th",
);

my $A_ordinal_a = enclose join '|',
(
    "[bcdgjkpqtuvwyz]-?th",
);

# A($str, $count)
# Splits $str into leading whitespace, the phrase (with any existing leading
# "a "/"an " dropped) and trailing whitespace, asks _indef_article() for the
# phrase with its article, and re-attaches the original surrounding whitespace.
# Returns $str unchanged when the phrase part is empty/false.
sub A {
    my ($str, $count) = @_;
    my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
    return $str unless $word;
    my $result = _indef_article($word,$count);
    return $pre.$result.$post;
}

# AN is an alias for A: it jumps straight into A with the same arguments.
sub AN { goto &A }

# _indef_article($word, $count)
# Returns "$count $word" when a count is given that does not match
# $PL_count_one; otherwise returns $word prefixed with "a " or "an ", or with
# a user-defined value. The rules are tried in order and the first match wins,
# so the order of the statements below is significant.
sub _indef_article {
    my ( $word, $count ) = @_;

    # Fall back to the persistent count only when no explicit count was passed.
    $count = $persistent_count
        if !defined($count) && defined($persistent_count);

    return "$count $word"
        if defined $count && $count!~/^($PL_count_one)$/io;

    # HANDLE USER-DEFINED VARIANTS

    my $value;
    return "$value $word"
        if defined($value = ud_match($word, @A_a_user_defined));

    # HANDLE ORDINAL FORMS

    $word =~ /^($A_ordinal_a)/i         and return "a $word";
    $word =~ /^($A_ordinal_an)/i        and return "an $word";

    # HANDLE SPECIAL CASES

    $word =~ /^($A_explicit_an)/i       and return "an $word";
    $word =~ /^[aefhilmnorsx]$/i        and return "an $word";
    $word =~ /^[bcdgjkpqtuvwyz]$/i      and return "a $word";

    # HANDLE ABBREVIATIONS
    # (case-sensitive on purpose: $A_abbrev only describes runs of capitals)

    $word =~ /^($A_abbrev)/ox           and return "an $word";
    $word =~ /^[aefhilmnorsx][.-]/i     and return "an $word";
    $word =~ /^[a-z][.-]/i              and return "a $word";

    # HANDLE CONSONANTS

    $word =~ /^[^aeiouy]/i              and return "a $word";

    # HANDLE SPECIAL VOWEL-FORMS

    $word =~ /^e[uw]/i                  and return "a $word";
    $word =~ /^onc?e\b/i                and return "a $word";
    $word =~ /^uni([^nmd]|mo)/i         and return "a $word";
    $word =~ /^ut[th]/i                 and return "an $word";
    $word =~ /^u[bcfhjkqrst][aeiou]/i   and return "a $word";

    # HANDLE SPECIAL CAPITALS

    $word =~ /^U[NK][AIEO]?/            and return "a $word";

    # HANDLE VOWELS

    $word =~ /^[aeiou]/i                and return "an $word";

    # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)

    $word =~ /^($A_y_cons)/io           and return "an $word";

    # OTHERWISE, GUESS "a"
    return "a $word";
}