P粉8850351142023-08-26 00:50:59
I needed this for a C# project, so here is a C# port of the Python code mentioned above. Make sure to include `using System.Text.RegularExpressions;`
in your source file.
/// <summary>
/// Chooses the English indefinite article ("a" or "an") for a noun phrase.
/// Only the first run of word characters (<c>\w+</c>) in the phrase is examined.
/// The rules are a port of the indefinite-article heuristics of the Perl module
/// Lingua::EN::Inflect (via a Python port).
/// </summary>
/// <param name="noun_phrase">
/// The phrase that the article will precede. Must not be null:
/// <see cref="Regex.Match(string, string)"/> throws for a null input.
/// </param>
/// <returns>
/// <c>"a"</c> or <c>"an"</c>. When the phrase contains no word characters at all,
/// <c>"an"</c> is returned.
/// </returns>
private string GetIndefiniteArticle(string noun_phrase)
{
    var firstWord = Regex.Match(noun_phrase, @"\w+");
    if (!firstWord.Success)
    {
        return "an";
    }

    string word = firstWord.Value;

    // Invariant casing keeps the rules independent of the current culture
    // (e.g. Turkish 'I' -> dotless 'i' would break the prefix checks below).
    string wordi = word.ToLowerInvariant();

    // Letters whose spoken *name* starts with a vowel sound ("eff", "aitch", "em", ...).
    // Note this is 'f', not 'd': "an F", but "a D".
    const string vowelSoundLetters = "aefhilmnorsx";
    bool firstLetterNameHasVowelSound = vowelSoundLetters.IndexOf(wordi[0]) >= 0;

    // Exceptions to exceptions: silent-h words and "euler"; "houri" keeps its h sound.
    if (Regex.IsMatch(wordi, "^(euler|hour(?!i)|heir|honest|hono)"))
    {
        return "an";
    }

    // A single letter is pronounced by its name.
    if (wordi.Length == 1)
    {
        return firstLetterNameHasVowelSound ? "an" : "a";
    }

    // Capitalised abbreviations that start with a vowel-sound consonant followed by
    // another capital and are unlikely to be real words (FBI, MRI, ...).
    // The pattern must be anchored: Regex.IsMatch searches anywhere in the input,
    // so without '^' a word such as "BMX" would match on its inner "MX".
    if (Regex.IsMatch(word, "^(?!FJO|[HLMNS]Y.|RY[EO]|SQU|(F[LR]?|[HL]|MN?|N|RH?|S[CHKLMNPTVW]?|X(YL)?)[AEIOU])[FHLMNRSX][A-Z]"))
    {
        return "an";
    }

    // Vowel-initial spellings that are pronounced with a leading consonant sound.
    // Verbatim strings are required so that \b reaches the regex engine as a word
    // boundary instead of being compiled by C# into a backspace character.
    string[] consonantSoundPatterns =
    {
        @"^e[uw]",
        @"^onc?e\b",
        @"^uni([^nmd]|mo)",
        @"^u[bcfhjkqrst][aeiou]",
    };
    foreach (string pattern in consonantSoundPatterns)
    {
        if (Regex.IsMatch(wordi, pattern))
        {
            return "a";
        }
    }

    // Special capitals such as "UNESCO" are read as words starting with a "you" sound.
    if (Regex.IsMatch(word, "^U[NK][AIEO]"))
    {
        return "a";
    }

    // Any other word with no lowercase letters is treated as an initialism and
    // judged by the spoken name of its first letter.
    if (word == word.ToUpperInvariant())
    {
        return firstLetterNameHasVowelSound ? "an" : "a";
    }

    // Ordinary vowel-initial words.
    if ("aeiou".IndexOf(wordi[0]) >= 0)
    {
        return "an";
    }

    // 'y' before these consonant clusters has an "i" sound (e.g. "yttrium").
    if (Regex.IsMatch(wordi, "^y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)"))
    {
        return "an";
    }

    // Otherwise, guess "a".
    return "a";
}
P粉9330033502023-08-26 00:45:17
What you want is to determine the appropriate indefinite article. Lingua::EN::Inflect
is a Perl module that does this well. I've extracted the relevant code and pasted it below. It's just a bunch of cases and some regular expressions, so porting it to PHP shouldn't be difficult. A friend has also ported it to Python; if anyone is interested, click here.
# 2. INDEFINITE ARTICLES
#
# Excerpt from Lingua::EN::Inflect: selects "a" or "an" for an English word.
# NOTE(review): enclose(), ud_match(), @A_a_user_defined, $persistent_count and
# $PL_count_one are referenced below but are not defined in this excerpt;
# presumably they come from the rest of the module -- confirm before reuse.

# THIS PATTERN MATCHES STRINGS OF CAPITALS STARTING WITH A "VOWEL-SOUND"
# CONSONANT FOLLOWED BY ANOTHER CONSONANT, AND WHICH ARE NOT LIKELY
# TO BE REAL WORDS (OH, ALL RIGHT THEN, IT'S JUST MAGIC!)
# (It contains literal whitespace, so it is applied with the /x flag below.)

my $A_abbrev = q{
(?! FJO | [HLMNS]Y. | RY[EO] | SQU
  | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
[FHLMNRSX][A-Z]
};

# THIS PATTERN CODES THE BEGINNINGS OF ALL ENGLISH WORDS BEGINNING WITH A
# 'y' FOLLOWED BY A CONSONANT. ANY OTHER Y-CONSONANT PREFIX THEREFORE
# IMPLIES AN ABBREVIATION.

my $A_y_cons = 'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)';

# EXCEPTIONS TO EXCEPTIONS

my $A_explicit_an = enclose join '|',
(
    "euler",
    "hour(?!i)", "heir", "honest", "hono",
);

my $A_ordinal_an = enclose join '|',
(
    "[aefhilmnorsx]-?th",
);

my $A_ordinal_a = enclose join '|',
(
    "[bcdgjkpqtuvwyz]-?th",
);

# A($str, $count): returns $str with an indefinite article chosen for it.
# Leading/trailing whitespace is captured and re-attached around the result, and
# an existing leading "a"/"an" followed by whitespace is skipped rather than
# captured. $str is returned unchanged when the captured word is false (e.g. empty).
sub A {
    my ($str, $count) = @_;
    my ($pre, $word, $post) = ( $str =~ m/\A(\s*)(?:an?\s+)?(.+?)(\s*)\Z/i );
    return $str unless $word;
    my $result = _indef_article($word,$count);
    return $pre.$result.$post;
}

# AN is an alias: it transfers control to A with the same argument list.
sub AN { goto &A }

# _indef_article($word, $count): returns "<article> $word", or "$count $word"
# when a count is defined and does not match $PL_count_one.
# The rules are tried in order; the first one that matches decides the result.
sub _indef_article {
    my ( $word, $count ) = @_;

    # Fall back to the persistent count when no explicit count was passed.
    $count = $persistent_count
        if !defined($count) && defined($persistent_count);

    return "$count $word"
        if defined $count && $count!~/^($PL_count_one)$/io;

    # HANDLE USER-DEFINED VARIANTS

    my $value;
    return "$value $word"
        if defined($value = ud_match($word, @A_a_user_defined));

    # HANDLE ORDINAL FORMS

    $word =~ /^($A_ordinal_a)/i and return "a $word";
    $word =~ /^($A_ordinal_an)/i and return "an $word";

    # HANDLE SPECIAL CASES

    $word =~ /^($A_explicit_an)/i and return "an $word";
    $word =~ /^[aefhilmnorsx]$/i and return "an $word";
    $word =~ /^[bcdgjkpqtuvwyz]$/i and return "a $word";

    # HANDLE ABBREVIATIONS

    $word =~ /^($A_abbrev)/ox and return "an $word";
    $word =~ /^[aefhilmnorsx][.-]/i and return "an $word";
    $word =~ /^[a-z][.-]/i and return "a $word";

    # HANDLE CONSONANTS

    $word =~ /^[^aeiouy]/i and return "a $word";

    # HANDLE SPECIAL VOWEL-FORMS

    $word =~ /^e[uw]/i and return "a $word";
    $word =~ /^onc?e\b/i and return "a $word";
    $word =~ /^uni([^nmd]|mo)/i and return "a $word";
    $word =~ /^ut[th]/i and return "an $word";
    $word =~ /^u[bcfhjkqrst][aeiou]/i and return "a $word";

    # HANDLE SPECIAL CAPITALS (case-sensitive on purpose: no /i flag)

    $word =~ /^U[NK][AIEO]?/ and return "a $word";

    # HANDLE VOWELS

    $word =~ /^[aeiou]/i and return "an $word";

    # HANDLE y... (BEFORE CERTAIN CONSONANTS IMPLIES (UNNATURALIZED) "i.." SOUND)

    $word =~ /^($A_y_cons)/io and return "an $word";

    # OTHERWISE, GUESS "a"
    return "a $word";
}