Revert "Language names via intl library."

This commit is contained in:
RedMatrix
2014-12-31 10:43:19 +11:00
parent ae9d08267c
commit 4f35efa0ba
994 changed files with 7 additions and 498717 deletions

View File

@@ -1,162 +0,0 @@
<?php
/**
* Generates the json files stored in resources/country.
*/
set_time_limit(0);

// Input files, all taken from the CLDR 26 JSON bundle:
// http://unicode.org/Public/cldr/26/json-full.zip
$enCountries = '../json-full/main/en/territories.json';
$codeMappings = '../json-full/supplemental/codeMappings.json';
$telephoneCodeData = '../json-full/supplemental/telephoneCodeData.json';

// Stop at the first input file that is missing.
foreach ([$enCountries, $codeMappings, $telephoneCodeData] as $requiredFile) {
    if (!file_exists($requiredFile)) {
        die("The $requiredFile file was not found");
    }
}

// The generated locale specific data is presorted with intl's collator,
// because reimplementing that collator here would be a huge undertaking.
if (!function_exists('collator_create')) {
    die('The intl extension was not found.');
}

// Territory codes that are left out of the generated data.
$ignoredCountries = [
    // Netherlands Antilles, no longer exists.
    'AN',
    // Uninhabited islands.
    'BV', 'HM', 'CP',
    // European Union, Outlying Oceania. Not countries.
    'EU', 'QO',
    // Unknown region.
    'ZZ',
];

// Locales that are left out of the generated data.
// An entry without a "-" matches all variants of that language.
// An entry with a "-" matches only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Those locales are 90% untranslated.
    'aa', 'as', 'az-Cyrl', 'az-Cyrl-AZ', 'bem', 'dua', 'gv', 'haw', 'ig', 'ii',
    'kkj', 'kok', 'kw', 'lkt', 'mgo', 'nnh', 'nr', 'nso', 'om', 'os', 'pa-Arab',
    'pa-Arab-PK', 'qu', 'rw', 'sah', 'smn', 'ss', 'ssy', 'st', 'tg', 'tn', 'ts',
    'uz-Arab', 'uz-Arab-AF', 've', 'vo', 'xh', 'yi',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
// Decode the three CLDR inputs and keep only the sections that are used.
// $telephoneCodeData and $codeMappings hold file paths up to this point and
// hold the decoded data afterwards.
$telephoneJson = json_decode(file_get_contents($telephoneCodeData), true);
$telephoneCodeData = $telephoneJson['supplemental']['telephoneCodeData'];
$codeMappingsJson = json_decode(file_get_contents($codeMappings), true);
$codeMappings = $codeMappingsJson['supplemental']['codeMappings'];
$territoriesJson = json_decode(file_get_contents($enCountries), true);
// The "en" territory list determines which countries exist at all.
$countryData = $territoriesJson['main']['en']['localeDisplayNames']['territories'];

// Territories that borrow the telephone code of another territory:
// "Canary Islands" (IC) and "Ceuta and Melilla" (EA) use Spain's.
// Kosovo (XK) uses three telephone codes; Serbia's is used until that
// gets resolved.
$telephoneCodeSources = [
    'IC' => 'ES',
    'EA' => 'ES',
    'XK' => 'RS',
];

$baseData = [];
foreach ($countryData as $countryCode => $countryName) {
    // Skip continents and regions (numeric codes), the explicitly ignored
    // territories, and alternative names ("-alt-" keys).
    if (is_numeric($countryCode) || in_array($countryCode, $ignoredCountries) || strpos($countryCode, '-alt-') !== false) {
        continue;
    }

    $country = ['code' => $countryCode];
    // Countries are not guaranteed to have an alpha3 and/or numeric code.
    if (isset($codeMappings[$countryCode]['_alpha3'])) {
        $country['three_letter_code'] = $codeMappings[$countryCode]['_alpha3'];
    }
    if (isset($codeMappings[$countryCode]['_numeric'])) {
        $country['numeric_code'] = $codeMappings[$countryCode]['_numeric'];
    }
    // The telephone code comes from the borrowed territory when there is
    // one, otherwise from the country's own entry (if it has one).
    $borrowsTelephoneCode = isset($telephoneCodeSources[$countryCode]);
    if ($borrowsTelephoneCode || isset($telephoneCodeData[$countryCode])) {
        $sourceCode = $borrowsTelephoneCode ? $telephoneCodeSources[$countryCode] : $countryCode;
        $country['telephone_code'] = $telephoneCodeData[$sourceCode][0]['telephoneCountryCode'];
    }

    $baseData[$countryCode] = $country;
}

// Write out base.json, ordered by country code.
ksort($baseData);
file_put_contents('base.json', json_encode($baseData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));
// Collect the entries of the CLDR "main" directory as locales, leaving out
// hidden entries and everything matched by $ignoredLocales (either the full
// entry name, or its part before the first "-").
$locales = [];
$mainDirectory = opendir('../json-full/main');
if ($mainDirectory) {
    while (($candidate = readdir($mainDirectory)) !== false) {
        if (substr($candidate, 0, 1) == '.') {
            continue;
        }
        $candidateParts = explode('-', $candidate);
        if (in_array($candidate, $ignoredLocales) || in_array($candidateParts[0], $ignoredLocales)) {
            continue;
        }
        $locales[] = $candidate;
    }
    closedir($mainDirectory);
}
// Build the per-locale country names, limited to the countries in $baseData.
$countries = [];
foreach ($locales as $locale) {
    $territoriesPath = '../json-full/main/' . $locale . '/territories.json';
    $decoded = json_decode(file_get_contents($territoriesPath), true);
    $territories = $decoded['main'][$locale]['localeDisplayNames']['territories'];
    foreach ($territories as $countryCode => $countryName) {
        if (!isset($baseData[$countryCode])) {
            continue;
        }
        // A name equal to its own code means the name is untranslated, in
        // which case the english version is used instead.
        $isUntranslated = ($countryCode == $countryName);
        $countries[$locale][$countryCode] = [
            'name' => $isUntranslated ? $countryData[$countryCode] : $countryName,
        ];
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
//
// The comparison is done per country code (array_udiff_assoc), so a name only
// counts as unchanged when the parent has the same name for the same country.
// The callback returns a real ordering (strcmp) as the array_u* functions
// require, instead of a bare 0/1 equality flag.
$duplicates = [];
foreach ($countries as $locale => $localizedCountries) {
    $separatorPosition = strrpos($locale, '-');
    if ($separatorPosition === false) {
        // Top-level locale, there is no parent to compare against.
        continue;
    }
    // The parent is the locale without its last "-" separated part.
    $parentLocale = substr($locale, 0, $separatorPosition);
    if (!isset($countries[$parentLocale])) {
        // The parent was not generated (ignored or missing), so this locale
        // has nothing to fall back to and must be kept.
        continue;
    }
    $diff = array_udiff_assoc($localizedCountries, $countries[$parentLocale], function ($first, $second) {
        return strcmp($first['name'], $second['name']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($countries[$locale]);
}
// Write one "<locale>.json" file per remaining locale, with the countries
// sorted by name using that locale's collation rules.
foreach ($countries as $locale => $localizedCountries) {
    $collator = collator_create($locale);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    // uasort() keeps the country codes as keys.
    uasort($localizedCountries, $compareNames);
    file_put_contents(
        $locale . '.json',
        json_encode($localizedCountries, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
    );
}

View File

@@ -1,153 +0,0 @@
<?php
/**
* Generates the json files stored in resources/currency.
*
* The ISO currency list is used as a base, since it doesn't contain
* deprecated currencies, unlike CLDR (v25 has 139 deprecated entries).
*/
set_time_limit(0);

// Downloaded from http://www.currency-iso.org/en/home/tables/table-a1.html
$isoCurrencies = '../c2.xml';
// Downloaded from http://unicode.org/Public/cldr/26/json-full.zip
$cldrCurrencies = '../json-full/main/en-US/currencies.json';
$currencyData = '../json-full/supplemental/currencyData.json';

// Stop at the first input file that is missing.
foreach ([$isoCurrencies, $cldrCurrencies, $currencyData] as $requiredFile) {
    if (!file_exists($requiredFile)) {
        die("The $requiredFile file was not found");
    }
}

// The generated locale specific data is presorted with intl's collator,
// because reimplementing that collator here would be a huge undertaking.
if (!function_exists('collator_create')) {
    die('The intl extension was not found.');
}

// Locales that are left out of the generated data.
// An entry without a "-" matches all variants of that language.
// An entry with a "-" matches only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Those locales are 90% untranslated.
    'aa', 'as', 'az-Cyrl', 'az-Cyrl-AZ', 'bem', 'dua', 'gv', 'haw', 'ig', 'ii',
    'kkj', 'kok', 'kw', 'lkt', 'mgo', 'nnh', 'nr', 'nso', 'om', 'os', 'pa-Arab',
    'pa-Arab-PK', 'qu', 'rw', 'sah', 'smn', 'ss', 'ssy', 'st', 'tg', 'tn', 'ts',
    'uz-Arab', 'uz-Arab-AF', 've', 'vo', 'xh', 'yi',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
// Build the base currency list from the ISO table, enriched with the CLDR
// fraction digits. $currencyData holds a file path up to this point and the
// decoded "fractions" section afterwards.
$baseData = [];
$decodedCurrencyData = json_decode(file_get_contents($currencyData), true);
$currencyData = $decodedCurrencyData['supplemental']['currencyData']['fractions'];
$isoData = simplexml_load_file($isoCurrencies);
$specialCurrencies = ['XUA', 'XSU', 'XDR'];
foreach ($isoData->CcyTbl->CcyNtry as $isoEntry) {
    // Ignore funds, which are flagged through an "IsFund" attribute on the
    // currency name element.
    $nameAttributes = (array) $isoEntry->CcyNm->attributes();
    if (!empty($nameAttributes['@attributes']['IsFund'])) {
        continue;
    }
    $fields = (array) $isoEntry;
    // Ignore placeholders like "Antarctica", which have no currency code.
    if (empty($fields['Ccy'])) {
        continue;
    }
    // Ignore special currencies.
    if (substr($fields['CtryNm'], 0, 2) == 'ZZ' || in_array($fields['Ccy'], $specialCurrencies)) {
        continue;
    }

    $currencyCode = $fields['Ccy'];
    $baseData[$currencyCode] = [
        'code' => $currencyCode,
        'numeric_code' => $fields['CcyNbr'],
    ];
    // The fraction digits are taken from CLDR, not ISO, because CLDR
    // reflects real life usage more closely. They are only stored when they
    // differ from the default value (2), which also applies when CLDR has
    // no digits for the currency.
    if (isset($currencyData[$currencyCode]['_digits']) && $currencyData[$currencyCode]['_digits'] != 2) {
        $baseData[$currencyCode]['fraction_digits'] = $currencyData[$currencyCode]['_digits'];
    }
}

// Write out base.json, ordered by currency code.
ksort($baseData);
file_put_contents('base.json', json_encode($baseData, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE));
// Collect the entries of the CLDR "main" directory as locales, leaving out
// hidden entries and everything matched by $ignoredLocales (either the full
// entry name, or its part before the first "-").
$locales = [];
$mainDirectory = opendir('../json-full/main');
if ($mainDirectory) {
    while (($candidate = readdir($mainDirectory)) !== false) {
        if (substr($candidate, 0, 1) == '.') {
            continue;
        }
        $candidateParts = explode('-', $candidate);
        if (in_array($candidate, $ignoredLocales) || in_array($candidateParts[0], $ignoredLocales)) {
            continue;
        }
        $locales[] = $candidate;
    }
    closedir($mainDirectory);
}
// Build the per-locale currency names and symbols, limited to the
// currencies in $baseData.
$currencies = [];
foreach ($locales as $locale) {
    $currenciesPath = '../json-full/main/' . $locale . '/currencies.json';
    $decoded = json_decode(file_get_contents($currenciesPath), true);
    $localeCurrencies = $decoded['main'][$locale]['numbers']['currencies'];
    foreach ($localeCurrencies as $currencyCode => $currency) {
        if (!isset($baseData[$currencyCode])) {
            continue;
        }
        $currencies[$locale][$currencyCode] = [
            'name' => $currency['displayName'],
            'symbol' => $currency['symbol'],
        ];
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
//
// The comparison is done per currency code (array_udiff_assoc) and covers
// both the name and the symbol, since both end up in the generated file.
// A locale that only differs from its parent by a symbol must be kept.
// The callback returns a real ordering (strcmp) as the array_u* functions
// require, instead of a bare 0/1 equality flag.
$duplicates = [];
foreach ($currencies as $locale => $localizedCurrencies) {
    $separatorPosition = strrpos($locale, '-');
    if ($separatorPosition === false) {
        // Top-level locale, there is no parent to compare against.
        continue;
    }
    // The parent is the locale without its last "-" separated part.
    $parentLocale = substr($locale, 0, $separatorPosition);
    if (!isset($currencies[$parentLocale])) {
        // The parent was not generated (ignored or missing), so this locale
        // has nothing to fall back to and must be kept.
        continue;
    }
    $diff = array_udiff_assoc($localizedCurrencies, $currencies[$parentLocale], function ($first, $second) {
        $nameOrder = strcmp((string) $first['name'], (string) $second['name']);
        if ($nameOrder !== 0) {
            return $nameOrder;
        }
        return strcmp((string) $first['symbol'], (string) $second['symbol']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($currencies[$locale]);
}
// Write one "<locale>.json" file per remaining locale, with the currencies
// sorted by name using that locale's collation rules.
foreach ($currencies as $locale => $localizedCurrencies) {
    $collator = collator_create($locale);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    // uasort() keeps the currency codes as keys.
    uasort($localizedCurrencies, $compareNames);
    file_put_contents(
        $locale . '.json',
        json_encode($localizedCurrencies, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
    );
}

View File

@@ -1,129 +0,0 @@
<?php
/**
* Generates the json files stored in resources/language.
*
* CLDR lists about 515 languages, many of them dead (like Latin or Old English).
* In order to decrease the list to a reasonable size, only the languages
* for which CLDR itself has translations are listed.
*/
set_time_limit(0);

// Downloaded from http://unicode.org/Public/cldr/26/json-full.zip
$enLanguages = '../json-full/main/en/languages.json';
$enLanguagesFound = file_exists($enLanguages);
if (!$enLanguagesFound) {
    die("The $enLanguages file was not found");
}

// The generated locale specific data is presorted with intl's collator,
// because reimplementing that collator here would be a huge undertaking.
$intlAvailable = function_exists('collator_create');
if (!$intlAvailable) {
    die('The intl extension was not found.');
}

// Locales that are left out of the generated data.
// An entry without a "-" matches all variants of that language.
// An entry with a "-" matches only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Those locales are 90% untranslated.
    'aa', 'as', 'az-Cyrl', 'az-Cyrl-AZ', 'bem', 'dua', 'gv', 'haw', 'ig', 'ii',
    'kkj', 'kok', 'kw', 'lkt', 'mgo', 'nnh', 'nr', 'nso', 'om', 'os', 'pa-Arab',
    'pa-Arab-PK', 'qu', 'rw', 'sah', 'smn', 'ss', 'ssy', 'st', 'tg', 'tn', 'ts',
    'uz-Arab', 'uz-Arab-AF', 've', 'vo', 'xh', 'yi',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
$languages = [];
// The "en" data is loaded first so that it can serve as the fallback for
// language names that other locales leave untranslated.
$decodedEnglish = json_decode(file_get_contents($enLanguages), true);
$languageData = $decodedEnglish['main']['en']['localeDisplayNames']['languages'];
foreach ($languageData as $languageCode => $languageName) {
    // Alternative names ("-alt-" keys) are not part of the list.
    if (strpos($languageCode, '-alt-') !== false) {
        continue;
    }
    $languages['en'][$languageCode] = [
        'code' => $languageCode,
        'name' => $languageName,
    ];
}
// Collect the entries of the CLDR "main" directory as locales, leaving out
// hidden entries and everything matched by $ignoredLocales (either the full
// entry name, or its part before the first "-").
$locales = [];
$mainDirectory = opendir('../json-full/main');
if ($mainDirectory) {
    while (($candidate = readdir($mainDirectory)) !== false) {
        if (substr($candidate, 0, 1) == '.') {
            continue;
        }
        $candidateParts = explode('-', $candidate);
        if (in_array($candidate, $ignoredLocales) || in_array($candidateParts[0], $ignoredLocales)) {
            continue;
        }
        $locales[] = $candidate;
    }
    closedir($mainDirectory);
}
// Keep only the languages whose code is also an available locale.
// This reduces the language list from about 515 to about 185 languages.
foreach (array_keys($languages['en']) as $languageCode) {
    if (in_array($languageCode, $locales)) {
        continue;
    }
    unset($languages['en'][$languageCode]);
}
// Build the per-locale language names, limited to the languages that
// survived in the "en" list.
foreach ($locales as $locale) {
    $languagesPath = '../json-full/main/' . $locale . '/languages.json';
    $decoded = json_decode(file_get_contents($languagesPath), true);
    $localeLanguages = $decoded['main'][$locale]['localeDisplayNames']['languages'];
    foreach ($localeLanguages as $languageCode => $languageName) {
        if (!isset($languages['en'][$languageCode])) {
            continue;
        }
        // A name equal to its own code means the name is untranslated, in
        // which case the english version is used instead.
        $isUntranslated = ($languageCode == $languageName);
        $languages[$locale][$languageCode] = [
            'code' => $languageCode,
            'name' => $isUntranslated ? $languages['en'][$languageCode]['name'] : $languageName,
        ];
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
//
// The comparison is done per language code (array_udiff_assoc), so a name
// only counts as unchanged when the parent has the same name for the same
// language. The callback returns a real ordering (strcmp) as the array_u*
// functions require, instead of a bare 0/1 equality flag.
$duplicates = [];
foreach ($languages as $locale => $localizedLanguages) {
    $separatorPosition = strrpos($locale, '-');
    if ($separatorPosition === false) {
        // Top-level locale, there is no parent to compare against.
        continue;
    }
    // The parent is the locale without its last "-" separated part.
    $parentLocale = substr($locale, 0, $separatorPosition);
    if (!isset($languages[$parentLocale])) {
        // The parent was not generated (ignored or missing), so this locale
        // has nothing to fall back to and must be kept.
        continue;
    }
    $diff = array_udiff_assoc($localizedLanguages, $languages[$parentLocale], function ($first, $second) {
        return strcmp($first['name'], $second['name']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($languages[$locale]);
}
// Write one "<locale>.json" file per remaining locale, with the languages
// sorted by name using that locale's collation rules.
foreach ($languages as $locale => $localizedLanguages) {
    $collator = collator_create($locale);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    // uasort() keeps the language codes as keys.
    uasort($localizedLanguages, $compareNames);
    file_put_contents(
        $locale . '.json',
        json_encode($localizedLanguages, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
    );
}

View File

@@ -1,107 +0,0 @@
<?php
/**
* Generates the json files stored in resources/number_format.
*/
set_time_limit(0);

// The CLDR data is expected to be extracted next to this directory.
// Downloaded from http://unicode.org/Public/cldr/26/json-full.zip
$mainDirectoryFound = is_dir('../json-full/main');
if (!$mainDirectoryFound) {
    die("The '../json-full/main' directory was not found");
}

// Locales that are left out of the generated data.
// An entry without a "-" matches all variants of that language.
// An entry with a "-" matches only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
// Collect the entries of the CLDR "main" directory as locales, leaving out
// hidden entries and everything matched by $ignoredLocales (either the full
// entry name, or its part before the first "-").
$locales = [];
$mainDirectory = opendir('../json-full/main');
if ($mainDirectory) {
    while (($candidate = readdir($mainDirectory)) !== false) {
        if (substr($candidate, 0, 1) == '.') {
            continue;
        }
        $candidateParts = explode('-', $candidate);
        if (in_array($candidate, $ignoredLocales) || in_array($candidateParts[0], $ignoredLocales)) {
            continue;
        }
        $locales[] = $candidate;
    }
    closedir($mainDirectory);
}
// Numbering systems that may be used as-is; anything else falls back to
// "latn".
$supportedNumberingSystems = ['arab', 'arabext', 'beng', 'deva', 'latn'];
// Output key => [CLDR symbol key, default value]. A symbol is only written
// to the output when it differs from its default.
$symbolDefaults = [
    'decimal_separator' => ['decimal', '.'],
    'grouping_separator' => ['group', ','],
    'plus_sign' => ['plusSign', '+'],
    'minus_sign' => ['minusSign', '-'],
    'percent_sign' => ['percentSign', '%'],
];

// Build the number format definition of every locale.
$numberFormats = [];
foreach ($locales as $locale) {
    $numbersPath = '../json-full/main/' . $locale . '/numbers.json';
    $decoded = json_decode(file_get_contents($numbersPath), true);
    $numbers = $decoded['main'][$locale]['numbers'];

    // Use the default numbering system, if it's supported.
    $numberingSystem = 'latn';
    if (in_array($numbers['defaultNumberingSystem'], $supportedNumberingSystems)) {
        $numberingSystem = $numbers['defaultNumberingSystem'];
    }

    $currencyFormats = $numbers['currencyFormats-numberSystem-' . $numberingSystem];
    $numberFormat = [
        'numbering_system' => $numberingSystem,
        'decimal_pattern' => $numbers['decimalFormats-numberSystem-' . $numberingSystem]['standard'],
        'percent_pattern' => $numbers['percentFormats-numberSystem-' . $numberingSystem]['standard'],
        'currency_pattern' => $currencyFormats['standard'],
        'accounting_currency_pattern' => $currencyFormats['accounting'],
    ];

    // Add the symbols only if they're different from the default data.
    $symbols = $numbers['symbols-numberSystem-' . $numberingSystem];
    foreach ($symbolDefaults as $outputKey => $symbolDefault) {
        $symbol = $symbols[$symbolDefault[0]];
        if ($symbol != $symbolDefault[1]) {
            $numberFormat[$outputKey] = $symbol;
        }
    }

    $numberFormats[$locale] = $numberFormat;
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
//
// The comparison has to be done in both directions. Symbols equal to the
// defaults are omitted from the data, so a child locale that uses the default
// symbols has fewer keys than a parent that overrides them. A one-way
// array_diff_assoc($child, $parent) reports no difference in that case and
// the child would be dropped, silently inheriting the parent's symbols.
$duplicates = [];
foreach ($numberFormats as $locale => $formatData) {
    $separatorPosition = strrpos($locale, '-');
    if ($separatorPosition === false) {
        // Top-level locale, there is no parent to compare against.
        continue;
    }
    // The parent is the locale without its last "-" separated part.
    $parentLocale = substr($locale, 0, $separatorPosition);
    if (!isset($numberFormats[$parentLocale])) {
        // The parent was not generated (ignored or missing), so this locale
        // has nothing to fall back to and must be kept.
        continue;
    }
    $parentData = $numberFormats[$parentLocale];
    $onlyInChild = array_diff_assoc($formatData, $parentData);
    $onlyInParent = array_diff_assoc($parentData, $formatData);
    if (empty($onlyInChild) && empty($onlyInParent)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($numberFormats[$locale]);
}
// Write one "<locale>.json" file per remaining locale.
$jsonOptions = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE;
foreach ($numberFormats as $locale => $numberFormat) {
    file_put_contents($locale . '.json', json_encode($numberFormat, $jsonOptions));
}