⬆️ Update intl library.

Update intl library from v0.4? (2014) to v0.7.4 (2016).
Use global composer autoloader now.
This commit is contained in:
Klaus Weidenbach
2017-10-25 23:21:07 +02:00
parent 8e4c5db766
commit 66832c41e9
2027 changed files with 484998 additions and 497713 deletions

View File

@@ -0,0 +1,216 @@
<?php
/**
* Generates the json files stored in resources/country.
*/
set_time_limit(0);

// Source: https://github.com/unicode-cldr/cldr-localenames-full.git
$localeDirectory = '../assets/cldr-localenames-full/main/';
$enCountries = $localeDirectory . 'en/territories.json';
// Source: https://github.com/unicode-cldr/cldr-core.git
$codeMappings = '../assets/cldr-core/supplemental/codeMappings.json';
$currencyData = '../assets/cldr-core/supplemental/currencyData.json';

// Stop right away when one of the prerequisites is missing.
file_exists($enCountries) or die("The $enCountries file was not found");
file_exists($codeMappings) or die("The $codeMappings file was not found");
file_exists($currencyData) or die("The $currencyData file was not found");
// Reimplementing intl's collator would be a huge undertaking, so the
// extension is required to presort the generated locale specific data.
function_exists('collator_create') or die('The intl extension was not found.');
is_dir($localeDirectory) or die("The $localeDirectory directory was not found");

$ignoredCountries = [
    // Netherlands Antilles, no longer exists.
    'AN',
    // Uninhabited islands.
    'BV', 'HM', 'CP',
    // European Union, Outlying Oceania. Not countries.
    'EU', 'QO',
    // Unknown region.
    'ZZ',
];
// An entry without a "-" excludes that locale together with all variants,
// an entry with a "-" excludes only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];

// Load the source data. The "en" territories provide the list of countries.
$codeMappings = json_decode(file_get_contents($codeMappings), true)['supplemental']['codeMappings'];
$currencyData = json_decode(file_get_contents($currencyData), true)['supplemental']['currencyData'];
$countryData = json_decode(file_get_contents($enCountries), true)['main']['en']['localeDisplayNames']['territories'];
// Build the locale independent data (codes and current currency) for each
// country found in the "en" territory list.
$baseData = [];
foreach ($countryData as $countryCode => $countryName) {
    if (is_numeric($countryCode) || in_array($countryCode, $ignoredCountries, true)) {
        // Ignore continents, regions, uninhabited islands.
        continue;
    }
    if (strpos($countryCode, '-alt-') !== false) {
        // Ignore alternative names.
        continue;
    }
    // Countries are not guaranteed to have an alpha3 and/or numeric code.
    if (isset($codeMappings[$countryCode]['_alpha3'])) {
        $baseData[$countryCode]['three_letter_code'] = $codeMappings[$countryCode]['_alpha3'];
    }
    if (isset($codeMappings[$countryCode]['_numeric'])) {
        $baseData[$countryCode]['numeric_code'] = $codeMappings[$countryCode]['_numeric'];
    }
    // Determine the current currency for this country.
    if (isset($currencyData['region'][$countryCode])) {
        $currencies = prepare_currencies($currencyData['region'][$countryCode]);
        if ($currencies) {
            // prepare_currencies() sorts by "_from" date, so the last key is
            // the most recently introduced currency. end() takes its argument
            // by reference, which is why the keys are stored in a variable
            // first instead of passing the array_keys() result directly.
            $currencyCodes = array_keys($currencies);
            $baseData[$countryCode]['currency_code'] = end($currencyCodes);
        }
    }
}
// Write out base.json.
ksort($baseData);
file_put_json('base.json', $baseData);
// Collect the locale names from the entries of the locale directory.
$locales = [];
$directory = opendir($localeDirectory);
if ($directory) {
    for ($name = readdir($directory); false !== $name; $name = readdir($directory)) {
        // Skip ".", ".." and any other dot-prefixed entry.
        if (substr($name, 0, 1) == '.') {
            continue;
        }
        // An entry is ignored when either its full name or its first
        // "-" separated part is on the ignore list.
        $firstPart = explode('-', $name)[0];
        if (in_array($name, $ignoredLocales) || in_array($firstPart, $ignoredLocales)) {
            continue;
        }
        $locales[] = $name;
    }
    closedir($directory);
}
// Build the per-locale country names.
$countries = [];
$untranslatedCounts = [];
foreach ($locales as $localeId) {
    $decoded = json_decode(file_get_contents($localeDirectory . $localeId . '/territories.json'), true);
    $territories = $decoded['main'][$localeId]['localeDisplayNames']['territories'];
    foreach ($territories as $code => $label) {
        // Only countries that made it into the base data are exported.
        if (!isset($baseData[$code])) {
            continue;
        }
        // A name equal to the code means the country is untranslated in
        // this locale, so the english name is used instead.
        if ($code == str_replace('_', '-', $label)) {
            $label = $countryData[$code];
            // Keep track of how many countries are untranslated per locale.
            if (!isset($untranslatedCounts[$localeId])) {
                $untranslatedCounts[$localeId] = 0;
            }
            $untranslatedCounts[$localeId]++;
        }
        $countries[$localeId][$code] = [
            'name' => $label,
        ];
    }
}
// Drop the locales in which 80% or more of the countries are untranslated.
foreach ($untranslatedCounts as $localeId => $untranslated) {
    $percentage = $untranslated * (100 / count($countries[$localeId]));
    if ($percentage >= 80) {
        unset($countries[$localeId]);
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
$duplicates = [];
foreach ($countries as $locale => $localizedCountries) {
    if (strpos($locale, '-') === false) {
        // Locales without a "-" have no parent to compare against.
        continue;
    }
    // The parent is the locale with its last "-" separated part removed.
    $localeParts = explode('-', $locale);
    array_pop($localeParts);
    $parentLocale = implode('-', $localeParts);
    if (!isset($countries[$parentLocale])) {
        // The parent was ignored or dropped for being untranslated, so there
        // is nothing to compare against. The locale is kept.
        continue;
    }
    // array_udiff() orders its inputs using the callback, so the callback
    // must be a real three-way comparison. Returning only 0 or 1 makes the
    // result depend on the order of the elements.
    $diff = array_udiff($localizedCountries, $countries[$parentLocale], function ($first, $second) {
        return strcmp($first['name'], $second['name']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($countries[$locale]);
}
// Write one file per locale, with the countries ordered by name using a
// collator created for that locale.
foreach ($countries as $localeId => $entries) {
    $collator = collator_create($localeId);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    uasort($entries, $compareNames);
    file_put_json($localeId . '.json', $entries);
}
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The json is pretty printed with unescaped unicode characters, and every
 * level of indentation (4 spaces in the json_encode() output) is replaced
 * by a single tab.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 */
function file_put_json($filename, $data)
{
    $data = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // Only the indentation at the start of each line is converted, so that
    // spaces inside keys and values (e.g. "United States") stay untouched.
    $data = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $data);
    file_put_contents($filename, $data);
}
/**
 * Prepares the currencies for a specific country.
 *
 * The nested "currency code => data" arrays are flattened into a single
 * array keyed by currency code, entries whose "_tender" flag is 'false' are
 * dropped, and the remainder is sorted using compare_from_dates().
 *
 * @param array $currencies The currency entries of one country.
 *
 * @return array The prepared currencies, or an empty array for empty input.
 */
function prepare_currencies($currencies) {
    if (!$currencies) {
        return [];
    }
    // Move every nested entry to the top level, keyed by its currency code.
    foreach ($currencies as $position => $entry) {
        foreach ($entry as $code => $details) {
            $currencies[$code] = $details;
        }
        unset($currencies[$position]);
    }
    // Keep only the currencies that are not flagged as non-tender.
    $currencies = array_filter($currencies, function ($details) {
        $nonTender = isset($details['_tender']) && $details['_tender'] == 'false';

        return !$nonTender;
    });
    // Order by the "_from" date.
    uasort($currencies, 'compare_from_dates');

    return $currencies;
}
/**
 * uasort callback which orders arrays chronologically by their "_from" date.
 *
 * @param array $a First entry, must contain a "_from" date string.
 * @param array $b Second entry, must contain a "_from" date string.
 *
 * @return int -1 when $a starts before $b, 1 when it starts after, 0 when
 *             both start at the same time.
 */
function compare_from_dates($a, $b) {
    $firstFrom = new DateTime($a['_from']);
    $secondFrom = new DateTime($b['_from']);
    // DateTime objects support the comparison operators directly.
    if ($firstFrom < $secondFrom) {
        return -1;
    }

    return ($firstFrom == $secondFrom) ? 0 : 1;
}

View File

@@ -0,0 +1,192 @@
<?php
/**
* Generates the json files stored in resources/currency.
*
* The ISO currency list is used as a base, since it doesn't contain
* deprecated currencies, unlike CLDR (v25 has 139 deprecated entries).
*/
set_time_limit(0);

// Source: http://www.currency-iso.org/en/home/tables/table-a1.html
$isoCurrencies = '../assets/c2.xml';
// Source: https://github.com/unicode-cldr/cldr-numbers-full.git
$numbersDirectory = '../assets/cldr-numbers-full/main/';
$cldrCurrencies = $numbersDirectory . 'en/currencies.json';
// Source: https://github.com/unicode-cldr/cldr-core.git
$currencyData = '../assets/cldr-core/supplemental/currencyData.json';
// Source: https://github.com/unicode-cldr/cldr-localenames-full.git
$localeDirectory = '../assets/cldr-localenames-full/main/';

// Stop right away when one of the prerequisites is missing.
file_exists($isoCurrencies) or die("The $isoCurrencies file was not found");
file_exists($cldrCurrencies) or die("The $cldrCurrencies file was not found");
file_exists($currencyData) or die("The $currencyData file was not found");
// Reimplementing intl's collator would be a huge undertaking, so the
// extension is required to presort the generated locale specific data.
function_exists('collator_create') or die('The intl extension was not found.');
is_dir($localeDirectory) or die("The $localeDirectory directory was not found");
is_dir($numbersDirectory) or die("The $numbersDirectory directory was not found");

// An entry without a "-" excludes that locale together with all variants,
// an entry with a "-" excludes only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
// Build the locale independent currency data from the ISO list.
$baseData = [];
$currencyData = json_decode(file_get_contents($currencyData), true)['supplemental']['currencyData']['fractions'];
$isoData = simplexml_load_file($isoCurrencies);
foreach ($isoData->CcyTbl->CcyNtry as $isoEntry) {
    $nameAttributes = (array) $isoEntry->CcyNm->attributes();
    if (!empty($nameAttributes['@attributes']['IsFund'])) {
        // Ignore funds.
        continue;
    }
    $fields = (array) $isoEntry;
    if (empty($fields['Ccy'])) {
        // Ignore placeholders like "Antarctica".
        continue;
    }
    $code = $fields['Ccy'];
    $hasZzCountry = substr($fields['CtryNm'], 0, 2) == 'ZZ';
    if ($hasZzCountry || in_array($code, ['XUA', 'XSU', 'XDR'])) {
        // Ignore special currencies.
        continue;
    }
    $baseData[$code] = [
        'numeric_code' => $fields['CcyNbr'],
    ];
    // Take the fraction digits from CLDR, not ISO, because it reflects real
    // life usage more closely. Digits that are not set, or that match the
    // default value (2), are not exported.
    if (isset($currencyData[$code]['_digits']) && $currencyData[$code]['_digits'] != 2) {
        $baseData[$code]['fraction_digits'] = $currencyData[$code]['_digits'];
    }
}
// Write out base.json.
ksort($baseData);
file_put_json('base.json', $baseData);
// Gather available locales from the entries of the locale directory.
$locales = [];
if ($handle = opendir($localeDirectory)) {
    while (false !== ($entry = readdir($handle))) {
        // Skip ".", ".." and any other dot-prefixed entry.
        if (substr($entry, 0, 1) != '.') {
            // An entry is ignored when either its full name or its first
            // "-" separated part is on the ignore list.
            $entryParts = explode('-', $entry);
            if (!in_array($entry, $ignoredLocales, true) && !in_array($entryParts[0], $ignoredLocales, true)) {
                $locales[] = $entry;
            }
        }
    }
    closedir($handle);
}
// Make sure 'en' is processed first so that it can be used as a fallback.
// array_search() returns false when 'en' is not in the list, and using
// false as an array key would remove the element at index 0 instead.
$index = array_search('en', $locales, true);
if ($index !== false) {
    unset($locales[$index]);
}
array_unshift($locales, 'en');
// Build the per-locale currency names and symbols.
$currencies = [];
$untranslatedCounts = [];
foreach ($locales as $localeId) {
    $decoded = json_decode(file_get_contents($numbersDirectory . $localeId . '/currencies.json'), true);
    $localeCurrencies = $decoded['main'][$localeId]['numbers']['currencies'];
    foreach ($localeCurrencies as $code => $info) {
        // Only currencies that made it into the base data are exported.
        if (!isset($baseData[$code])) {
            continue;
        }
        $name = $info['displayName'];
        // A name equal to the code means the currency is untranslated in
        // this locale, so the english name is used instead.
        if ($code == $name) {
            $name = $currencies['en'][$code]['name'];
            // Keep track of how many currencies are untranslated per locale.
            if (!isset($untranslatedCounts[$localeId])) {
                $untranslatedCounts[$localeId] = 0;
            }
            $untranslatedCounts[$localeId]++;
        }
        $localized = [
            'name' => $name,
        ];
        // Decrease the dataset size by exporting the symbol only if it's
        // different from the currency code.
        if ($info['symbol'] != $code) {
            $localized['symbol'] = $info['symbol'];
        }
        $currencies[$localeId][$code] = $localized;
    }
}
// Drop the locales in which 80% or more of the currencies are untranslated.
foreach ($untranslatedCounts as $localeId => $untranslated) {
    $percentage = $untranslated * (100 / count($currencies[$localeId]));
    if ($percentage >= 80) {
        unset($currencies[$localeId]);
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
$duplicates = [];
foreach ($currencies as $locale => $localizedCurrencies) {
    if (strpos($locale, '-') === false) {
        // Locales without a "-" have no parent to compare against.
        continue;
    }
    // The parent is the locale with its last "-" separated part removed.
    $localeParts = explode('-', $locale);
    array_pop($localeParts);
    $parentLocale = implode('-', $localeParts);
    if (!isset($currencies[$parentLocale])) {
        // The parent was ignored or dropped for being untranslated, so there
        // is nothing to compare against. The locale is kept.
        continue;
    }
    // array_udiff() orders its inputs using the callback, so the callback
    // must be a real three-way comparison. Returning only 0 or 1 makes the
    // result depend on the order of the elements.
    $diff = array_udiff($localizedCurrencies, $currencies[$parentLocale], function ($first, $second) {
        return strcmp($first['name'], $second['name']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($currencies[$locale]);
}
// Write one file per locale, with the currencies ordered by name using a
// collator created for that locale.
foreach ($currencies as $localeId => $entries) {
    $collator = collator_create($localeId);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    uasort($entries, $compareNames);
    file_put_json($localeId . '.json', $entries);
}
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The json is pretty printed with unescaped unicode characters, and every
 * level of indentation (4 spaces in the json_encode() output) is replaced
 * by a single tab.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 */
function file_put_json($filename, $data)
{
    $data = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // Only the indentation at the start of each line is converted, so that
    // spaces inside keys and values (e.g. "US Dollar") stay untouched.
    $data = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $data);
    file_put_contents($filename, $data);
}

View File

@@ -0,0 +1,24 @@
#!/bin/sh
# Downloads the source data into ./assets: three CLDR json repositories
# (cloned with git) and the ISO currency list (saved as c2.xml).
# An existing ./assets directory is deleted first.

# Abort on the first failing command, so that a failed mkdir/cd does not
# cause the repositories to be cloned into the wrong directory.
set -e

URL="http://www.currency-iso.org/dam/downloads/lists/list_one.xml"

rm -fR assets
mkdir assets
cd assets

git clone https://github.com/unicode-cldr/cldr-core.git
git clone https://github.com/unicode-cldr/cldr-numbers-full.git
git clone https://github.com/unicode-cldr/cldr-localenames-full.git

# Prefer wget, fall back to curl.
if command -v wget >/dev/null 2>&1; then
    wget "$URL" -O c2.xml
elif command -v curl >/dev/null 2>&1; then
    # -f: fail on HTTP errors instead of saving the error page as c2.xml.
    # -L: follow redirects, like wget does by default.
    curl -fL "$URL" -o c2.xml
else
    echo "I require wget or curl but it's not installed. Aborting." >&2
    exit 1
fi

View File

@@ -0,0 +1,148 @@
<?php
/**
* Generates the json files stored in resources/language.
*
* CLDR lists about 515 languages, many of them dead (like Latin or Old English).
* In order to decrease the list to a reasonable size, only the languages
* for which CLDR itself has translations are listed.
*/
set_time_limit(0);

// Source: https://github.com/unicode-cldr/cldr-localenames-full.git
$localeDirectory = '../assets/cldr-localenames-full/main/';
$enLanguages = $localeDirectory . 'en/languages.json';

// Stop right away when one of the prerequisites is missing.
is_dir($localeDirectory) or die("The $localeDirectory directory was not found");
file_exists($enLanguages) or die("The $enLanguages file was not found");
// Reimplementing intl's collator would be a huge undertaking, so the
// extension is required to presort the generated locale specific data.
function_exists('collator_create') or die('The intl extension was not found.');

// An entry without a "-" excludes that locale together with all variants,
// an entry with a "-" excludes only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Valencian differs from its parent only by a single character (è/é).
    'ca-ES-VALENCIA',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];

$languages = [];
// The "en" data is loaded first so that it can serve as a fallback for
// language names that are untranslated in other locales.
$languageData = json_decode(file_get_contents($enLanguages), true)['main']['en']['localeDisplayNames']['languages'];
foreach ($languageData as $languageCode => $languageName) {
    // Codes containing "-alt-" are skipped.
    if (strpos($languageCode, '-alt-') !== false) {
        continue;
    }
    $languages['en'][$languageCode] = [
        'name' => $languageName,
    ];
}
// Collect the locale names from the entries of the locale directory.
$locales = [];
$directory = opendir($localeDirectory);
if ($directory) {
    for ($name = readdir($directory); false !== $name; $name = readdir($directory)) {
        // Skip ".", ".." and any other dot-prefixed entry.
        if (substr($name, 0, 1) == '.') {
            continue;
        }
        // An entry is ignored when either its full name or its first
        // "-" separated part is on the ignore list.
        $firstPart = explode('-', $name)[0];
        if (in_array($name, $ignoredLocales) || in_array($firstPart, $ignoredLocales)) {
            continue;
        }
        $locales[] = $name;
    }
    closedir($directory);
}
// Keep only the languages that are also an available locale.
// This reduces the language list from about 515 to about 185 languages.
foreach (array_keys($languages['en']) as $code) {
    if (in_array($code, $locales)) {
        continue;
    }
    unset($languages['en'][$code]);
}
// Build the per-locale language names.
$untranslatedCounts = [];
foreach ($locales as $localeId) {
    $decoded = json_decode(file_get_contents($localeDirectory . $localeId . '/languages.json'), true);
    $localeLanguages = $decoded['main'][$localeId]['localeDisplayNames']['languages'];
    foreach ($localeLanguages as $code => $label) {
        // Only languages that are present in the "en" list are exported.
        if (!isset($languages['en'][$code])) {
            continue;
        }
        // A name equal to the code means the language is untranslated in
        // this locale, so the english name is used instead.
        if ($code == str_replace('_', '-', $label)) {
            $label = $languages['en'][$code]['name'];
            // Keep track of how many languages are untranslated per locale.
            if (!isset($untranslatedCounts[$localeId])) {
                $untranslatedCounts[$localeId] = 0;
            }
            $untranslatedCounts[$localeId]++;
        }
        $languages[$localeId][$code] = [
            'name' => $label,
        ];
    }
}
// Drop the locales in which 80% or more of the languages are untranslated.
foreach ($untranslatedCounts as $localeId => $untranslated) {
    $percentage = $untranslated * (100 / count($languages[$localeId]));
    if ($percentage >= 80) {
        unset($languages[$localeId]);
    }
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
$duplicates = [];
foreach ($languages as $locale => $localizedLanguages) {
    if (strpos($locale, '-') === false) {
        // Locales without a "-" have no parent to compare against.
        continue;
    }
    // The parent is the locale with its last "-" separated part removed.
    $localeParts = explode('-', $locale);
    array_pop($localeParts);
    $parentLocale = implode('-', $localeParts);
    if (!isset($languages[$parentLocale])) {
        // The parent was ignored or dropped for being untranslated, so there
        // is nothing to compare against. The locale is kept.
        continue;
    }
    // array_udiff() orders its inputs using the callback, so the callback
    // must be a real three-way comparison. Returning only 0 or 1 makes the
    // result depend on the order of the elements.
    $diff = array_udiff($localizedLanguages, $languages[$parentLocale], function ($first, $second) {
        return strcmp($first['name'], $second['name']);
    });
    if (empty($diff)) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($languages[$locale]);
}
// Write one file per locale, with the languages ordered by name using a
// collator created for that locale.
foreach ($languages as $localeId => $entries) {
    $collator = collator_create($localeId);
    $compareNames = function ($left, $right) use ($collator) {
        return collator_compare($collator, $left['name'], $right['name']);
    };
    uasort($entries, $compareNames);
    file_put_json($localeId . '.json', $entries);
}
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The json is pretty printed with unescaped unicode characters, and every
 * level of indentation (4 spaces in the json_encode() output) is replaced
 * by a single tab.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 */
function file_put_json($filename, $data)
{
    $data = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // Only the indentation at the start of each line is converted, so that
    // spaces inside keys and values (e.g. "Old English") stay untouched.
    $data = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $data);
    file_put_contents($filename, $data);
}

View File

@@ -0,0 +1,132 @@
<?php
/**
* Generates the json files stored in resources/number_format.
*/
set_time_limit(0);

// Source: https://github.com/unicode-cldr/cldr-localenames-full.git
$localeDirectory = '../assets/cldr-localenames-full/main/';
$enLanguages = $localeDirectory . 'en/languages.json';
// Source: https://github.com/unicode-cldr/cldr-numbers-full.git
$numbersDirectory = '../assets/cldr-numbers-full/main/';

// Stop right away when one of the prerequisites is missing.
is_dir($localeDirectory) or die("The $localeDirectory directory was not found");
is_dir($numbersDirectory) or die("The $numbersDirectory directory was not found");
file_exists($enLanguages) or die("The $enLanguages file was not found");

// An entry without a "-" excludes that locale together with all variants,
// an entry with a "-" excludes only that exact locale.
$ignoredLocales = [
    // Interlingua is a made up language.
    'ia',
    // Ignored by other generation scripts, very minor locales.
    'as', 'asa', 'bem', 'chr', 'dav', 'dua', 'ebu', 'ewo', 'guz', 'gv', 'ii',
    'jgo', 'jmc', 'kam', 'kde', 'ki', 'kkj', 'kl', 'kln', 'ksb', 'kw', 'lag',
    'ln', 'mer', 'mgo', 'nd', 'nmg', 'nnh', 'nus', 'os', 'ps', 'rwk', 'sah',
    'saq', 'sbp', 'shi', 'sn', 'teo', 'vai', 'vun', 'xog', 'zgh',
    // Special "grouping" locales.
    'root', 'en-US-POSIX', 'en-001', 'en-150', 'es-419',
];
// Collect the locale names from the entries of the locale directory.
$locales = [];
$directory = opendir($localeDirectory);
if ($directory) {
    for ($name = readdir($directory); false !== $name; $name = readdir($directory)) {
        // Skip ".", ".." and any other dot-prefixed entry.
        if (substr($name, 0, 1) == '.') {
            continue;
        }
        // An entry is ignored when either its full name or its first
        // "-" separated part is on the ignore list.
        $firstPart = explode('-', $name)[0];
        if (in_array($name, $ignoredLocales) || in_array($firstPart, $ignoredLocales)) {
            continue;
        }
        $locales[] = $name;
    }
    closedir($directory);
}
// Build the number format data for each locale.
$numberFormats = [];
$supportedSystems = ['arab', 'arabext', 'beng', 'deva', 'latn'];
// CLDR symbol key => [exported key, default value]. A symbol is exported
// only when it differs from its default value.
$symbolDefaults = [
    'decimal' => ['decimal_separator', '.'],
    'group' => ['grouping_separator', ','],
    'plusSign' => ['plus_sign', '+'],
    'minusSign' => ['minus_sign', '-'],
    'percentSign' => ['percent_sign', '%'],
];
foreach ($locales as $localeId) {
    $decoded = json_decode(file_get_contents($numbersDirectory . $localeId . '/numbers.json'), true);
    $numbers = $decoded['main'][$localeId]['numbers'];
    // Use the default numbering system if it's supported, "latn" otherwise.
    $system = in_array($numbers['defaultNumberingSystem'], $supportedSystems)
        ? $numbers['defaultNumberingSystem']
        : 'latn';
    $currencyFormats = $numbers['currencyFormats-numberSystem-' . $system];
    $format = [
        'numbering_system' => $system,
        'decimal_pattern' => $numbers['decimalFormats-numberSystem-' . $system]['standard'],
        'percent_pattern' => $numbers['percentFormats-numberSystem-' . $system]['standard'],
        'currency_pattern' => $currencyFormats['standard'],
        'accounting_currency_pattern' => $currencyFormats['accounting'],
    ];
    $symbols = $numbers['symbols-numberSystem-' . $system];
    foreach ($symbolDefaults as $cldrKey => $mapping) {
        list($exportedKey, $default) = $mapping;
        if ($symbols[$cldrKey] != $default) {
            $format[$exportedKey] = $symbols[$cldrKey];
        }
    }
    $numberFormats[$localeId] = $format;
}
// Identify localizations that are the same as the ones for the parent locale.
// For example, "fr-FR" if "fr" has the same data.
$duplicates = [];
foreach ($numberFormats as $locale => $formatData) {
    if (strpos($locale, '-') === false) {
        // Locales without a "-" have no parent to compare against.
        continue;
    }
    // The parent is the locale with its last "-" separated part removed.
    $localeParts = explode('-', $locale);
    array_pop($localeParts);
    $parentLocale = implode('-', $localeParts);
    if (!isset($numberFormats[$parentLocale])) {
        // No data for the parent, so there is nothing to compare against.
        // The locale is kept.
        continue;
    }
    $parentData = $numberFormats[$parentLocale];
    // array_diff_assoc() only reports entries of its first argument, so the
    // check is done in both directions. Otherwise a locale that merely omits
    // a key of its parent (because it uses the default symbol while the
    // parent does not) would be treated as a duplicate.
    $isDuplicate = !array_diff_assoc($formatData, $parentData)
        && !array_diff_assoc($parentData, $formatData);
    if ($isDuplicate) {
        // The duplicates are not removed right away because they might
        // still be needed for other duplicate checks (for example,
        // when there are locales like bs-Latn-BA, bs-Latn, bs).
        $duplicates[] = $locale;
    }
}
// Remove the duplicates.
foreach ($duplicates as $locale) {
    unset($numberFormats[$locale]);
}
// Write out the data.
foreach ($numberFormats as $locale => $numberFormat) {
    file_put_json($locale . '.json', $numberFormat);
}
/**
 * Converts the provided data into json and writes it to the disk.
 *
 * The json is pretty printed with unescaped unicode characters, and every
 * level of indentation (4 spaces in the json_encode() output) is replaced
 * by a single tab.
 *
 * @param string $filename The path of the file to write.
 * @param mixed  $data     The data to encode.
 */
function file_put_json($filename, $data)
{
    $data = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    // Indenting with tabs instead of 4 spaces gives us 20% smaller files.
    // Only the indentation at the start of each line is converted, so that
    // spaces inside values (e.g. the "#,##0.00 ¤" pattern or a space used as
    // grouping separator) stay untouched.
    $data = preg_replace_callback('/^(?: {4})+/m', function ($match) {
        return str_repeat("\t", (int) (strlen($match[0]) / 4));
    }, $data);
    file_put_contents($filename, $data);
}