upgrade.php: Artifact [a959efaa53]

Artifact a959efaa5396653b367ea26a2de17a907b3ab964:

File ext/gettext.php — part of check-in [51e3884900] at 2010-06-22 17:03:27 on branch trunk — upgradephp-15 (user: mario size: 12517)
<?php
/*
  api: PHP
  type: functions
  title: gettext()
  description: emulates PHP gettext extension functionality
  priority: auto
  category: library

   This include script simulates gettext() functionality.
    - It could read translation data from .mo and .po files.
    - Lookup of plural forms mostly work (but not 100% compliant,
      no real interpreter for Plural-Forms: expression).
    - Categories/codesets are ignored.

   Besides using setlocale() you should change the $_ENV["LANG"] var
   to the desired language manually. Additionally all your scripts
   could contain following (may also work with standard gettext):
     $_ENV["LANGUAGE"] = $_SERVER["HTTP_ACCEPT_LANGUAGE"];
   That's often more user-friendly than hardwired server-side values.
*/


#-- emulate only if not present in current PHP interpreter
if (!function_exists("gettext")) {

   #-- all-in-one combined implementation
   #   (in original API only the first parameter is present)
   function gettext($msg, $msg2=NULL, $domain=NULL, $category=NULL, $plural=NULL) {
      global $_GETTEXT;

      #-- get default params if corresponding args are empty
      if (!isset($domain)) {
         $domain = $_GETTEXT['%domain'];
      }
      if (empty($_GETTEXT[$domain])) {
         bindtextdomain($domain);  // auto load from system dirs
      }

      #-- plural array position
      if (!isset($plural)) {
         $pli = 0;
      }
      elseif ($ph = $_GETTEXT[$domain]['%plural-c']) {
         $pli = gettext___plural_guess($ph, $plural);
      }
      else {
         $pli = ($plural != 1) ? 1 : 0;   // English
      }

      #-- look up string
      if (isset($_GETTEXT[$domain][$msg]) && ($trans = $_GETTEXT[$domain][$msg])
      or ($pli) and ($trans = $_GETTEXT[$domain][$msg2]))
      {
         // handle plural entries
         if (is_array($trans)) {
            if (!isset($trans[$pli])) {
               $pli = 0;   // missing translation
            }
            $trans = $trans[$pli];
         }
         // only return, if something found
         if (strlen($trans)) {
            $msg = $trans;
         }
      }

      #-- handle $category (???)
      // recode() ...

      #-- give out whatever we have
      return($msg);
   }
   

   #-- return plural form array index for algorithm type
   #   (compacted from C expression string beforehand)
   function gettext___plural_guess(&$type, $n) {
   
      #-- guess from string with C expression and set integer shorthand
      if (is_string($type)) {
         if (($type == "nplurals=1;plural=0;") || !strlen($type)) {
            $type = -1; // no plurals
         }
         elseif ($type == "nplurals=2;plural=n!=1;") {
            $type = 1;  // English
         }
         elseif ($type == "nplurals=2;plural=n>1;") {
            $type = 2;  // French
         }
         // special cases
         elseif (strpos($type, "n%100!=11")) {
            if (strpos($type, "n!=0")) {
               $type == 21;  // Latvian
            }
            if (strpos($type, "n%10<=4")) {
               $type = 22;   // a few Slavic langs (code similar to Polish below)
            }
            if (strpos($type, "n%10>=2")) {  // Lithuanian
               $type = 23;
            }
            $type = 0;
         }
         // specials, group 2
         elseif (strpos($type, "n<=4")) {   // Slovak
            $type = 25;
         }
         elseif (strpos($type, "n==2")) {   // Irish
            $type = 31;
         }
         elseif (strpos($type, "n%10>=2")) {   // Polish
            $type = 26;
         }
         elseif (strpos($type, "n%100==3")) {   // Slovenian
            $type = 28;
         }
         // fallbacks
         elseif (strpos($type, ";plural=n;")) {
            $type = 7;  // unused
         }
         // first at this point a tokenizer/parser/interpreter would have made sense
         else {
            $type = 0;  // no plurals
         }
      }

      #-- return plural index value from pre-set formulas
      switch ($type) {
         case -1:  // no plural forms
            return(0);
         case 1:   // English, and lots of others...
            return($n != 1 ? 1 : 0);
         case 2:   // French, Brazilian Protuguese
            return($n > 1 ? 1 : 0);
         case 7:   // unused
            return($n);

         case 21:  // Latvian
            return  (($n%10==1) && ($n%100!=11)) ? (0) :  ($n!=0 ? 1 : 2)  ;
         case 22:  // Slavic langs
            return  ($n%10==1) && ($n%100!=11) ? 0 :
               ( ($n%10>=2) && ($n%10<=4) && ($n%100<10 || $n%100>=20) ? 1 : 2  )  ;
         case 23:  // Lithuanian
            return  ($n%10==1) && ($n%100!=11) ? 0 :
               ( ($n%10>=2) && ($n%100<10 || $n%100>=20) ? 1 : 2  )  ;
         case 25:  // Slovak
            return  $n==1 ? 0 : ($n>=2 && $n<=4 ? 1 : 2)  ;
         case 26:  // Polish
             return  $n==1 ? 0 : ( $n%10>=2 && $n%10<=4 && ($n%100<10 || $n%100>=20) ? 1 : 2 )  ;
         case 28:  // Slovenian
            return  $n%100==1 ? 0 : ($n%100==2 || $n%100==3 || $n%100==4 ? 2 : 3)  ;
         case 31:  // Irish
            return  ($n == 1) ? (0) : (($n == 2) ? 1 : 2)  ;

         default:
            $type = -1;
      }   // unsupported, always return non-plural index [0]
      return(0);
   }


   #-- wrappers around monster function above
   function ngettext($msg1, $msg2, $plural) {
      return gettext($msg1, $msg2, NULL, NULL, $plural);
   }
   function dngettext($domain, $msg1, $msg2, $plural) {
      return gettext($msg1, $msg2, $domain, NULL, $plural);
   }
   function dcngettext($domain, $msg1, $msg2, $plural, $category) {
      return gettext($msg1, $msg2, $domain, $category, $plural);
   }
   function dcgettext($domain, $msg, $category) {
      return gettext($msg, NULL, $domain, $category);
   }
   function dgettext($domain, $msg) {
      return gettext($msg, NULL, $domain);
   }


   #-- sets current translation data source
   #   (must have been loaded beforehand)
   function textdomain($default="NULL") {
      global $_GETTEXT;
      $prev = isset($_GETTEXT['%domain']) ? $_GETTEXT['%domain'] : NULL;
      if (isset($default)) {
         $_GETTEXT['%domain'] = $default;
      }
      return $prev;
   }


   #-- loads data files
   function bindtextdomain($domain, $directory="/usr/share/locale:/usr/local/share/locale:./locale") {
      global $_GETTEXT;
      if (isset($_GETTEXT[$domain]) && (count($_GETTEXT[$domain]) > 3)) {
         return;  // don't load twice
      }
      $_GETTEXT[$domain]['%dir'] = $directory;
      $_GETTEXT['%locale'] = setlocale(LC_CTYPE, 0);

      #-- allowed languages
      $langs = @$_ENV['LANGUAGE'] . ',' . @$_ENV['LC_ALL'] . ','
             . @$_ENV['LC_MESSAGE'] .',' . @$_ENV['LANG'] . ','
             . @$_GETTEXT['%locale'] . ',' . @$_SERVER['HTTP_ACCEPT_LANGUAGE']
             . ',C,en';
          
      #-- add shortened language codes (en_UK.UTF-8 -> + en_UK, en)
      foreach (explode(',', $langs) as $d) {
         $d = trim($d);
         // $dir2[] = $d;
         $d = strtok($d, "@.-+=%:; ");
         if (strlen($d)) {
            $dir2[] = $d;
         }
         if (strpos($d, '_')) {
            $dir2[] = strtok($d, '_');
         }
      }

      #-- search for matching directory and load data file
      foreach (explode(':', $directory) as $directory) {
         foreach ($dir2 as $lang) {
            $base_fn = "$directory/$lang/LC_MESSAGES/$domain";

#echo "GETTEXT:$lang:$base_fn\n";

            #-- binary format
            if (file_exists($fn = "$base_fn.mo") && ($f = fopen($fn, "rb")))
            {
               gettext___load_mo($f, $domain);
               break 2;
            }

            #-- text file
            elseif (function_exists("gettext___load_po")
            and file_exists($fn = "$base_fn.po") && ($f = fopen($fn, "r")))
            {
               gettext___load_po($f, $domain);
               break 2;
            }
         }
      }//foreach
      
      #-- extract headers
      if ($head = $_GETTEXT[$domain][""]) {
         foreach (explode("\n", $head) as $line) {
            $header = strtok(':', $line);
            $line = trim(strtok("\n"));
            $_GETTEXT[$domain]['%po-header'][strtolower($header)] = $line;
         }
      
         #-- plural-forms header
         if (function_exists("gettext___plural_guess")
         and ($h = @$_GETTEXT[$domain]['%po-header']["plural-forms"]))
         {
            $h = preg_replace("/[(){}\[\]^\s*\\]+/", "", $h);  // rm whitespace
            gettext___plural_guess($h, 0);  // pre-decode into algorithm type integer
            $_GETTEXT[$domain]['%plural-c'] = $h;
         }
      }

      #-- set as default textdomain
      if (empty($_GETTEXT['%domain'])) {
         textdomain($domain);
      }
      return($domain);
   }


   #-- load string data from binary .mo files (ign checksums)
   function gettext___load_mo($f, $domain) {
      global $_GETTEXT;

      #-- read in data file completely
      $data = fread($f, 1<<20);
      fclose($f);

      #-- extract header fields and check file magic
      if ($data) {
         $header = substr($data, 0, 20);
         $header = unpack("L1magic/L1version/L1count/L1o_msg/L1o_trn", $header);
         extract($header);
         if ((dechex($magic) == "950412de") && ($version == 0)) {

            #-- fetch all entries
            for ($n=0; $n<$count; $n++) {

               #-- msgid
               $r = unpack("L1len/L1offs", substr($data, $o_msg + $n * 8, 8));
               $msgid = substr($data, $r["offs"], $r["len"]);
               unset($msgid_plural);
               if (strpos($msgid, "\000")) {
                  list($msgid, $msgid_plural) = explode("\000", $msgid);
               }

               #-- translation(s)
               $r = unpack("L1len/L1offs", substr($data, $o_trn + $n * 8, 8));
               $msgstr = substr($data, $r["offs"], $r["len"]);
               if (strpos($msgstr, "\000")) {
                  $msgstr = explode("\000", $msgstr);
               }

               #-- add
               $_GETTEXT[$domain][$msgid] = $msgstr;
               if (isset($msgid_plural)) {
                  $_GETTEXT[$domain][$msgid_plural] = &$_GETTEXT[$domain][$msgid];
               }
            }

         }
      }
   }


   #-- read from textual .po source file (not fully correct, and redundant
   #   because the original gettext/libintl doesn't support this at all)
   function gettext___load_po($f, $domain) {
      global $_GETTEXT;
      $c_esc = array("\\n"=>"\n", "\\r"=>"\r", "\\\\"=>"\\", "\\f"=>"\f", "\\t"=>"\t", "\\"=>"");

      #-- read line-wise from text file   
      do {
         $line = trim(fgets($f));

         #-- check what's in the current line
         $space = strpos($line, " ");
         // comment
         if ($line[0] == "#") {
            //continue;
         }
         // msgid
         elseif (strncmp($line, "msgid", 5)==0) {
            $msgid[] = trim(substr($line, $space+1), '"');
         }
         // translation
         elseif (strncmp($line, "msgstr", 6)==0) {
            $msgstr[] = trim(substr($line, $space+1), '"');
         }
         // continued (could be _id or _str)
         elseif ($line[0] == '"') {
            $line = trim($line, '"');
            if (isset($msggstr)) {
               $msgstr[count($msgstr)] .= $line;
            }
            else {
               $msgid[count($msgid)] .= $line;
            }
         }

         #-- append to global $_GETTEXT hash as soon as we have a complete dataset
         if (isset($msgid) && isset($msgstr) && (empty($line) || ($line[0]=="#") || feof($f)) )
         {
            $msgid[0] = strtr($msgid[0], $c_esc);
            foreach ($msgstr as $v) {
               $_GETTEXT[$domain][$msgid[0]] = strtr($v, $c_esc);
            }
            if ($msgid[1]) {
               $msgid[1] = strtr($msgid[1], $c_esc);
               $_GETTEXT[$domain][$msgid[1]] = &$_GETTEXT[$domain][$msgid[0]];
            }

            $msgid = array();
            $msgstr = array();
         }

      } while (!feof($f));
      fclose($f);
   }


   #-- ignored setting (no idea what it really should do)
   function bind_textdomain_codeset($domain, $codeset) {
      global $_GETTEXT;
      $_GETTEXT[$domain]["%codeset"] = $codeset;
      return($domain);
   }


}


#-- define gettexts preferred function name _ separately
if (!function_exists("_")) {
   function _($str) {
      return gettext($str);
   }
}



?>