extract.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. <?php
  2. $pomo = dirname( dirname( dirname( __FILE__ ) ) ) . '/src/wp-includes/pomo';
  3. require_once "$pomo/entry.php";
  4. require_once "$pomo/translations.php";
  5. /**
  6. * Responsible for extracting translatable strings from PHP source files
  7. * in the form of Translations instances
  8. */
  9. class StringExtractor {
  10. var $rules = array(
  11. '__' => array( 'string' ),
  12. '_e' => array( 'string' ),
  13. '_n' => array( 'singular', 'plural' ),
  14. );
  15. var $comment_prefix = 'translators:';
  16. function __construct( $rules = array() ) {
  17. $this->rules = $rules;
  18. }
  19. function extract_from_directory( $dir, $excludes = array(), $includes = array(), $prefix = '' ) {
  20. $old_cwd = getcwd();
  21. chdir( $dir );
  22. $translations = new Translations;
  23. $file_names = (array) scandir( '.' );
  24. foreach ( $file_names as $file_name ) {
  25. if ( '.' == $file_name || '..' == $file_name ) continue;
  26. if ( preg_match( '/\.php$/', $file_name ) && $this->does_file_name_match( $prefix . $file_name, $excludes, $includes ) ) {
  27. $translations->merge_originals_with( $this->extract_from_file( $file_name, $prefix ) );
  28. }
  29. if ( is_dir( $file_name ) ) {
  30. $translations->merge_originals_with( $this->extract_from_directory( $file_name, $excludes, $includes, $prefix . $file_name . '/' ) );
  31. }
  32. }
  33. chdir( $old_cwd );
  34. return $translations;
  35. }
  36. function extract_from_file( $file_name, $prefix ) {
  37. $code = file_get_contents( $file_name );
  38. return $this->extract_from_code( $code, $prefix . $file_name );
  39. }
  40. function does_file_name_match( $path, $excludes, $includes ) {
  41. if ( $includes ) {
  42. $matched_any_include = false;
  43. foreach( $includes as $include ) {
  44. if ( preg_match( '|^'.$include.'$|', $path ) ) {
  45. $matched_any_include = true;
  46. break;
  47. }
  48. }
  49. if ( !$matched_any_include ) return false;
  50. }
  51. if ( $excludes ) {
  52. foreach( $excludes as $exclude ) {
  53. if ( preg_match( '|^'.$exclude.'$|', $path ) ) {
  54. return false;
  55. }
  56. }
  57. }
  58. return true;
  59. }
  60. function entry_from_call( $call, $file_name ) {
  61. $rule = isset( $this->rules[$call['name']] )? $this->rules[$call['name']] : null;
  62. if ( !$rule ) return null;
  63. $entry = new Translation_Entry;
  64. $multiple = array();
  65. $complete = false;
  66. for( $i = 0; $i < count( $rule ); ++$i ) {
  67. if ( $rule[$i] && ( !isset( $call['args'][$i] ) || !is_string( $call['args'][$i] ) || '' == $call['args'][$i] ) ) return false;
  68. switch( $rule[$i] ) {
  69. case 'string':
  70. if ( $complete ) {
  71. $multiple[] = $entry;
  72. $entry = new Translation_Entry;
  73. $complete = false;
  74. }
  75. $entry->singular = $call['args'][$i];
  76. $complete = true;
  77. break;
  78. case 'singular':
  79. if ( $complete ) {
  80. $multiple[] = $entry;
  81. $entry = new Translation_Entry;
  82. $complete = false;
  83. }
  84. $entry->singular = $call['args'][$i];
  85. $entry->is_plural = true;
  86. break;
  87. case 'plural':
  88. $entry->plural = $call['args'][$i];
  89. $entry->is_plural = true;
  90. $complete = true;
  91. break;
  92. case 'context':
  93. $entry->context = $call['args'][$i];
  94. foreach( $multiple as &$single_entry ) {
  95. $single_entry->context = $entry->context;
  96. }
  97. break;
  98. }
  99. }
  100. if ( isset( $call['line'] ) && $call['line'] ) {
  101. $references = array( $file_name . ':' . $call['line'] );
  102. $entry->references = $references;
  103. foreach( $multiple as &$single_entry ) {
  104. $single_entry->references = $references;
  105. }
  106. }
  107. if ( isset( $call['comment'] ) && $call['comment'] ) {
  108. $comments = rtrim( $call['comment'] ) . "\n";
  109. $entry->extracted_comments = $comments;
  110. foreach( $multiple as &$single_entry ) {
  111. $single_entry->extracted_comments = $comments;
  112. }
  113. }
  114. if ( $multiple && $entry ) {
  115. $multiple[] = $entry;
  116. return $multiple;
  117. }
  118. return $entry;
  119. }
  120. function extract_from_code( $code, $file_name ) {
  121. $translations = new Translations;
  122. $function_calls = $this->find_function_calls( array_keys( $this->rules ), $code );
  123. foreach( $function_calls as $call ) {
  124. $entry = $this->entry_from_call( $call, $file_name );
  125. if ( is_array( $entry ) )
  126. foreach( $entry as $single_entry )
  127. $translations->add_entry_or_merge( $single_entry );
  128. elseif ( $entry)
  129. $translations->add_entry_or_merge( $entry );
  130. }
  131. return $translations;
  132. }
  133. /**
  134. * Finds all function calls in $code and returns an array with an associative array for each function:
  135. * - name - name of the function
  136. * - args - array for the function arguments. Each string literal is represented by itself, other arguments are represented by null.
  137. * - line - line number
  138. */
  139. function find_function_calls( $function_names, $code ) {
  140. $tokens = token_get_all( $code );
  141. $function_calls = array();
  142. $latest_comment = false;
  143. $in_func = false;
  144. foreach( $tokens as $token ) {
  145. $id = $text = null;
  146. if ( is_array( $token ) ) list( $id, $text, $line ) = $token;
  147. if ( T_WHITESPACE == $id ) continue;
  148. if ( T_STRING == $id && in_array( $text, $function_names ) && !$in_func ) {
  149. $in_func = true;
  150. $paren_level = -1;
  151. $args = array();
  152. $func_name = $text;
  153. $func_line = $line;
  154. $func_comment = $latest_comment? $latest_comment : '';
  155. $just_got_into_func = true;
  156. $latest_comment = false;
  157. continue;
  158. }
  159. if ( T_COMMENT == $id ) {
  160. $text = trim( preg_replace( '%^/\*|//%', '', preg_replace( '%\*/$%', '', $text ) ) );
  161. if ( 0 === stripos( $text, $this->comment_prefix ) ) {
  162. $latest_comment = $text;
  163. }
  164. }
  165. if ( !$in_func ) continue;
  166. if ( '(' == $token ) {
  167. $paren_level++;
  168. if ( 0 == $paren_level ) { // start of first argument
  169. $just_got_into_func = false;
  170. $current_argument = null;
  171. $current_argument_is_just_literal = true;
  172. }
  173. continue;
  174. }
  175. if ( $just_got_into_func ) {
  176. // there wasn't a opening paren just after the function name -- this means it is not a function
  177. $in_func = false;
  178. $just_got_into_func = false;
  179. }
  180. if ( ')' == $token ) {
  181. if ( 0 == $paren_level ) {
  182. $in_func = false;
  183. $args[] = $current_argument;
  184. $call = array( 'name' => $func_name, 'args' => $args, 'line' => $func_line );
  185. if ( $func_comment ) $call['comment'] = $func_comment;
  186. $function_calls[] = $call;
  187. }
  188. $paren_level--;
  189. continue;
  190. }
  191. if ( ',' == $token && 0 == $paren_level ) {
  192. $args[] = $current_argument;
  193. $current_argument = null;
  194. $current_argument_is_just_literal = true;
  195. continue;
  196. }
  197. if ( T_CONSTANT_ENCAPSED_STRING == $id && $current_argument_is_just_literal ) {
  198. // we can use eval safely, because we are sure $text is just a string literal
  199. eval('$current_argument = '.$text.';' );
  200. continue;
  201. }
  202. $current_argument_is_just_literal = false;
  203. $current_argument = null;
  204. }
  205. return $function_calls;
  206. }
  207. }