charset.php 31 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. <?php
  2. /**
  3. * Test WPDB methods
  4. *
  5. * @group wpdb
  6. * @group security-153
  7. */
  8. class Tests_DB_Charset extends WP_UnitTestCase {
  9. /**
  10. * Our special WPDB
  11. *
  12. * @var resource
  13. */
  14. protected static $_wpdb;
  15. /**
  16. * The version of the MySQL server.
  17. *
  18. * @var string
  19. */
  20. private static $server_info;
  21. public static function setUpBeforeClass() {
  22. parent::setUpBeforeClass();
  23. require_once dirname( __DIR__ ) . '/db.php';
  24. self::$_wpdb = new WpdbExposedMethodsForTesting();
  25. self::$server_info = self::$_wpdb->db_server_info();
  26. }
  27. /**
  28. * @ticket 21212
  29. */
  30. function data_strip_invalid_text() {
  31. $fields = array(
  32. 'latin1' => array(
  33. // latin1. latin1 never changes.
  34. 'charset' => 'latin1',
  35. 'value' => "\xf0\x9f\x8e\xb7",
  36. 'expected' => "\xf0\x9f\x8e\xb7",
  37. 'length' => array(
  38. 'type' => 'char',
  39. 'length' => 100,
  40. ),
  41. ),
  42. 'latin1_char_length' => array(
  43. // latin1. latin1 never changes.
  44. 'charset' => 'latin1',
  45. 'value' => str_repeat( 'A', 11 ),
  46. 'expected' => str_repeat( 'A', 10 ),
  47. 'length' => array(
  48. 'type' => 'char',
  49. 'length' => 10,
  50. ),
  51. ),
  52. 'latin1_byte_length' => array(
  53. // latin1. latin1 never changes.
  54. 'charset' => 'latin1',
  55. 'value' => str_repeat( 'A', 11 ),
  56. 'expected' => str_repeat( 'A', 10 ),
  57. 'length' => array(
  58. 'type' => 'byte',
  59. 'length' => 10,
  60. ),
  61. ),
  62. 'ascii' => array(
  63. // ascii gets special treatment, make sure it's covered.
  64. 'charset' => 'ascii',
  65. 'value' => 'Hello World',
  66. 'expected' => 'Hello World',
  67. 'length' => array(
  68. 'type' => 'char',
  69. 'length' => 100,
  70. ),
  71. ),
  72. 'ascii_char_length' => array(
  73. // ascii gets special treatment, make sure it's covered.
  74. 'charset' => 'ascii',
  75. 'value' => str_repeat( 'A', 11 ),
  76. 'expected' => str_repeat( 'A', 10 ),
  77. 'length' => array(
  78. 'type' => 'char',
  79. 'length' => 10,
  80. ),
  81. ),
  82. 'ascii_byte_length' => array(
  83. // ascii gets special treatment, make sure it's covered.
  84. 'charset' => 'ascii',
  85. 'value' => str_repeat( 'A', 11 ),
  86. 'expected' => str_repeat( 'A', 10 ),
  87. 'length' => array(
  88. 'type' => 'byte',
  89. 'length' => 10,
  90. ),
  91. ),
  92. 'utf8' => array(
  93. // utf8 only allows <= 3-byte chars.
  94. 'charset' => 'utf8',
  95. 'value' => "H€llo\xf0\x9f\x98\x88World¢",
  96. 'expected' => 'H€lloWorld¢',
  97. 'length' => array(
  98. 'type' => 'char',
  99. 'length' => 100,
  100. ),
  101. ),
  102. 'utf8_23char_length' => array(
  103. // utf8 only allows <= 3-byte chars.
  104. 'charset' => 'utf8',
  105. 'value' => str_repeat( '²3', 10 ),
  106. 'expected' => str_repeat( '²3', 5 ),
  107. 'length' => array(
  108. 'type' => 'char',
  109. 'length' => 10,
  110. ),
  111. ),
  112. 'utf8_23byte_length' => array(
  113. // utf8 only allows <= 3-byte chars.
  114. 'charset' => 'utf8',
  115. 'value' => str_repeat( '²3', 10 ),
  116. 'expected' => '²3²3',
  117. 'length' => array(
  118. 'type' => 'byte',
  119. 'length' => 10,
  120. ),
  121. ),
  122. 'utf8_3char_length' => array(
  123. // utf8 only allows <= 3-byte chars.
  124. 'charset' => 'utf8',
  125. 'value' => str_repeat( '3', 11 ),
  126. 'expected' => str_repeat( '3', 10 ),
  127. 'length' => array(
  128. 'type' => 'char',
  129. 'length' => 10,
  130. ),
  131. ),
  132. 'utf8_3byte_length' => array(
  133. // utf8 only allows <= 3-byte chars.
  134. 'charset' => 'utf8',
  135. 'value' => str_repeat( '3', 11 ),
  136. 'expected' => '333',
  137. 'length' => array(
  138. 'type' => 'byte',
  139. 'length' => 10,
  140. ),
  141. ),
  142. 'utf8mb3' => array(
  143. // utf8mb3 should behave the same an utf8.
  144. 'charset' => 'utf8mb3',
  145. 'value' => "H€llo\xf0\x9f\x98\x88World¢",
  146. 'expected' => 'H€lloWorld¢',
  147. 'length' => array(
  148. 'type' => 'char',
  149. 'length' => 100,
  150. ),
  151. ),
  152. 'utf8mb3_23char_length' => array(
  153. // utf8mb3 should behave the same an utf8.
  154. 'charset' => 'utf8mb3',
  155. 'value' => str_repeat( '²3', 10 ),
  156. 'expected' => str_repeat( '²3', 5 ),
  157. 'length' => array(
  158. 'type' => 'char',
  159. 'length' => 10,
  160. ),
  161. ),
  162. 'utf8mb3_23byte_length' => array(
  163. // utf8mb3 should behave the same an utf8.
  164. 'charset' => 'utf8mb3',
  165. 'value' => str_repeat( '²3', 10 ),
  166. 'expected' => '²3²3',
  167. 'length' => array(
  168. 'type' => 'byte',
  169. 'length' => 10,
  170. ),
  171. ),
  172. 'utf8mb3_3char_length' => array(
  173. // utf8mb3 should behave the same an utf8.
  174. 'charset' => 'utf8mb3',
  175. 'value' => str_repeat( '3', 11 ),
  176. 'expected' => str_repeat( '3', 10 ),
  177. 'length' => array(
  178. 'type' => 'char',
  179. 'length' => 10,
  180. ),
  181. ),
  182. 'utf8mb3_3byte_length' => array(
  183. // utf8mb3 should behave the same an utf8.
  184. 'charset' => 'utf8mb3',
  185. 'value' => str_repeat( '3', 10 ),
  186. 'expected' => '333',
  187. 'length' => array(
  188. 'type' => 'byte',
  189. 'length' => 10,
  190. ),
  191. ),
  192. 'utf8mb4' => array(
  193. // utf8mb4 allows 4-byte characters, too.
  194. 'charset' => 'utf8mb4',
  195. 'value' => "H€llo\xf0\x9f\x98\x88World¢",
  196. 'expected' => "H€llo\xf0\x9f\x98\x88World¢",
  197. 'length' => array(
  198. 'type' => 'char',
  199. 'length' => 100,
  200. ),
  201. ),
  202. 'utf8mb4_234char_length' => array(
  203. // utf8mb4 allows 4-byte characters, too.
  204. 'charset' => 'utf8mb4',
  205. 'value' => str_repeat( '²3𝟜', 10 ),
  206. 'expected' => '²3𝟜²3𝟜²3𝟜²',
  207. 'length' => array(
  208. 'type' => 'char',
  209. 'length' => 10,
  210. ),
  211. ),
  212. 'utf8mb4_234byte_length' => array(
  213. // utf8mb4 allows 4-byte characters, too.
  214. 'charset' => 'utf8mb4',
  215. 'value' => str_repeat( '²3𝟜', 10 ),
  216. 'expected' => '²3𝟜',
  217. 'length' => array(
  218. 'type' => 'byte',
  219. 'length' => 10,
  220. ),
  221. ),
  222. 'utf8mb4_4char_length' => array(
  223. // utf8mb4 allows 4-byte characters, too.
  224. 'charset' => 'utf8mb4',
  225. 'value' => str_repeat( '𝟜', 11 ),
  226. 'expected' => str_repeat( '𝟜', 10 ),
  227. 'length' => array(
  228. 'type' => 'char',
  229. 'length' => 10,
  230. ),
  231. ),
  232. 'utf8mb4_4byte_length' => array(
  233. // utf8mb4 allows 4-byte characters, too.
  234. 'charset' => 'utf8mb4',
  235. 'value' => str_repeat( '𝟜', 10 ),
  236. 'expected' => '𝟜𝟜',
  237. 'length' => array(
  238. 'type' => 'byte',
  239. 'length' => 10,
  240. ),
  241. ),
  242. 'koi8r' => array(
  243. 'charset' => 'koi8r',
  244. 'value' => "\xfdord\xf2ress",
  245. 'expected' => "\xfdord\xf2ress",
  246. 'length' => array(
  247. 'type' => 'char',
  248. 'length' => 100,
  249. ),
  250. ),
  251. 'koi8r_char_length' => array(
  252. 'charset' => 'koi8r',
  253. 'value' => str_repeat( "\xfd\xf2", 10 ),
  254. 'expected' => str_repeat( "\xfd\xf2", 5 ),
  255. 'length' => array(
  256. 'type' => 'char',
  257. 'length' => 10,
  258. ),
  259. ),
  260. 'koi8r_byte_length' => array(
  261. 'charset' => 'koi8r',
  262. 'value' => str_repeat( "\xfd\xf2", 10 ),
  263. 'expected' => str_repeat( "\xfd\xf2", 5 ),
  264. 'length' => array(
  265. 'type' => 'byte',
  266. 'length' => 10,
  267. ),
  268. ),
  269. 'hebrew' => array(
  270. 'charset' => 'hebrew',
  271. 'value' => "\xf9ord\xf7ress",
  272. 'expected' => "\xf9ord\xf7ress",
  273. 'length' => array(
  274. 'type' => 'char',
  275. 'length' => 100,
  276. ),
  277. ),
  278. 'hebrew_char_length' => array(
  279. 'charset' => 'hebrew',
  280. 'value' => str_repeat( "\xf9\xf7", 10 ),
  281. 'expected' => str_repeat( "\xf9\xf7", 5 ),
  282. 'length' => array(
  283. 'type' => 'char',
  284. 'length' => 10,
  285. ),
  286. ),
  287. 'hebrew_byte_length' => array(
  288. 'charset' => 'hebrew',
  289. 'value' => str_repeat( "\xf9\xf7", 10 ),
  290. 'expected' => str_repeat( "\xf9\xf7", 5 ),
  291. 'length' => array(
  292. 'type' => 'byte',
  293. 'length' => 10,
  294. ),
  295. ),
  296. 'cp1251' => array(
  297. 'charset' => 'cp1251',
  298. 'value' => "\xd8ord\xd0ress",
  299. 'expected' => "\xd8ord\xd0ress",
  300. 'length' => array(
  301. 'type' => 'char',
  302. 'length' => 100,
  303. ),
  304. ),
  305. 'cp1251_no_length' => array(
  306. 'charset' => 'cp1251',
  307. 'value' => "\xd8ord\xd0ress",
  308. 'expected' => "\xd8ord\xd0ress",
  309. 'length' => false,
  310. ),
  311. 'cp1251_no_length_ascii' => array(
  312. 'charset' => 'cp1251',
  313. 'value' => 'WordPress',
  314. 'expected' => 'WordPress',
  315. 'length' => false,
  316. // Don't set 'ascii' => true/false.
  317. // That's a different codepath than it being unset
  318. // even if there's only ASCII in the value.
  319. ),
  320. 'cp1251_char_length' => array(
  321. 'charset' => 'cp1251',
  322. 'value' => str_repeat( "\xd8\xd0", 10 ),
  323. 'expected' => str_repeat( "\xd8\xd0", 5 ),
  324. 'length' => array(
  325. 'type' => 'char',
  326. 'length' => 10,
  327. ),
  328. ),
  329. 'cp1251_byte_length' => array(
  330. 'charset' => 'cp1251',
  331. 'value' => str_repeat( "\xd8\xd0", 10 ),
  332. 'expected' => str_repeat( "\xd8\xd0", 5 ),
  333. 'length' => array(
  334. 'type' => 'byte',
  335. 'length' => 10,
  336. ),
  337. ),
  338. 'tis620' => array(
  339. 'charset' => 'tis620',
  340. 'value' => "\xccord\xe3ress",
  341. 'expected' => "\xccord\xe3ress",
  342. 'length' => array(
  343. 'type' => 'char',
  344. 'length' => 100,
  345. ),
  346. ),
  347. 'tis620_char_length' => array(
  348. 'charset' => 'tis620',
  349. 'value' => str_repeat( "\xcc\xe3", 10 ),
  350. 'expected' => str_repeat( "\xcc\xe3", 5 ),
  351. 'length' => array(
  352. 'type' => 'char',
  353. 'length' => 10,
  354. ),
  355. ),
  356. 'tis620_byte_length' => array(
  357. 'charset' => 'tis620',
  358. 'value' => str_repeat( "\xcc\xe3", 10 ),
  359. 'expected' => str_repeat( "\xcc\xe3", 5 ),
  360. 'length' => array(
  361. 'type' => 'byte',
  362. 'length' => 10,
  363. ),
  364. ),
  365. 'ujis_with_utf8_connection' => array(
  366. 'charset' => 'ujis',
  367. 'connection_charset' => 'utf8',
  368. 'value' => '自動下書き',
  369. 'expected' => '自動下書き',
  370. 'length' => array(
  371. 'type' => 'byte',
  372. 'length' => 100,
  373. ),
  374. ),
  375. 'ujis_with_utf8_connection_char_length' => array(
  376. 'charset' => 'ujis',
  377. 'connection_charset' => 'utf8',
  378. 'value' => '自動下書き',
  379. 'expected' => '自動下書',
  380. 'length' => array(
  381. 'type' => 'char',
  382. 'length' => 4,
  383. ),
  384. ),
  385. 'ujis_with_utf8_connection_byte_length' => array(
  386. 'charset' => 'ujis',
  387. 'connection_charset' => 'utf8',
  388. 'value' => '自動下書き',
  389. 'expected' => '自動',
  390. 'length' => array(
  391. 'type' => 'byte',
  392. 'length' => 6,
  393. ),
  394. ),
  395. 'false' => array(
  396. // False is a column with no character set (i.e. a number column).
  397. 'charset' => false,
  398. 'value' => 100,
  399. 'expected' => 100,
  400. 'length' => false,
  401. ),
  402. );
  403. if ( function_exists( 'mb_convert_encoding' ) ) {
  404. // big5 is a non-Unicode multibyte charset.
  405. $utf8 = "a\xe5\x85\xb1b"; // UTF-8 Character 20849.
  406. $big5 = mb_convert_encoding( $utf8, 'BIG-5', 'UTF-8' );
  407. $conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' );
  408. // Make sure PHP's multibyte conversions are working correctly.
  409. $this->assertNotEquals( $utf8, $big5 );
  410. $this->assertSame( $utf8, $conv_utf8 );
  411. $fields['big5'] = array(
  412. 'charset' => 'big5',
  413. 'value' => $big5,
  414. 'expected' => $big5,
  415. 'length' => array(
  416. 'type' => 'char',
  417. 'length' => 100,
  418. ),
  419. );
  420. $fields['big5_char_length'] = array(
  421. 'charset' => 'big5',
  422. 'value' => str_repeat( $big5, 10 ),
  423. 'expected' => str_repeat( $big5, 3 ) . 'a',
  424. 'length' => array(
  425. 'type' => 'char',
  426. 'length' => 10,
  427. ),
  428. );
  429. $fields['big5_byte_length'] = array(
  430. 'charset' => 'big5',
  431. 'value' => str_repeat( $big5, 10 ),
  432. 'expected' => str_repeat( $big5, 2 ) . 'a',
  433. 'length' => array(
  434. 'type' => 'byte',
  435. 'length' => 10,
  436. ),
  437. );
  438. }
  439. // The data above is easy to edit. Now, prepare it for the data provider.
  440. $data_provider = array();
  441. $multiple = array();
  442. $multiple_expected = array();
  443. foreach ( $fields as $test_case => $field ) {
  444. $expected = $field;
  445. $expected['value'] = $expected['expected'];
  446. unset( $expected['expected'], $field['expected'], $expected['connection_charset'] );
  447. // We're keeping track of these for our multiple-field test.
  448. $multiple[] = $field;
  449. $multiple_expected[] = $expected;
  450. // strip_invalid_text() expects an array of fields. We're testing one field at a time.
  451. $data = array( $field );
  452. $expected = array( $expected );
  453. // First argument is field data. Second is expected. Third is the message.
  454. $data_provider[] = array( $data, $expected, $test_case );
  455. }
  456. return $data_provider;
  457. }
  458. /**
  459. * @dataProvider data_strip_invalid_text
  460. * @ticket 21212
  461. *
  462. * @covers wpdb::strip_invalid_text
  463. */
  464. function test_strip_invalid_text( $data, $expected, $message ) {
  465. $charset = self::$_wpdb->charset;
  466. if ( isset( $data[0]['connection_charset'] ) ) {
  467. $new_charset = $data[0]['connection_charset'];
  468. unset( $data[0]['connection_charset'] );
  469. } else {
  470. $new_charset = $data[0]['charset'];
  471. }
  472. if ( 'utf8mb4' === $new_charset && ! self::$_wpdb->has_cap( 'utf8mb4' ) ) {
  473. $this->markTestSkipped( "The current MySQL server doesn't support the utf8mb4 character set." );
  474. }
  475. if ( 'big5' === $new_charset && 'byte' === $data[0]['length']['type'] && false !== strpos( self::$server_info, 'MariaDB' ) ) {
  476. $this->markTestSkipped( "MariaDB doesn't support this data set. See https://core.trac.wordpress.org/ticket/33171." );
  477. }
  478. self::$_wpdb->charset = $new_charset;
  479. self::$_wpdb->set_charset( self::$_wpdb->dbh, $new_charset );
  480. $actual = self::$_wpdb->strip_invalid_text( $data );
  481. self::$_wpdb->charset = $charset;
  482. self::$_wpdb->set_charset( self::$_wpdb->dbh, $charset );
  483. $this->assertSame( $expected, $actual, $message );
  484. }
  485. /**
  486. * @ticket 21212
  487. *
  488. * @covers wpdb::process_fields
  489. */
  490. function test_process_fields_failure() {
  491. global $wpdb;
  492. $charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
  493. if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
  494. $this->markTestSkipped( 'This test requires a utf8 character set.' );
  495. }
  496. // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
  497. $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
  498. $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
  499. }
  500. /**
  501. * @ticket 21212
  502. */
  503. function data_process_field_charsets() {
  504. if ( $GLOBALS['wpdb']->charset ) {
  505. $charset = $GLOBALS['wpdb']->charset;
  506. } else {
  507. $charset = $GLOBALS['wpdb']->get_col_charset( $GLOBALS['wpdb']->posts, 'post_content' );
  508. }
  509. // 'value' and 'format' are $data, 'charset' ends up as part of $expected.
  510. $no_string_fields = array(
  511. 'post_parent' => array(
  512. 'value' => 10,
  513. 'format' => '%d',
  514. 'charset' => false,
  515. ),
  516. 'comment_count' => array(
  517. 'value' => 0,
  518. 'format' => '%d',
  519. 'charset' => false,
  520. ),
  521. );
  522. $all_ascii_fields = array(
  523. 'post_content' => array(
  524. 'value' => 'foo foo foo!',
  525. 'format' => '%s',
  526. 'charset' => $charset,
  527. ),
  528. 'post_excerpt' => array(
  529. 'value' => 'bar bar bar!',
  530. 'format' => '%s',
  531. 'charset' => $charset,
  532. ),
  533. );
  534. // This is the same data used in process_field_charsets_for_nonexistent_table().
  535. $non_ascii_string_fields = array(
  536. 'post_content' => array(
  537. 'value' => '¡foo foo foo!',
  538. 'format' => '%s',
  539. 'charset' => $charset,
  540. ),
  541. 'post_excerpt' => array(
  542. 'value' => '¡bar bar bar!',
  543. 'format' => '%s',
  544. 'charset' => $charset,
  545. ),
  546. );
  547. $vars = get_defined_vars();
  548. unset( $vars['charset'] );
  549. foreach ( $vars as $var_name => $var ) {
  550. $data = $var;
  551. $expected = $var;
  552. foreach ( $data as &$datum ) {
  553. // 'charset' and 'ascii' are part of the expected return only.
  554. unset( $datum['charset'], $datum['ascii'] );
  555. }
  556. $vars[ $var_name ] = array( $data, $expected, $var_name );
  557. }
  558. return array_values( $vars );
  559. }
  560. /**
  561. * @dataProvider data_process_field_charsets
  562. * @ticket 21212
  563. *
  564. * @covers wpdb::process_field_charsets
  565. */
  566. function test_process_field_charsets( $data, $expected, $message ) {
  567. $actual = self::$_wpdb->process_field_charsets( $data, $GLOBALS['wpdb']->posts );
  568. $this->assertSame( $expected, $actual, $message );
  569. }
  570. /**
  571. * The test this test depends on first verifies that this
  572. * would normally work against the posts table.
  573. *
  574. * @ticket 21212
  575. * @depends test_process_field_charsets
  576. */
  577. function test_process_field_charsets_on_nonexistent_table() {
  578. $data = array(
  579. 'post_content' => array(
  580. 'value' => '¡foo foo foo!',
  581. 'format' => '%s',
  582. ),
  583. );
  584. self::$_wpdb->suppress_errors( true );
  585. $this->assertFalse( self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' ) );
  586. self::$_wpdb->suppress_errors( false );
  587. }
  588. /**
  589. * @ticket 21212
  590. *
  591. * @covers wpdb::check_ascii
  592. */
  593. function test_check_ascii() {
  594. $ascii = "\0\t\n\r '" . '!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~';
  595. $this->assertTrue( self::$_wpdb->check_ascii( $ascii ) );
  596. }
  597. /**
  598. * @ticket 21212
  599. *
  600. * @covers wpdb::check_ascii
  601. */
  602. function test_check_ascii_false() {
  603. $this->assertFalse( self::$_wpdb->check_ascii( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«' ) );
  604. }
  605. /**
  606. * @ticket 21212
  607. *
  608. * @covers wpdb::strip_invalid_text_for_column
  609. */
  610. function test_strip_invalid_text_for_column() {
  611. global $wpdb;
  612. $charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
  613. if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
  614. $this->markTestSkipped( 'This test requires a utf8 character set.' );
  615. }
  616. // Invalid 3-byte and 4-byte sequences.
  617. $value = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢";
  618. $expected = 'H€lloWorld¢';
  619. $actual = $wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $value );
  620. $this->assertSame( $expected, $actual );
  621. }
  622. /**
  623. * Set of table definitions for testing wpdb::get_table_charset and wpdb::get_column_charset
  624. *
  625. * @var array
  626. */
  627. protected $table_and_column_defs = array(
  628. array(
  629. 'definition' => '( a INT, b FLOAT )',
  630. 'table_expected' => false,
  631. 'column_expected' => array(
  632. 'a' => false,
  633. 'b' => false,
  634. ),
  635. ),
  636. array(
  637. 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )',
  638. 'table_expected' => 'big5',
  639. 'column_expected' => array(
  640. 'a' => 'big5',
  641. 'b' => 'big5',
  642. ),
  643. ),
  644. array(
  645. 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )',
  646. 'table_expected' => 'binary',
  647. 'column_expected' => array(
  648. 'a' => 'big5',
  649. 'b' => false,
  650. ),
  651. ),
  652. array(
  653. 'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )',
  654. 'table_expected' => 'binary',
  655. 'column_expected' => array(
  656. 'a' => 'latin1',
  657. 'b' => false,
  658. ),
  659. ),
  660. array(
  661. 'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )',
  662. 'table_expected' => 'koi8r',
  663. 'column_expected' => array(
  664. 'a' => 'latin1',
  665. 'b' => 'koi8r',
  666. ),
  667. ),
  668. array(
  669. 'definition' => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )',
  670. 'table_expected' => 'utf8',
  671. 'column_expected' => array(
  672. 'a' => 'utf8',
  673. 'b' => 'utf8',
  674. ),
  675. ),
  676. array(
  677. 'definition' => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT CHARACTER SET utf8mb4 )',
  678. 'table_expected' => 'utf8',
  679. 'column_expected' => array(
  680. 'a' => 'utf8',
  681. 'b' => 'utf8mb4',
  682. ),
  683. ),
  684. array(
  685. 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )',
  686. 'table_expected' => 'ascii',
  687. 'column_expected' => array(
  688. 'a' => 'big5',
  689. 'b' => 'koi8r',
  690. ),
  691. ),
  692. );
  693. /**
  694. * @ticket 21212
  695. */
  696. function data_test_get_table_charset() {
  697. $table_name = 'test_get_table_charset';
  698. $vars = array();
  699. foreach ( $this->table_and_column_defs as $i => $value ) {
  700. $this_table_name = $table_name . '_' . $i;
  701. $drop = "DROP TABLE IF EXISTS $this_table_name";
  702. $create = "CREATE TABLE $this_table_name {$value['definition']}";
  703. $vars[] = array( $drop, $create, $this_table_name, $value['table_expected'] );
  704. }
  705. return $vars;
  706. }
  707. /**
  708. * @dataProvider data_test_get_table_charset
  709. * @ticket 21212
  710. *
  711. * @covers wpdb::get_table_charset
  712. */
  713. function test_get_table_charset( $drop, $create, $table, $expected_charset ) {
  714. self::$_wpdb->query( $drop );
  715. if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
  716. $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
  717. return;
  718. }
  719. self::$_wpdb->query( $create );
  720. $charset = self::$_wpdb->get_table_charset( $table );
  721. $this->assertSame( $charset, $expected_charset );
  722. $charset = self::$_wpdb->get_table_charset( strtoupper( $table ) );
  723. $this->assertSame( $charset, $expected_charset );
  724. self::$_wpdb->query( $drop );
  725. }
  726. /**
  727. * @ticket 21212
  728. */
  729. function data_test_get_column_charset() {
  730. $table_name = 'test_get_column_charset';
  731. $vars = array();
  732. foreach ( $this->table_and_column_defs as $i => $value ) {
  733. $this_table_name = $table_name . '_' . $i;
  734. $drop = "DROP TABLE IF EXISTS $this_table_name";
  735. $create = "CREATE TABLE $this_table_name {$value['definition']}";
  736. $vars[] = array( $drop, $create, $this_table_name, $value['column_expected'] );
  737. }
  738. return $vars;
  739. }
  740. /**
  741. * @dataProvider data_test_get_column_charset
  742. * @ticket 21212
  743. *
  744. * @covers wpdb::get_col_charset
  745. */
  746. function test_get_column_charset( $drop, $create, $table, $expected_charset ) {
  747. self::$_wpdb->query( $drop );
  748. if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
  749. $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
  750. return;
  751. }
  752. self::$_wpdb->query( $create );
  753. foreach ( $expected_charset as $column => $charset ) {
  754. $this->assertSame( $charset, self::$_wpdb->get_col_charset( $table, $column ) );
  755. $this->assertSame( $charset, self::$_wpdb->get_col_charset( strtoupper( $table ), strtoupper( $column ) ) );
  756. }
  757. self::$_wpdb->query( $drop );
  758. }
  759. /**
  760. * @dataProvider data_test_get_column_charset
  761. * @ticket 21212
  762. *
  763. * @covers wpdb::get_col_charset
  764. */
  765. function test_get_column_charset_non_mysql( $drop, $create, $table, $columns ) {
  766. self::$_wpdb->query( $drop );
  767. if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
  768. $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
  769. return;
  770. }
  771. self::$_wpdb->is_mysql = false;
  772. self::$_wpdb->query( $create );
  773. $columns = array_keys( $columns );
  774. foreach ( $columns as $column => $charset ) {
  775. $this->assertFalse( self::$_wpdb->get_col_charset( $table, $column ) );
  776. }
  777. self::$_wpdb->query( $drop );
  778. self::$_wpdb->is_mysql = true;
  779. }
  780. /**
  781. * @dataProvider data_test_get_column_charset
  782. * @ticket 33501
  783. *
  784. * @covers wpdb::get_col_charset
  785. */
  786. function test_get_column_charset_is_mysql_undefined( $drop, $create, $table, $columns ) {
  787. self::$_wpdb->query( $drop );
  788. if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
  789. $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
  790. return;
  791. }
  792. unset( self::$_wpdb->is_mysql );
  793. self::$_wpdb->query( $create );
  794. $columns = array_keys( $columns );
  795. foreach ( $columns as $column => $charset ) {
  796. $this->assertFalse( self::$_wpdb->get_col_charset( $table, $column ) );
  797. }
  798. self::$_wpdb->query( $drop );
  799. self::$_wpdb->is_mysql = true;
  800. }
  801. /**
  802. * @ticket 21212
  803. */
  804. function data_strip_invalid_text_from_query() {
  805. $table_name = 'strip_invalid_text_from_query_table';
  806. $data = array(
  807. array(
  808. // Binary tables don't get stripped.
  809. '( a VARCHAR(50) CHARACTER SET utf8, b BINARY )', // Create.
  810. "('foo\xf0\x9f\x98\x88bar', 'foo')", // Query.
  811. "('foo\xf0\x9f\x98\x88bar', 'foo')", // Expected result.
  812. ),
  813. array(
  814. // utf8/utf8mb4 tables default to utf8.
  815. '( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )',
  816. "('foo\xf0\x9f\x98\x88bar', 'foo')",
  817. "('foobar', 'foo')",
  818. ),
  819. );
  820. foreach ( $data as $i => &$value ) {
  821. $this_table_name = $table_name . '_' . $i;
  822. $value[0] = "CREATE TABLE $this_table_name {$value[0]}";
  823. $value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}";
  824. $value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}";
  825. $value[3] = "DROP TABLE IF EXISTS $this_table_name";
  826. }
  827. unset( $value );
  828. return $data;
  829. }
  830. /**
  831. * @dataProvider data_strip_invalid_text_from_query
  832. * @ticket 21212
  833. *
  834. * @covers wpdb::strip_invalid_text_from_query
  835. */
  836. function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) {
  837. self::$_wpdb->query( $drop );
  838. if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
  839. $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
  840. return;
  841. }
  842. self::$_wpdb->query( $create );
  843. $return = self::$_wpdb->strip_invalid_text_from_query( $query );
  844. $this->assertSame( $expected, $return );
  845. self::$_wpdb->query( $drop );
  846. }
  847. /**
  848. * @ticket 32104
  849. */
  850. function data_dont_strip_text_from_schema_queries() {
  851. // An obviously invalid and fake table name.
  852. $table_name = "\xff\xff\xff\xff";
  853. $queries = array(
  854. "SHOW CREATE TABLE $table_name",
  855. "DESCRIBE $table_name",
  856. "DESC $table_name",
  857. "EXPLAIN SELECT * FROM $table_name",
  858. "CREATE $table_name( a VARCHAR(100))",
  859. );
  860. foreach ( $queries as &$query ) {
  861. $query = array( $query );
  862. }
  863. unset( $query );
  864. return $queries;
  865. }
  866. /**
  867. * @dataProvider data_dont_strip_text_from_schema_queries
  868. * @ticket 32104
  869. *
  870. * @covers wpdb::strip_invalid_text_from_query
  871. */
  872. function test_dont_strip_text_from_schema_queries( $query ) {
  873. $return = self::$_wpdb->strip_invalid_text_from_query( $query );
  874. $this->assertSame( $query, $return );
  875. }
  876. /**
  877. * @ticket 21212
  878. *
  879. * @covers wpdb::query
  880. */
  881. function test_invalid_characters_in_query() {
  882. global $wpdb;
  883. $charset = $wpdb->get_col_charset( $wpdb->posts, 'post_content' );
  884. if ( 'utf8' !== $charset && 'utf8mb4' !== $charset ) {
  885. $this->markTestSkipped( 'This test requires a utf8 character set.' );
  886. }
  887. $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
  888. }
  889. /**
  890. * @ticket 21212
  891. */
  892. function data_table_collation_check() {
  893. $table_name = 'table_collation_check';
  894. $data = array(
  895. array(
  896. // utf8_bin tables don't need extra sanity checking.
  897. '( a VARCHAR(50) COLLATE utf8_bin )', // Create.
  898. true, // Expected result.
  899. ),
  900. array(
  901. // Neither do utf8_general_ci tables.
  902. '( a VARCHAR(50) COLLATE utf8_general_ci )',
  903. true,
  904. ),
  905. array(
  906. // utf8_unicode_ci tables do.
  907. '( a VARCHAR(50) COLLATE utf8_unicode_ci )',
  908. false,
  909. ),
  910. array(
  911. // utf8_bin tables don't need extra sanity checking,
  912. // except for when they're not just utf8_bin.
  913. '( a VARCHAR(50) COLLATE utf8_bin, b VARCHAR(50) COLLATE big5_chinese_ci )',
  914. false,
  915. ),
  916. array(
  917. // utf8_bin tables don't need extra sanity checking
  918. // when the other columns aren't strings.
  919. '( a VARCHAR(50) COLLATE utf8_bin, b INT )',
  920. true,
  921. ),
  922. );
  923. foreach ( $data as $i => &$value ) {
  924. $this_table_name = $table_name . '_' . $i;
  925. $value[0] = "CREATE TABLE $this_table_name {$value[0]}";
  926. $value[2] = "SELECT * FROM $this_table_name WHERE a='\xf0\x9f\x98\x88'";
  927. $value[3] = "DROP TABLE IF EXISTS $this_table_name";
  928. $value[4] = array(
  929. "SELECT * FROM $this_table_name WHERE a='foo'",
  930. "SHOW FULL TABLES LIKE $this_table_name",
  931. "DESCRIBE $this_table_name",
  932. "DESC $this_table_name",
  933. "EXPLAIN SELECT * FROM $this_table_name",
  934. );
  935. }
  936. unset( $value );
  937. return $data;
  938. }
  939. /**
  940. * @dataProvider data_table_collation_check
  941. * @ticket 21212
  942. *
  943. * @covers wpdb::check_safe_collation
  944. */
  945. function test_table_collation_check( $create, $expected, $query, $drop, $always_true ) {
  946. self::$_wpdb->query( $drop );
  947. self::$_wpdb->query( $create );
  948. $return = self::$_wpdb->check_safe_collation( $query );
  949. $this->assertSame( $expected, $return );
  950. foreach ( $always_true as $true_query ) {
  951. $return = self::$_wpdb->check_safe_collation( $true_query );
  952. $this->assertTrue( $return );
  953. }
  954. self::$_wpdb->query( $drop );
  955. }
  956. /**
  957. * @covers wpdb::strip_invalid_text_for_column
  958. */
  959. function test_strip_invalid_text_for_column_bails_if_ascii_input_too_long() {
  960. global $wpdb;
  961. // TEXT column.
  962. $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', str_repeat( 'A', 65536 ) );
  963. $this->assertSame( 65535, strlen( $stripped ) );
  964. // VARCHAR column.
  965. $stripped = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', str_repeat( 'A', 256 ) );
  966. $this->assertSame( 255, strlen( $stripped ) );
  967. }
  968. /**
  969. * @ticket 32279
  970. *
  971. * @covers wpdb::strip_invalid_text_from_query
  972. */
  973. function test_strip_invalid_text_from_query_cp1251_is_safe() {
  974. $tablename = 'test_cp1251_query_' . rand_str( 5 );
  975. if ( ! self::$_wpdb->query( "CREATE TABLE $tablename ( a VARCHAR(50) ) DEFAULT CHARSET 'cp1251'" ) ) {
  976. $this->markTestSkipped( "Test requires the 'cp1251' charset." );
  977. }
  978. $safe_query = "INSERT INTO $tablename( `a` ) VALUES( 'safe data' )";
  979. $stripped_query = self::$_wpdb->strip_invalid_text_from_query( $safe_query );
  980. self::$_wpdb->query( "DROP TABLE $tablename" );
  981. $this->assertSame( $safe_query, $stripped_query );
  982. }
  983. /**
  984. * @ticket 34708
  985. *
  986. * @covers wpdb::strip_invalid_text_from_query
  987. */
  988. function test_no_db_charset_defined() {
  989. $tablename = 'test_cp1251_query_' . rand_str( 5 );
  990. if ( ! self::$_wpdb->query( "CREATE TABLE $tablename ( a VARCHAR(50) ) DEFAULT CHARSET 'cp1251'" ) ) {
  991. $this->markTestSkipped( "Test requires the 'cp1251' charset." );
  992. }
  993. $charset = self::$_wpdb->charset;
  994. self::$_wpdb->charset = '';
  995. $safe_query = "INSERT INTO $tablename( `a` ) VALUES( 'safe data' )";
  996. $stripped_query = self::$_wpdb->strip_invalid_text_from_query( $safe_query );
  997. self::$_wpdb->query( "DROP TABLE $tablename" );
  998. self::$_wpdb->charset = $charset;
  999. $this->assertSame( $safe_query, $stripped_query );
  1000. }
  1001. /**
  1002. * @ticket 36649
  1003. *
  1004. * @covers wpdb::set_charset
  1005. */
  1006. function test_set_charset_changes_the_connection_collation() {
  1007. self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8', 'utf8_general_ci' );
  1008. $results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
  1009. $this->assertSame( 'utf8_general_ci', $results[0]->Value );
  1010. self::$_wpdb->set_charset( self::$_wpdb->dbh, 'utf8mb4', 'utf8mb4_unicode_ci' );
  1011. $results = self::$_wpdb->get_results( "SHOW VARIABLES WHERE Variable_name='collation_connection'" );
  1012. $this->assertSame( 'utf8mb4_unicode_ci', $results[0]->Value );
  1013. self::$_wpdb->set_charset( self::$_wpdb->dbh );
  1014. }
  1015. }