IMPALA-4167: Support insert plan hints for CREATE TABLE AS SELECT
fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.impala.analysis;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.ColumnStats;
import org.apache.impala.catalog.DataSource;
import org.apache.impala.catalog.DataSourceTable;
import org.apache.impala.catalog.KuduTable;
import org.apache.impala.catalog.PrimitiveType;
import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.FileSystemUtil;
import org.apache.impala.common.FrontendTestBase;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.service.BackendConfig;
import org.apache.impala.testutil.TestUtils;
import org.apache.impala.thrift.TBackendGflags;
import org.apache.impala.thrift.TDescribeTableParams;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.util.MetaStoreUtil;
import org.apache.kudu.ColumnSchema.CompressionAlgorithm;
import org.apache.kudu.ColumnSchema.Encoding;
import org.junit.Assert;
import org.junit.Test;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

public class AnalyzeDDLTest extends FrontendTestBase {

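  // The assertions below use two helpers inherited from FrontendTestBase:
  // AnalyzesOk() asserts that a statement passes analysis, and AnalysisError()
  // asserts that analysis fails with an error message containing the expected
  // string (see FrontendTestBase for the exact signatures and matching
  // semantics). A minimal sketch of the pattern used throughout:
  //
  //   AnalyzesOk("alter table functional.alltypes add columns (c1 int)");
  //   AnalysisError("alter table functional.alltypes add columns (id int)",
  //       "Column already exists: id");
  //
  // Overloads that take an AnalysisContext (from createAnalysisCtx("db"))
  // analyze the statement with the given database as the current database.

  /**
   * Exercises ALTER TABLE ADD/DROP PARTITION on single partition specs, including
   * constant exprs, NULL keys, caching clauses, and valid/invalid locations.
   */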
  @Test
  public void TestAlterTableAddDropPartition() throws CatalogException {
    String[] addDrop = {"add if not exists", "drop if exists"};
    for (String kw: addDrop) {
      // Add different partitions for different column types
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(year=2050, month=10)");
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(month=10, year=2050)");
      AnalyzesOk("alter table functional.insert_string_partitioned " + kw +
          " partition(s2='1234')");

      // Can't add/drop partitions to/from unpartitioned tables
      AnalysisError("alter table functional.alltypesnopart " + kw + " partition (i=1)",
          "Table is not partitioned: functional.alltypesnopart");
      AnalysisError("alter table functional_hbase.alltypesagg " + kw +
          " partition (i=1)", "Table is not partitioned: functional_hbase.alltypesagg");

      // Empty string partition keys
      AnalyzesOk("alter table functional.insert_string_partitioned " + kw +
          " partition(s2='')");
      // Arbitrary exprs as partition key values. Constant exprs are ok.
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(year=-1, month=cast((10+5*4) as INT))");

      // Table/Db does not exist
      AnalysisError("alter table db_does_not_exist.alltypes " + kw +
          " partition (i=1)", "Could not resolve table reference: " +
          "'db_does_not_exist.alltypes'");
      AnalysisError("alter table functional.table_does_not_exist " + kw +
          " partition (i=1)", "Could not resolve table reference: " +
          "'functional.table_does_not_exist'");

      // Cannot ALTER TABLE a view.
      AnalysisError("alter table functional.alltypes_view " + kw +
          " partition(year=2050, month=10)",
          "ALTER TABLE not allowed on a view: functional.alltypes_view");
      AnalysisError("alter table functional.alltypes_datasource " + kw +
          " partition(year=2050, month=10)",
          "ALTER TABLE not allowed on a table produced by a data source: " +
          "functional.alltypes_datasource");

      // NULL partition keys
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(year=NULL, month=1)");
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(year=NULL, month=NULL)");
      AnalyzesOk("alter table functional.alltypes " + kw +
          " partition(year=ascii(null), month=ascii(NULL))");
    }

    // Data types don't match
    AnalysisError("alter table functional.insert_string_partitioned add" +
        " partition(s2=1234)",
        "Value of partition spec (column=s2) has incompatible type: " +
        "'SMALLINT'. Expected type: 'STRING'.");
    AnalysisError("alter table functional.insert_string_partitioned drop" +
        " partition(s2=1234)",
        "operands of type STRING and SMALLINT are not comparable: s2 = 1234");

    // Loss of precision
    AnalysisError(
        "alter table functional.alltypes add " +
        "partition(year=100000000000, month=10) ",
        "Partition key value may result in loss of precision.\nWould need to cast " +
        "'100000000000' to 'INT' for partition column: year");

    // Duplicate partition key name
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, year=2051)", "Duplicate partition key name: year");

    // Arbitrary exprs as partition key values. Non-constant exprs should fail.
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, month=int_col) ",
        "Non-constant expressions are not supported as static partition-key " +
        "values in 'month=int_col'.");
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=cast(int_col as int), month=12) ",
        "Non-constant expressions are not supported as static partition-key " +
        "values in 'year=CAST(int_col AS INT)'.");

    // Not a partition column
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=2050, int_col=1)",
        "Partition exprs cannot contain non-partition column(s): int_col = 1.");

    // Arbitrary exprs as partition key values. Non-partition columns should fail.
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=2050, month=int_col) ",
        "Partition exprs cannot contain non-partition column(s): month = int_col.");
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=cast(int_col as int), month=12) ",
        "Partition exprs cannot contain non-partition column(s): " +
        "year = CAST(int_col AS INT).");

    // IF NOT EXISTS properly checks for partition existence
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10)");
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2010, month=10)",
        "Partition spec already exists: (year=2010, month=10).");
    AnalyzesOk("alter table functional.alltypes add if not exists " +
        "partition(year=2010, month=10)");
    AnalyzesOk("alter table functional.alltypes add if not exists " +
        "partition(year=2010, month=10) location " +
        "'/test-warehouse/alltypes/year=2010/month=10'");

    // IF EXISTS properly checks for partition existence
    // with a fully specified partition.
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(year=2010, month=10)");
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=2050, month=10)",
        "No matching partition(s) found.");
    AnalyzesOk("alter table functional.alltypes drop if exists " +
        "partition(year=2050, month=10)");

    // NULL partition keys
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=NULL, month=1)",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=NULL, month is NULL)",
        "No matching partition(s) found.");

    // Drop partition using predicates; IF EXISTS is also allowed here.
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(year<2011, month!=10)");
    AnalysisError("alter table functional.alltypes drop " +
        "partition(1=1, month=10)",
        "Invalid partition expr 1 = 1. " +
        "A partition spec may not contain constant predicates.");
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(year>1050, month=10)");
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(year>1050 and month=10)");
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(month=10)");
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(month+2000=year)");
    AnalyzesOk("alter table functional.alltypes drop " +
        "partition(year>9050, month=10)");
    AnalyzesOk("alter table functional.alltypes drop if exists " +
        "partition(year>9050, month=10)");

    // Not a valid column
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, blah=1)",
        "Partition column 'blah' not found in table: functional.alltypes");
    AnalysisError("alter table functional.alltypes drop " +
        "partition(year=2050, blah=1)",
        "Could not resolve column/field reference: 'blah'");

    // Not a partition column
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, int_col=1) ",
        "Column 'int_col' is not a partition column in table: functional.alltypes");

    // Caching ops
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) cached in 'testPool'");
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) cached in 'testPool' with replication = 10");
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) uncached");
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, month=10) cached in 'badPool'",
        "The specified cache pool does not exist: badPool");
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'file:///test-warehouse/alltypes/year=2010/month=10' cached in 'testPool'",
        "Location 'file:/test-warehouse/alltypes/year=2010/month=10' cannot be cached. " +
        "Please retry without caching: ALTER TABLE functional.alltypes ADD PARTITION " +
        "... UNCACHED");

    // Valid URIs.
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'/test-warehouse/alltypes/year=2010/month=10'");
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=10'");
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'s3a://bucket/test-warehouse/alltypes/year=2010/month=10'");
    AnalyzesOk("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'file:///test-warehouse/alltypes/year=2010/month=10'");

    // Invalid URIs.
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location " +
        "'foofs://bar/test-warehouse/alltypes/year=2010/month=10'",
        "No FileSystem for scheme: foofs");
    AnalysisError("alter table functional.alltypes add " +
        "partition(year=2050, month=10) location ' '",
        "URI path cannot be empty.");
  }

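  /**
   * Exercises ALTER TABLE ADD PARTITION with multiple partition specs in a single
   * statement, with and without IF NOT EXISTS, including per-partition LOCATION
   * and caching clauses.
   */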
  @Test
  public void TestAlterTableAddMultiplePartitions() {
    for (String cl: new String[]{"if not exists", ""}) {
      // Add multiple partitions.
      AnalyzesOk("alter table functional.alltypes add " + cl +
          " partition(year=2050, month=10)" +
          " partition(year=2050, month=11)" +
          " partition(year=2050, month=12)");
      // Duplicate partition specifications.
      AnalysisError("alter table functional.alltypes add " + cl +
          " partition(year=2050, month=10)" +
          " partition(year=2050, month=11)" +
          " partition(Month=10, YEAR=2050)",
          "Duplicate partition spec: (month=10, year=2050)");

      // Multiple partitions with locations and caching.
      AnalyzesOk("alter table functional.alltypes add " + cl +
          " partition(year=2050, month=10) location" +
          " '/test-warehouse/alltypes/y2050m10' cached in 'testPool'" +
          " partition(year=2050, month=11) location" +
          " 'hdfs://localhost:20500/test-warehouse/alltypes/y2050m11'" +
          " cached in 'testPool' with replication = 7" +
          " partition(year=2050, month=12) location" +
          " 'file:///test-warehouse/alltypes/y2050m12' uncached");
      // One of the partitions points to an invalid URI.
      AnalysisError("alter table functional.alltypes add " + cl +
          " partition(year=2050, month=10) location" +
          " '/test-warehouse/alltypes/y2050m10' cached in 'testPool'" +
          " partition(year=2050, month=11) location" +
          " 'hdfs://localhost:20500/test-warehouse/alltypes/y2050m11'" +
          " cached in 'testPool' with replication = 7" +
          " partition(year=2050, month=12) location" +
          " 'fil:///test-warehouse/alltypes/y2050m12' uncached",
          "No FileSystem for scheme: fil");
      // One of the partitions is cached in a non-existent pool.
      AnalysisError("alter table functional.alltypes add " + cl +
          " partition(year=2050, month=10) location" +
          " '/test-warehouse/alltypes/y2050m10' cached in 'testPool'" +
          " partition(year=2050, month=11) location" +
          " 'hdfs://localhost:20500/test-warehouse/alltypes/y2050m11'" +
          " cached in 'nonExistentTestPool' with replication = 7" +
          " partition(year=2050, month=12) location" +
          " 'file:///test-warehouse/alltypes/y2050m12' uncached",
          "The specified cache pool does not exist: nonExistentTestPool");
    }

    // If 'IF NOT EXISTS' is not used, ALTER TABLE ADD PARTITION cannot add a
    // preexisting partition to a table.
    AnalysisError("alter table functional.alltypes add partition(year=2050, month=1)" +
        "partition(year=2010, month=1) partition(year=2050, month=2)",
        "Partition spec already exists: (year=2010, month=1)");
  }

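  /**
   * Exercises ALTER TABLE ADD COLUMNS / REPLACE COLUMNS, including duplicate and
   * invalid column names, partition-column conflicts, and unsupported table types.
   */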
  @Test
  public void TestAlterTableAddReplaceColumns() throws AnalysisException {
    AnalyzesOk("alter table functional.alltypes add columns (new_col int)");
    AnalyzesOk("alter table functional.alltypes add columns (c1 string comment 'hi')");
    AnalyzesOk("alter table functional.alltypes add columns (c struct<f1:int>)");
    AnalyzesOk(
        "alter table functional.alltypes replace columns (c1 int comment 'c', c2 int)");
    AnalyzesOk("alter table functional.alltypes replace columns (c array<string>)");

    // Column name must be unique for add
    AnalysisError("alter table functional.alltypes add columns (int_col int)",
        "Column already exists: int_col");
    // Add a column with same name as a partition column
    AnalysisError("alter table functional.alltypes add columns (year int)",
        "Column name conflicts with existing partition column: year");
    // Invalid column name.
    AnalysisError("alter table functional.alltypes add columns (`???` int)",
        "Invalid column/field name: ???");
    AnalysisError("alter table functional.alltypes replace columns (`???` int)",
        "Invalid column/field name: ???");

    // Replace should not throw an error if the column already exists
    AnalyzesOk("alter table functional.alltypes replace columns (int_col int)");
    // It is not possible to replace a partition column
    AnalysisError("alter table functional.alltypes replace columns (Year int)",
        "Column name conflicts with existing partition column: year");

    // Duplicate column names
    AnalysisError("alter table functional.alltypes add columns (c1 int, c1 int)",
        "Duplicate column name: c1");
    AnalysisError("alter table functional.alltypes replace columns (c1 int, C1 int)",
        "Duplicate column name: c1");

    // Table/Db does not exist
    AnalysisError("alter table db_does_not_exist.alltypes add columns (i int)",
        "Could not resolve table reference: 'db_does_not_exist.alltypes'");
    AnalysisError("alter table functional.table_does_not_exist add columns (i int)",
        "Could not resolve table reference: 'functional.table_does_not_exist'");

    // Cannot ALTER TABLE a view.
    AnalysisError("alter table functional.alltypes_view " +
        "add columns (c1 string comment 'hi')",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot ALTER TABLE a nested collection.
    AnalysisError("alter table allcomplextypes.int_array_col " +
        "add columns (c1 string comment 'hi')",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    // Cannot ALTER TABLE produced by a data source.
    AnalysisError("alter table functional.alltypes_datasource " +
        "add columns (c1 string comment 'hi')",
        "ALTER TABLE not allowed on a table produced by a data source: " +
        "functional.alltypes_datasource");

    // Cannot ALTER TABLE ADD/REPLACE COLUMNS on an HBase table.
    AnalysisError("alter table functional_hbase.alltypes add columns (i int)",
        "ALTER TABLE ADD|REPLACE COLUMNS not currently supported on HBase tables.");
  }

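  /**
   * Exercises ALTER TABLE DROP COLUMN, including partition columns, the last
   * remaining column, and unsupported table types.
   */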
  @Test
  public void TestAlterTableDropColumn() throws AnalysisException {
    AnalyzesOk("alter table functional.alltypes drop column int_col");

    AnalysisError("alter table functional.alltypes drop column no_col",
        "Column 'no_col' does not exist in table: functional.alltypes");

    AnalysisError("alter table functional.alltypes drop column year",
        "Cannot drop partition column: year");

    // Tables should always have at least 1 column
    AnalysisError("alter table functional_seq_snap.bad_seq_snap drop column field",
        "Cannot drop column 'field' from functional_seq_snap.bad_seq_snap. " +
        "Tables must contain at least 1 column.");

    // Table/Db does not exist
    AnalysisError("alter table db_does_not_exist.alltypes drop column col1",
        "Could not resolve table reference: 'db_does_not_exist.alltypes'");
    AnalysisError("alter table functional.table_does_not_exist drop column col1",
        "Could not resolve table reference: 'functional.table_does_not_exist'");

    // Cannot ALTER TABLE a view.
    AnalysisError("alter table functional.alltypes_view drop column int_col",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot ALTER TABLE a nested collection.
    AnalysisError("alter table allcomplextypes.int_array_col drop column int_col",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    // Cannot ALTER TABLE produced by a data source.
    AnalysisError("alter table functional.alltypes_datasource drop column int_col",
        "ALTER TABLE not allowed on a table produced by a data source: " +
        "functional.alltypes_datasource");

    // Cannot ALTER TABLE DROP COLUMN on an HBase table.
    AnalysisError("alter table functional_hbase.alltypes drop column int_col",
        "ALTER TABLE DROP COLUMN not currently supported on HBase tables.");
  }

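  /**
   * Exercises ALTER TABLE CHANGE COLUMN: renames, type changes, comments, and the
   * error cases for partition columns and unsupported table types.
   */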
  @Test
  public void TestAlterTableChangeColumn() throws AnalysisException {
    // Rename a column
    AnalyzesOk("alter table functional.alltypes change column int_col int_col2 int");
    // Rename and change the datatype
    AnalyzesOk("alter table functional.alltypes change column int_col c2 string");
    AnalyzesOk(
        "alter table functional.alltypes change column int_col c2 map<int, string>");
    // Change only the datatype
    AnalyzesOk("alter table functional.alltypes change column int_col int_col tinyint");
    // Add a comment to a column.
    AnalyzesOk("alter table functional.alltypes change int_col int_col int comment 'c'");

    AnalysisError("alter table functional.alltypes change column no_col c1 int",
        "Column 'no_col' does not exist in table: functional.alltypes");

    AnalysisError("alter table functional.alltypes change column year year int",
        "Cannot modify partition column: year");

    AnalysisError(
        "alter table functional.alltypes change column int_col Tinyint_col int",
        "Column already exists: tinyint_col");

    // Invalid column name.
    AnalysisError("alter table functional.alltypes change column int_col `???` int",
        "Invalid column/field name: ???");

    // Table/Db does not exist
    AnalysisError("alter table db_does_not_exist.alltypes change c1 c2 int",
        "Could not resolve table reference: 'db_does_not_exist.alltypes'");
    AnalysisError("alter table functional.table_does_not_exist change c1 c2 double",
        "Could not resolve table reference: 'functional.table_does_not_exist'");

    // Cannot ALTER TABLE a view.
    AnalysisError("alter table functional.alltypes_view " +
        "change column int_col int_col2 int",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot ALTER TABLE a nested collection.
    AnalysisError("alter table allcomplextypes.int_array_col " +
        "change column int_col int_col2 int",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    // Cannot ALTER TABLE produced by a data source.
    AnalysisError("alter table functional.alltypes_datasource " +
        "change column int_col int_col2 int",
        "ALTER TABLE not allowed on a table produced by a data source: " +
        "functional.alltypes_datasource");

    // Cannot ALTER TABLE CHANGE COLUMN on an HBase table.
    AnalysisError("alter table functional_hbase.alltypes CHANGE COLUMN int_col i int",
        "ALTER TABLE CHANGE/ALTER COLUMN not currently supported on HBase tables.");
  }

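  /**
   * Exercises ALTER TABLE SET ROW FORMAT, which is only supported on TEXT or
   * SEQUENCE file formats of HDFS tables.
   */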
  @Test
  public void TestAlterTableSetRowFormat() throws AnalysisException {
    AnalyzesOk("alter table functional.alltypes set row format delimited " +
        "fields terminated by ' '");
    AnalyzesOk("alter table functional.alltypes partition (year=2010) set row format " +
        "delimited fields terminated by ' '");
    AnalyzesOk("alter table functional_seq.alltypes set row format delimited " +
        "fields terminated by ' '");
    AnalysisError("alter table functional.alltypesnopart PARTITION (month=1) " +
        "set row format delimited fields terminated by ' '",
        "Table is not partitioned: functional.alltypesnopart");
    String[] unsupportedFileFormatDbs =
        {"functional_parquet", "functional_rc", "functional_avro"};
    for (String format: unsupportedFileFormatDbs) {
      AnalysisError("alter table " + format + ".alltypes set row format delimited " +
          "fields terminated by ' '", "ALTER TABLE SET ROW FORMAT is only supported " +
          "on TEXT or SEQUENCE file formats");
    }
    AnalysisError("alter table functional_kudu.alltypes set row format delimited " +
        "fields terminated by ' '", "ALTER TABLE SET ROW FORMAT is only supported " +
        "on HDFS tables");
    AnalysisError("alter table functional.alltypesmixedformat partition(year=2009) " +
        "set row format delimited fields terminated by ' '",
        "ALTER TABLE SET ROW FORMAT is only supported on TEXT or SEQUENCE file formats");
    AnalyzesOk("alter table functional.alltypesmixedformat " +
        "partition(year=2009,month=1) " +
        "set row format delimited fields terminated by ' '");
  }

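  /**
   * Exercises ALTER TABLE SET for file formats, locations, table/serde properties,
   * and the sort.columns property, on whole tables and on individual partitions.
   */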
  @Test
  public void TestAlterTableSet() throws AnalysisException {
    AnalyzesOk("alter table functional.alltypes set fileformat sequencefile");
    AnalyzesOk("alter table functional.alltypes set location '/a/b'");
    AnalyzesOk("alter table functional.alltypes set tblproperties('a'='1')");
    AnalyzesOk("alter table functional.alltypes set serdeproperties('a'='2')");
    AnalyzesOk("alter table functional.alltypes PARTITION (Year=2010, month=11) " +
        "set location '/a/b'");
    AnalyzesOk("alter table functional.alltypes PARTITION (month=11, year=2010) " +
        "set fileformat parquetfile");
    AnalyzesOk("alter table functional.alltypes PARTITION (month<=11, year=2010) " +
        "set fileformat parquetfile");
    AnalyzesOk("alter table functional.stringpartitionkey PARTITION " +
        "(string_col='partition1') set fileformat parquet");
    AnalyzesOk("alter table functional.stringpartitionkey PARTITION " +
        "(string_col='partition1') set location '/a/b/c'");
    AnalyzesOk("alter table functional.alltypes PARTITION (year=2010, month=11) " +
        "set tblproperties('a'='1')");
    AnalyzesOk("alter table functional.alltypes PARTITION (year<=2010, month=11) " +
        "set tblproperties('a'='1')");
    AnalyzesOk("alter table functional.alltypes PARTITION (year=2010, month=11) " +
        "set serdeproperties ('a'='2')");
    AnalyzesOk("alter table functional.alltypes PARTITION (year<=2010, month=11) " +
        "set serdeproperties ('a'='2')");

    AnalyzesOk("alter table functional.alltypes set tblproperties('sort.columns'='id')");
    AnalyzesOk("alter table functional.alltypes set tblproperties(" +
        "'sort.columns'='INT_COL,id')");
    AnalyzesOk("alter table functional.alltypes set tblproperties(" +
        "'sort.columns'='bool_col,int_col,id')");
    AnalyzesOk("alter table functional.alltypes set tblproperties('sort.columns'='')");
    AnalysisError("alter table functional.alltypes set tblproperties(" +
        "'sort.columns'='id,int_col,id')",
        "Duplicate column in SORT BY list: id");
    AnalysisError("alter table functional.alltypes set tblproperties(" +
        "'sort.columns'='ID, foo')",
        "Could not find SORT BY column 'foo' in table.");

    {
      // Check that long properties fail at the analysis layer.
      String long_property_key = "";
      for (int i = 0; i < MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH; ++i) {
        long_property_key += 'k';
      }
      String long_property_value = "";
      for (int i = 0; i < MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH; ++i) {
        long_property_value += 'v';
      }

      // At this point long_property_{key,value} are actually not quite long enough
      // to cause analysis to fail.

      AnalyzesOk("alter table functional.alltypes "
          + "set serdeproperties ('" + long_property_key + "'='" + long_property_value
          + "') ");

      AnalyzesOk("alter table functional.alltypes "
          + "set tblproperties ('" + long_property_key + "'='" + long_property_value
          + "') ");

      long_property_key += 'X';
      long_property_value += 'X';
      // Now that long_property_{key,value} are one character longer, they are too long
      // for the analyzer.

      AnalysisError("alter table functional.alltypes set "
          + "tblproperties ('" + long_property_key + "'='value')",
          "Property key length must be <= " + MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + ": "
          + (MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + 1));

      AnalysisError("alter table functional.alltypes set "
          + "tblproperties ('key'='" + long_property_value + "')",
          "Property value length must be <= " + MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH
          + ": " + (MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH + 1));

      AnalysisError("alter table functional.alltypes set "
          + "serdeproperties ('" + long_property_key + "'='value')",
          "Property key length must be <= " + MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + ": "
          + (MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + 1));

      AnalysisError("alter table functional.alltypes set "
          + "serdeproperties ('key'='" + long_property_value + "')",
          "Property value length must be <= " + MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH
          + ": " + (MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH + 1));

      AnalysisError(
          "alter table functional.alltypes set tblproperties('storage_handler'='1')",
          "Changing the 'storage_handler' table property is not supported to protect " +
          "against metadata corruption.");
    }

    // Arbitrary exprs as partition key values. Constant exprs are ok.
    AnalyzesOk("alter table functional.alltypes PARTITION " +
        "(year=cast(100*20+10 as INT), month=cast(2+9 as INT)) " +
        "set fileformat sequencefile");
    AnalyzesOk("alter table functional.alltypes PARTITION " +
        "(year=cast(100*20+10 as INT), month=cast(2+9 as INT)) " +
        "set location '/a/b'");

    // Arbitrary exprs as partition key values. Exprs bound by a single partition
    // column are ok.
    AnalyzesOk("alter table functional.alltypes PARTITION " +
        "(Year*2=Year+2010, month=11) set fileformat sequencefile");

    // Arbitrary exprs as partition key values. Non-partition-column exprs.
    AnalysisError("alter table functional.alltypes PARTITION " +
        "(int_col=3) set fileformat sequencefile",
        "Partition exprs cannot contain non-partition column(s): int_col = 3.");

    // Partition expr matches more than one partition in a set location statement.
    AnalysisError("alter table functional.alltypes PARTITION (year!=20) " +
        "set location '/a/b'",
        "Partition expr in set location statements can only match " +
        "one partition. Too many matched partitions year=2009/month=1," +
        "year=2009/month=2,year=2009/month=3");

    // Partition spec does not exist
    AnalysisError("alter table functional.alltypes PARTITION (year=2014, month=11) " +
        "set location '/a/b'",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypes PARTITION (year=2014, month=11) " +
        "set tblproperties('a'='1')",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypes PARTITION (month=11, year=2014) " +
        "set fileformat sequencefile",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypesnopart PARTITION (month=1) " +
        "set fileformat sequencefile",
        "Table is not partitioned: functional.alltypesnopart");
    AnalysisError("alter table functional.alltypesnopart PARTITION (month=1) " +
        "set location '/a/b/c'",
        "Table is not partitioned: functional.alltypesnopart");
    AnalysisError("alter table functional.stringpartitionkey PARTITION " +
        "(string_col='partition2') set location '/a/b'",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.stringpartitionkey PARTITION " +
        "(string_col='partition2') set fileformat sequencefile",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypes PARTITION " +
        "(year=cast(10*20+10 as INT), month=cast(5*3 as INT)) " +
        "set location '/a/b'",
        "No matching partition(s) found.");
    AnalysisError("alter table functional.alltypes PARTITION " +
        "(year=cast(10*20+10 as INT), month=cast(5*3 as INT)) " +
        "set fileformat sequencefile",
        "No matching partition(s) found.");

    // Table/Db does not exist
    AnalysisError("alter table db_does_not_exist.alltypes set fileformat sequencefile",
        "Could not resolve table reference: 'db_does_not_exist.alltypes'");
    AnalysisError("alter table functional.table_does_not_exist set fileformat rcfile",
        "Could not resolve table reference: 'functional.table_does_not_exist'");
    AnalysisError("alter table db_does_not_exist.alltypes set location '/a/b'",
        "Could not resolve table reference: 'db_does_not_exist.alltypes'");
    AnalysisError("alter table functional.table_does_not_exist set location '/a/b'",
        "Could not resolve table reference: 'functional.table_does_not_exist'");
    AnalysisError("alter table functional.no_tbl partition(i=1) set location '/a/b'",
        "Could not resolve table reference: 'functional.no_tbl'");
    AnalysisError("alter table no_db.alltypes partition(i=1) set fileformat textfile",
        "Could not resolve table reference: 'no_db.alltypes'");

    // Valid location
    AnalyzesOk("alter table functional.alltypes set location " +
        "'hdfs://localhost:20500/test-warehouse/a/b'");
    AnalyzesOk("alter table functional.alltypes set location " +
        "'s3a://bucket/test-warehouse/a/b'");
    AnalyzesOk("alter table functional.alltypes set location " +
        "'file:///test-warehouse/a/b'");

    // Invalid location
    AnalysisError("alter table functional.alltypes set location 'test/warehouse'",
        "URI path must be absolute: test/warehouse");
    AnalysisError("alter table functional.alltypes set location 'blah:///warehouse/'",
        "No FileSystem for scheme: blah");
    AnalysisError("alter table functional.alltypes set location ''",
        "URI path cannot be empty.");
    AnalysisError("alter table functional.alltypes set location ' '",
        "URI path cannot be empty.");

    // Cannot ALTER TABLE a view.
    AnalysisError("alter table functional.alltypes_view set fileformat sequencefile",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot ALTER TABLE a nested collection.
    AnalysisError("alter table allcomplextypes.int_array_col set fileformat sequencefile",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    // Cannot ALTER TABLE produced by a data source.
    AnalysisError("alter table functional.alltypes_datasource set fileformat parquet",
        "ALTER TABLE not allowed on a table produced by a data source: " +
        "functional.alltypes_datasource");

    // Cannot ALTER TABLE SET on an HBase table.
    AnalysisError("alter table functional_hbase.alltypes set tblproperties('a'='b')",
        "ALTER TABLE SET not currently supported on HBase tables.");
  }

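  /**
   * Exercises ALTER TABLE SET CACHED/UNCACHED, including replication factors and
   * the interaction with SET LOCATION on cached tables and partitions.
   */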
  @Test
  public void TestAlterTableSetCached() {
    // Positive cases
    AnalyzesOk("alter table functional.alltypesnopart set cached in 'testPool'");
    AnalyzesOk("alter table functional.alltypes set cached in 'testPool'");
    AnalyzesOk("alter table functional.alltypes partition(year=2010, month=12) " +
        "set cached in 'testPool'");
    AnalyzesOk("alter table functional.alltypes partition(year<=2010, month<=12) " +
        "set cached in 'testPool'");

    // Replication factor
    AnalyzesOk("alter table functional.alltypes set cached in 'testPool' " +
        "with replication = 10");
    AnalyzesOk("alter table functional.alltypes partition(year=2010, month=12) " +
        "set cached in 'testPool' with replication = 4");
    AnalysisError("alter table functional.alltypes set cached in 'testPool' " +
        "with replication = 0",
        "Cache replication factor must be between 0 and Short.MAX_VALUE");
    AnalysisError("alter table functional.alltypes set cached in 'testPool' " +
        "with replication = 90000",
        "Cache replication factor must be between 0 and Short.MAX_VALUE");

    // Attempt to alter a table that is not backed by HDFS.
    AnalysisError("alter table functional_hbase.alltypesnopart set cached in 'testPool'",
        "ALTER TABLE SET not currently supported on HBase tables.");
    AnalysisError("alter table functional.view_view set cached in 'testPool'",
        "ALTER TABLE not allowed on a view: functional.view_view");
    AnalysisError("alter table allcomplextypes.int_array_col set cached in 'testPool'",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");

    AnalysisError("alter table functional.alltypes set cached in 'badPool'",
        "The specified cache pool does not exist: badPool");
    AnalysisError("alter table functional.alltypes partition(year=2010, month=12) " +
        "set cached in 'badPool'", "The specified cache pool does not exist: badPool");

    // Attempt to uncache a table that is not cached. Should be a no-op.
    AnalyzesOk("alter table functional.alltypes set uncached");
    AnalyzesOk("alter table functional.alltypes partition(year=2010, month=12) " +
        "set uncached");

    // Attempt to cache a table that is already cached. Should be a no-op.
    AnalyzesOk("alter table functional.alltypestiny set cached in 'testPool'");
    AnalyzesOk("alter table functional.alltypestiny partition(year=2009, month=1) " +
        "set cached in 'testPool'");

    // Change location of a cached table/partition
    AnalysisError("alter table functional.alltypestiny set location '/tmp/tiny'",
        "Target table is cached, please uncache before changing the location using: " +
        "ALTER TABLE functional.alltypestiny SET UNCACHED");
    AnalysisError("alter table functional.alltypestiny partition (year=2009,month=1) " +
        "set location '/test-warehouse/new_location'",
        "Target partition is cached, please uncache before changing the location " +
        "using: ALTER TABLE functional.alltypestiny PARTITION (year = 2009, month = 1) " +
        "SET UNCACHED");

    // Table/db/partition do not exist
    AnalysisError("alter table baddb.alltypestiny set cached in 'testPool'",
        "Could not resolve table reference: 'baddb.alltypestiny'");
    AnalysisError("alter table functional.badtbl set cached in 'testPool'",
        "Could not resolve table reference: 'functional.badtbl'");
    AnalysisError("alter table functional.alltypestiny partition(year=9999, month=1) " +
        "set cached in 'testPool'",
        "No matching partition(s) found.");
  }

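  /**
   * Exercises ALTER TABLE SET COLUMN STATS for every stats key and scalar column
   * type, including invalid keys, invalid values, and unsupported column kinds.
   */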
  @Test
  public void TestAlterTableSetColumnStats() {
    // Contains entries of the form 'statsKey'='statsValue' for every
    // stats key. A dummy value is used for 'statsValue'.
    List<String> testKeyValues = Lists.newArrayList();
    for (ColumnStats.StatsKey statsKey: ColumnStats.StatsKey.values()) {
      testKeyValues.add(String.format("'%s'='10'", statsKey));
    }
    // Test updating all stats keys individually.
    for (String kv: testKeyValues) {
      AnalyzesOk(String.format(
          "alter table functional.alltypes set column stats string_col (%s)", kv));
      // Stats key is case-insensitive.
      AnalyzesOk(String.format(
          "alter table functional.alltypes set column stats string_col (%s)",
          kv.toLowerCase()));
      AnalyzesOk(String.format(
          "alter table functional.alltypes set column stats string_col (%s)",
          kv.toUpperCase()));
    }
    // Test updating all stats keys at once in a single statement.
    AnalyzesOk(String.format(
        "alter table functional.alltypes set column stats string_col (%s)",
        Joiner.on(",").join(testKeyValues)));
    // Test setting all stats keys to -1 (unknown).
    for (ColumnStats.StatsKey statsKey: ColumnStats.StatsKey.values()) {
      AnalyzesOk(String.format(
          "alter table functional.alltypes set column stats string_col ('%s'='-1')",
          statsKey));
    }
    // Duplicate stats keys are valid. The last entry is used.
    AnalyzesOk("alter table functional.alltypes set column stats " +
        "int_col ('numDVs'='2','numDVs'='3')");

    // Test updating stats on all scalar types.
    for (Type t: Type.getSupportedTypes()) {
      if (t.isNull()) continue;
      Preconditions.checkState(t.isScalarType());
      String typeStr = t.getPrimitiveType().toString();
      if (t.getPrimitiveType() == PrimitiveType.CHAR ||
          t.getPrimitiveType() == PrimitiveType.VARCHAR) {
        typeStr += "(60)";
      }
      String tblName = "t_" + t.getPrimitiveType();
      addTestTable(String.format("create table %s (c %s)", tblName, typeStr));
      AnalyzesOk(String.format(
          "alter table %s set column stats c ('%s'='100','%s'='10')",
          tblName, ColumnStats.StatsKey.NUM_DISTINCT_VALUES,
          ColumnStats.StatsKey.NUM_NULLS));
      // Test setting stats values to -1 (unknown).
      AnalyzesOk(String.format(
          "alter table %s set column stats c ('%s'='-1','%s'='-1')",
          tblName, ColumnStats.StatsKey.NUM_DISTINCT_VALUES,
          ColumnStats.StatsKey.NUM_NULLS));
    }

    // Setting stats works on all table types.
    AnalyzesOk("alter table functional_hbase.alltypes set column stats " +
        "int_col ('numNulls'='2')");
    AnalyzesOk("alter table functional.alltypes_datasource set column stats " +
        "int_col ('numDVs'='2')");
    if (RuntimeEnv.INSTANCE.isKuduSupported()) {
      AnalyzesOk("alter table functional_kudu.testtbl set column stats " +
          "name ('numNulls'='2')");
    }

    // Table does not exist.
    AnalysisError("alter table bad_tbl set column stats int_col ('numNulls'='2')",
        "Could not resolve table reference: 'bad_tbl'");
    // Column does not exist.
    AnalysisError(
        "alter table functional.alltypes set column stats bad_col ('numNulls'='2')",
        "Column 'bad_col' does not exist in table: functional.alltypes");

    // Cannot set column stats of a view.
    AnalysisError(
        "alter table functional.alltypes_view set column stats int_col ('numNulls'='2')",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot set column stats of a nested collection.
    AnalysisError(
        "alter table allcomplextypes.int_array_col " +
        "set column stats int_col ('numNulls'='2')",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    // Cannot set column stats of partition columns.
    AnalysisError(
        "alter table functional.alltypes set column stats month ('numDVs'='10')",
        "Updating the stats of a partition column is not allowed: month");
    // Cannot set the size of a fixed-length column.
    AnalysisError(
        "alter table functional.alltypes set column stats int_col ('maxSize'='10')",
        "Cannot update the 'maxSize' stats of column 'int_col' with type 'INT'.\n" +
        "Changing 'maxSize' is only allowed for variable-length columns.");
    AnalysisError(
        "alter table functional.alltypes set column stats int_col ('avgSize'='10')",
        "Cannot update the 'avgSize' stats of column 'int_col' with type 'INT'.\n" +
        "Changing 'avgSize' is only allowed for variable-length columns.");
    // Cannot set column stats of complex-typed columns.
    AnalysisError(
        "alter table functional.allcomplextypes set column stats int_array_col " +
        "('numNulls'='10')",
        "Statistics for column 'int_array_col' are not supported because " +
        "it has type 'ARRAY<INT>'.");
    AnalysisError(
        "alter table functional.allcomplextypes set column stats int_map_col " +
        "('numDVs'='10')",
        "Statistics for column 'int_map_col' are not supported because " +
        "it has type 'MAP<STRING,INT>'.");
    AnalysisError(
        "alter table functional.allcomplextypes set column stats int_struct_col " +
        "('numDVs'='10')",
        "Statistics for column 'int_struct_col' are not supported because " +
        "it has type 'STRUCT<f1:INT,f2:INT>'.");

    // Invalid stats key.
    AnalysisError(
        "alter table functional.alltypes set column stats int_col ('badKey'='10')",
        "Invalid column stats key: badKey");
    AnalysisError(
        "alter table functional.alltypes set column stats " +
        "int_col ('numDVs'='10',''='10')",
        "Invalid column stats key: ");
    // Invalid long stats values.
    AnalysisError(
        "alter table functional.alltypes set column stats int_col ('numDVs'='bad')",
        "Invalid stats value 'bad' for column stats key: numDVs");
    AnalysisError(
        "alter table functional.alltypes set column stats int_col ('numDVs'='-10')",
        "Invalid stats value '-10' for column stats key: numDVs");
    // Invalid float stats values.
    AnalysisError(
        "alter table functional.alltypes set column stats string_col ('avgSize'='bad')",
        "Invalid stats value 'bad' for column stats key: avgSize");
    AnalysisError(
        "alter table functional.alltypes set column stats string_col ('avgSize'='-1.5')",
        "Invalid stats value '-1.5' for column stats key: avgSize");
    AnalysisError(
        "alter table functional.alltypes set column stats string_col ('avgSize'='-0.5')",
        "Invalid stats value '-0.5' for column stats key: avgSize");
    AnalysisError(
        "alter table functional.alltypes set column stats string_col ('avgSize'='NaN')",
        "Invalid stats value 'NaN' for column stats key: avgSize");
    AnalysisError(
        "alter table functional.alltypes set column stats string_col ('avgSize'='inf')",
        "Invalid stats value 'inf' for column stats key: avgSize");
  }

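  /**
   * Exercises ALTER TABLE SET TBLPROPERTIES/SERDEPROPERTIES with avro.schema.url
   * and avro.schema.literal, including invalid URLs, malformed schemas, and the
   * precedence of the literal over the URL.
   */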
  @Test
  public void TestAlterTableSetAvroProperties() {
    // Test set tblproperties with avro.schema.url and avro.schema.literal
    // TODO: Include customized schema files

    for (String propertyType : Lists.newArrayList("tblproperties", "serdeproperties")) {
      // Valid url with valid schema
      AnalyzesOk(String.format("alter table functional.alltypes set %s" +
          "('avro.schema.url'=" +
          "'hdfs:///test-warehouse/avro_schemas/functional/alltypes.json')",
          propertyType));

      // Invalid schema URL
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.url'='')", propertyType),
          "Invalid avro.schema.url: . Can not create a Path from an empty string");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.url'='hdfs://invalid*host/schema.avsc')", propertyType),
          "Failed to read Avro schema at: hdfs://invalid*host/schema.avsc. " +
          "Incomplete HDFS URI, no host: hdfs://invalid*host/schema.avsc");
      AnalysisError(String.format("alter table functional.alltypes set %s" +
          "('avro.schema.url'='schema.avsc')", propertyType),
          "Invalid avro.schema.url: schema.avsc. Path does not exist.");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.url'='foo://bar/schema.avsc')", propertyType),
          "Failed to read Avro schema at: foo://bar/schema.avsc. " +
          "No FileSystem for scheme: foo");

      // Valid schema literal
      AnalyzesOk(String.format("alter table functional.alltypes set %s" +
          "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
          "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}] }')",
          propertyType));

      // Invalid schema
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
          "\"fields\": {\"name\": \"string1\", \"type\": \"string\"}]}')", propertyType),
          "Error parsing Avro schema for table 'functional.alltypes': " +
          "org.codehaus.jackson.JsonParseException: Unexpected close marker ']': " +
          "expected '}'");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='')", propertyType),
          "Avro schema is null or empty: functional.alltypes");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\": \"my_record\"}')", propertyType),
          "Error parsing Avro schema for table 'functional.alltypes': " +
          "No type: {\"name\":\"my_record\"}");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\":\"my_record\", \"type\": \"record\"}')",
          propertyType), "Error parsing Avro schema for table 'functional.alltypes': " +
          "Record has no fields: {\"name\":\"my_record\",\"type\":\"record\"}");
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='" +
          "{\"type\":\"record\", \"fields\":[ {\"name\":\"fff\",\"type\":\"int\"} ] }')",
          propertyType), "Error parsing Avro schema for table 'functional.alltypes': " +
          "No name in schema: {\"type\":\"record\",\"fields\":[{\"name\":\"fff\"," +
          "\"type\":\"int\"}]}");

      // Unsupported types
      // Union
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
          "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," +
          "{\"name\": \"union1\", \"type\": [\"float\", \"boolean\"]}]}')",
          propertyType), "Unsupported type 'union' of column 'union1'");

      // Check the avro.schema.url and avro.schema.literal evaluation order:
      // the url check is skipped when a literal is provided.
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
          "\"fields\": {\"name\": \"string1\", \"type\": \"string\"}]}', " +
          "'avro.schema.url'='')", propertyType),
          "Error parsing Avro schema for table 'functional.alltypes': " +
          "org.codehaus.jackson.JsonParseException: Unexpected close marker ']': " +
          "expected '}'");
      // Url is invalid but ignored because literal is provided.
      AnalyzesOk(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
          "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}] }', " +
          "'avro.schema.url'='')", propertyType));
      // Even though url is valid, literal has higher priority.
      AnalysisError(String.format("alter table functional.alltypes set %s " +
          "('avro.schema.literal'='', 'avro.schema.url'=" +
          "'hdfs:///test-warehouse/avro_schemas/functional/alltypes.json')",
          propertyType), "Avro schema is null or empty: functional.alltypes");
    }
  }

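  /**
   * Exercises ALTER TABLE RENAME TO, including name collisions, invalid names,
   * and missing source tables/databases.
   */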
  @Test
  public void TestAlterTableRename() throws AnalysisException {
    AnalyzesOk("alter table functional.alltypes rename to new_alltypes");
    AnalyzesOk("alter table functional.alltypes rename to functional.new_alltypes");
    AnalysisError("alter table functional.alltypes rename to functional.alltypes",
        "Table already exists: functional.alltypes");
    AnalysisError("alter table functional.alltypes rename to functional.alltypesagg",
        "Table already exists: functional.alltypesagg");

    AnalysisError("alter table functional.table_does_not_exist rename to new_table",
        "Table does not exist: functional.table_does_not_exist");
    AnalysisError("alter table db_does_not_exist.alltypes rename to new_table",
        "Database does not exist: db_does_not_exist");

    // Invalid database/table name.
    AnalysisError("alter table functional.alltypes rename to `???`.new_table",
        "Invalid database name: ???");
    AnalysisError("alter table functional.alltypes rename to functional.`%^&`",
        "Invalid table/view name: %^&");

    AnalysisError(
        "alter table functional.alltypes rename to db_does_not_exist.new_table",
        "Database does not exist: db_does_not_exist");

    // Cannot ALTER TABLE a view.
    AnalysisError("alter table functional.alltypes_view rename to new_alltypes",
        "ALTER TABLE not allowed on a view: functional.alltypes_view");
    // Cannot ALTER TABLE a nested collection.
    AnalysisError("alter table allcomplextypes.int_array_col rename to new_alltypes",
        createAnalysisCtx("functional"),
        "Database does not exist: allcomplextypes");

    // It should be okay to rename an HBase table.
    AnalyzesOk("alter table functional_hbase.alltypes rename to new_alltypes");

    // It should be okay to rename a table produced by a data source.
    AnalyzesOk("alter table functional.alltypes_datasource rename to new_datasrc_tbl");
  }

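  /**
   * Exercises ALTER TABLE RECOVER PARTITIONS, which must target a partitioned
   * HDFS table.
   */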
  @Test
  public void TestAlterTableRecoverPartitions() throws CatalogException {
    AnalyzesOk("alter table functional.alltypes recover partitions");
    AnalysisError("alter table baddb.alltypes recover partitions",
        "Could not resolve table reference: 'baddb.alltypes'");
    AnalysisError("alter table functional.badtbl recover partitions",
        "Could not resolve table reference: 'functional.badtbl'");
    AnalysisError("alter table functional.alltypesnopart recover partitions",
        "Table is not partitioned: functional.alltypesnopart");
    AnalysisError("alter table functional.view_view recover partitions",
        "ALTER TABLE not allowed on a view: functional.view_view");
    AnalysisError("alter table allcomplextypes.int_array_col recover partitions",
        createAnalysisCtx("functional"),
        "ALTER TABLE not allowed on a nested collection: allcomplextypes.int_array_col");
    AnalysisError("alter table functional_hbase.alltypes recover partitions",
        "ALTER TABLE RECOVER PARTITIONS must target an HDFS table: " +
        "functional_hbase.alltypes");
  }

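  /**
   * Exercises ALTER TABLE SORT BY, including duplicate and unknown columns and
   * unsupported table types.
   */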
  @Test
  public void TestAlterTableSortBy() {
    AnalyzesOk("alter table functional.alltypes sort by (id)");
    AnalyzesOk("alter table functional.alltypes sort by (int_col,id)");
    AnalyzesOk("alter table functional.alltypes sort by (bool_col,int_col,id)");
    AnalyzesOk("alter table functional.alltypes sort by ()");
    AnalysisError("alter table functional.alltypes sort by (id,int_col,id)",
        "Duplicate column in SORT BY list: id");
    AnalysisError("alter table functional.alltypes sort by (id, foo)",
        "Could not find SORT BY column 'foo' in table.");
    AnalysisError("alter table functional_hbase.alltypes sort by (id, foo)",
        "ALTER TABLE SORT BY not supported on HBase tables.");
  }

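  /**
   * Exercises ALTER VIEW with a new view definition, including definitions that
   * fail to analyze and ALTER VIEW applied to tables.
   */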
  @Test
  public void TestAlterView() {
    // View-definition references a table.
    AnalyzesOk("alter view functional.alltypes_view as " +
        "select * from functional.alltypesagg");
    // View-definition references a view.
    AnalyzesOk("alter view functional.alltypes_view as " +
        "select * from functional.alltypes_view");

    // View-definition resulting in Hive-style auto-generated column names.
    AnalyzesOk("alter view functional.alltypes_view as " +
        "select trim('abc'), 17 * 7");

    // Cannot ALTER VIEW a table.
    AnalysisError("alter view functional.alltypes as " +
        "select * from functional.alltypesagg",
        "ALTER VIEW not allowed on a table: functional.alltypes");
    AnalysisError("alter view functional_hbase.alltypesagg as " +
        "select * from functional.alltypesagg",
        "ALTER VIEW not allowed on a table: functional_hbase.alltypesagg");
    // Target database does not exist.
    AnalysisError("alter view baddb.alltypes_view as " +
        "select * from functional.alltypesagg",
        "Database does not exist: baddb");
    // Target view does not exist.
    AnalysisError("alter view functional.badview as " +
        "select * from functional.alltypesagg",
        "Table does not exist: functional.badview");
    // View-definition statement fails to analyze. Database does not exist.
    AnalysisError("alter view functional.alltypes_view as " +
        "select * from baddb.alltypesagg",
        "Could not resolve table reference: 'baddb.alltypesagg'");
    // View-definition statement fails to analyze. Table does not exist.
    AnalysisError("alter view functional.alltypes_view as " +
        "select * from functional.badtable",
        "Could not resolve table reference: 'functional.badtable'");
    // Duplicate column name.
    AnalysisError("alter view functional.alltypes_view as " +
        "select * from functional.alltypessmall a inner join " +
        "functional.alltypessmall b on a.id = b.id",
        "Duplicate column name: id");
    // Invalid column name.
    AnalysisError("alter view functional.alltypes_view as select 'abc' as `???`",
        "Invalid column/field name: ???");
    // Change the view definition to contain a subquery (IMPALA-1797)
    AnalyzesOk("alter view functional.alltypes_view as " +
        "select * from functional.alltypestiny where id in " +
        "(select id from functional.alltypessmall where int_col = 1)");
  }

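  /**
   * Exercises ALTER VIEW RENAME TO, mirroring the ALTER TABLE RENAME TO checks.
   */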
1119 @Test
1120 public void TestAlterViewRename() throws AnalysisException {
1121 AnalyzesOk("alter view functional.alltypes_view rename to new_view");
1122 AnalyzesOk("alter view functional.alltypes_view rename to functional.new_view");
1123 AnalysisError("alter view functional.alltypes_view rename to functional.alltypes",
1124 "Table already exists: functional.alltypes");
1125 AnalysisError("alter view functional.alltypes_view rename to functional.alltypesagg",
1126 "Table already exists: functional.alltypesagg");
1127
1128 AnalysisError("alter view functional.view_does_not_exist rename to new_view",
1129 "Table does not exist: functional.view_does_not_exist");
1130 AnalysisError("alter view db_does_not_exist.alltypes_view rename to new_view",
1131 "Database does not exist: db_does_not_exist");
1132
1133 AnalysisError("alter view functional.alltypes_view " +
1134 "rename to db_does_not_exist.new_view",
1135 "Database does not exist: db_does_not_exist");
1136
1137 // Invalid database/table name.
1138 AnalysisError("alter view functional.alltypes_view rename to `???`.new_view",
1139 "Invalid database name: ???");
1140 AnalysisError("alter view functional.alltypes_view rename to functional.`%^&`",
1141 "Invalid table/view name: %^&");
1142
1143 // Cannot ALTER VIEW a table.
1144 AnalysisError("alter view functional.alltypes rename to new_alltypes",
1145 "ALTER VIEW not allowed on a table: functional.alltypes");
1146 }
1147
1148 @Test
1149 public void TestAlterTableAlterColumn() throws AnalysisException {
1150 AnalyzesOk("alter table functional_kudu.alltypes alter int_col set default 0");
1151 AnalyzesOk("alter table functional_kudu.alltypes alter int_col set " +
1152 "compression LZ4 encoding RLE");
1153 AnalyzesOk("alter table functional.alltypes alter int_col set comment 'a'");
1154 AnalyzesOk("alter table functional_kudu.alltypes alter int_col drop default");
1155
1156 AnalysisError("alter table functional_kudu.alltypes alter id set default 0",
1157 "Cannot set default value for primary key column 'id'");
1158 AnalysisError("alter table functional_kudu.alltypes alter id drop default",
1159 "Cannot drop default value for primary key column 'id'");
1160 AnalysisError("alter table functional_kudu.alltypes alter int_col set default 'a'",
1161 "Default value 'a' (type: STRING) is not compatible with column 'int_col' " +
1162 "(type: INT)");
1163 AnalysisError("alter table functional_kudu.alltypes alter int_col set " +
1164 "encoding rle compression error", "Unsupported compression algorithm 'ERROR'");
1165 AnalysisError("alter table functional_kudu.alltypes alter int_col set primary key",
1166 "Altering a column to be a primary key is not supported.");
1167 AnalysisError("alter table functional_kudu.alltypes alter int_col set not null",
1168 "Altering the nullability of a column is not supported.");
1169 AnalysisError("alter table functional_kudu.alltypes alter int_col set comment 'a'",
1170 "Kudu does not support column comments.");
1171 AnalysisError("alter table functional.alltypes alter int_col set compression lz4",
1172 "Unsupported column options for non-Kudu table: 'int_col INT COMPRESSION LZ4'");
1173 AnalysisError("alter table functional.alltypes alter int_col drop default",
1174 "Unsupported column option for non-Kudu table: DROP DEFAULT");
1175 }
1176
1177 ComputeStatsStmt checkComputeStatsStmt(String stmt) throws AnalysisException {
1178 return checkComputeStatsStmt(stmt, createAnalysisCtx());
1179 }
1180
1181 ComputeStatsStmt checkComputeStatsStmt(String stmt, AnalysisContext ctx)
1182 throws AnalysisException {
1183 return checkComputeStatsStmt(stmt, ctx, null);
1184 }
1185
1186 /**
1187 * Analyzes 'stmt' and checks that the table-level and column-level SQL that is used
1188 * to compute the stats is valid. Returns the analyzed statement.
1189 */
1190 ComputeStatsStmt checkComputeStatsStmt(String stmt, AnalysisContext ctx,
1191 String expectedWarning) throws AnalysisException {
1192 ParseNode parseNode = AnalyzesOk(stmt, ctx, expectedWarning);
1193 assertTrue(parseNode instanceof ComputeStatsStmt);
1194 ComputeStatsStmt parsedStmt = (ComputeStatsStmt)parseNode;
1195 AnalyzesOk(parsedStmt.getTblStatsQuery());
1196 String colsQuery = parsedStmt.getColStatsQuery();
1197 if (colsQuery != null) AnalyzesOk(colsQuery);
1198 return parsedStmt;
1199 }
1200
1201 /**
1202 * In addition to the validation for checkComputeStatsStmt(String), checks that the
1203 * whitelisted columns match 'expColNames'.
1204 */
1205 void checkComputeStatsStmt(String stmt, List<String> expColNames)
1206 throws AnalysisException {
1207 ComputeStatsStmt parsedStmt = checkComputeStatsStmt(stmt);
1208 Set<Column> actCols = parsedStmt.getValidatedColumnWhitelist();
1209 if (expColNames == null) { assertTrue("Expected no whitelist.", actCols == null); return; }
1210 assertTrue("Expected whitelist.", actCols != null);
1211 Set<String> actColSet = Sets.newHashSet();
1212 for (Column col: actCols) actColSet.add(col.getName());
1213 Set<String> expColSet = Sets.newHashSet(expColNames);
1214 assertEquals(expColSet, actColSet);
1215 }
1216
1217 @Test
1218 public void TestComputeStats() throws AnalysisException {
1219 // Analyze the stmt itself as well as the generated child queries.
1220 checkComputeStatsStmt("compute stats functional.alltypes");
1221 checkComputeStatsStmt("compute stats functional_hbase.alltypes");
1222 // Test that complex-typed columns are ignored.
1223 checkComputeStatsStmt("compute stats functional.allcomplextypes");
1224 // Test legal column restriction.
1225 checkComputeStatsStmt("compute stats functional.alltypes (int_col, double_col)",
1226 Lists.newArrayList("int_col", "double_col"));
1227 // Test legal column restriction with duplicate columns specified.
1228 checkComputeStatsStmt(
1229 "compute stats functional.alltypes (int_col, double_col, int_col)",
1230 Lists.newArrayList("int_col", "double_col"));
1231 // Test empty column restriction.
1232 checkComputeStatsStmt("compute stats functional.alltypes ()",
1233 new ArrayList<String>());
1234 // Test column restriction of a column that does not exist.
1235 AnalysisError("compute stats functional.alltypes(int_col, bogus_col, double_col)",
1236 "bogus_col not found in table:");
1237 // Test column restriction of a column with an unsupported type.
1238 AnalysisError("compute stats functional.allcomplextypes(id, map_map_col)",
1239 "COMPUTE STATS not supported for column");
1240 // Test column restriction of an Hdfs table partitioning column.
1241 AnalysisError("compute stats functional.stringpartitionkey(string_col)",
1242 "COMPUTE STATS not supported for partitioning");
1243 // Test column restriction of an HBase key column.
1244 checkComputeStatsStmt("compute stats functional_hbase.testtbl(id)",
1245 Lists.newArrayList("id"));
1246
1247 // Cannot compute stats on a table that does not exist.
1248 AnalysisError("compute stats tbl_does_not_exist",
1249 "Could not resolve table reference: 'tbl_does_not_exist'");
1250 // Cannot compute stats on a view.
1251 AnalysisError("compute stats functional.alltypes_view",
1252 "COMPUTE STATS not supported for view: functional.alltypes_view");
1253
1254 AnalyzesOk("compute stats functional_avro_snap.alltypes");
1255 // Test mismatched column definitions and Avro schema (HIVE-6308, IMPALA-867).
1256 // See testdata/avro_schema_resolution/create_table.sql for the CREATE TABLE stmts.
1257 // Mismatched column type is ok because the conflict is resolved in favor of
1258 // the type in the column definition list in the CREATE TABLE.
1259 AnalyzesOk("compute stats functional_avro_snap.alltypes_type_mismatch");
1260 // Missing column definition is ok because the schema mismatch is resolved
1261 // in the CREATE TABLE.
1262 AnalyzesOk("compute stats functional_avro_snap.alltypes_missing_coldef");
1263 // Extra column definition is ok because the schema mismatch is resolved
1264 // in the CREATE TABLE.
1265 AnalyzesOk("compute stats functional_avro_snap.alltypes_extra_coldef");
1266 // Omitting the column definitions is ok.
1267 AnalyzesOk("compute stats functional_avro_snap.alltypes_no_coldef");
1268 // Mismatched column name (table was created by Hive).
1269 AnalysisError("compute stats functional_avro_snap.schema_resolution_test",
1270 "Cannot COMPUTE STATS on Avro table 'schema_resolution_test' because its " +
1271 "column definitions do not match those in the Avro schema.\nDefinition of " +
1272 "column 'col1' of type 'string' does not match the Avro-schema column " +
1273 "'boolean1' of type 'BOOLEAN' at position '0'.\nPlease re-create the table " +
1274 "with column definitions, e.g., using the result of 'SHOW CREATE TABLE'");
1275
1276 // Test tablesample clause with extrapolation enabled/disabled. Replace/restore the
1277 // static backend config for this test to control stats extrapolation.
1278 TBackendGflags gflags = BackendConfig.INSTANCE.getBackendCfg();
1279 boolean origEnableStatsExtrapolation = gflags.isEnable_stats_extrapolation();
1280 try {
1281 // Setup for testing combinations of extrapolation config options.
1282 addTestDb("extrap_config", null);
1283 addTestTable("create table extrap_config.tbl_prop_unset (i int)");
1284 addTestTable("create table extrap_config.tbl_prop_false (i int) " +
1285 "tblproperties('impala.enable.stats.extrapolation'='false')");
1286 addTestTable("create table extrap_config.tbl_prop_true (i int) " +
1287 "tblproperties('impala.enable.stats.extrapolation'='true')");
1288 String stmt = "compute stats %s tablesample system (10)";
1289 String err = "COMPUTE STATS TABLESAMPLE requires stats extrapolation";
1290
1291 // Test --enable_stats_extrapolation=false
1292 gflags.setEnable_stats_extrapolation(false);
1293 // Table property unset --> Extrapolation disabled
1294 AnalysisError(String.format(stmt, "extrap_config.tbl_prop_unset"), err);
1295 // Table property false --> Extrapolation disabled
1296 AnalysisError(String.format(stmt, "extrap_config.tbl_prop_false"), err);
1297 // Table property true --> Extrapolation enabled
1298 AnalyzesOk(String.format(stmt, "extrap_config.tbl_prop_true"));
1299
1300 // Test --enable_stats_extrapolation=true
1301 gflags.setEnable_stats_extrapolation(true);
1302 // Table property unset --> Extrapolation enabled
1303 AnalyzesOk(String.format(stmt, "extrap_config.tbl_prop_unset"));
1304 // Table property false --> Extrapolation disabled
1305 AnalysisError(String.format(stmt, "extrap_config.tbl_prop_false"), err);
1306 // Table property true --> Extrapolation enabled
1307 AnalyzesOk(String.format(stmt, "extrap_config.tbl_prop_true"));
1308
1309 // Test TABLESAMPLE on different table types.
1310 gflags.setEnable_stats_extrapolation(true);
1311 checkComputeStatsStmt("compute stats functional.alltypes tablesample system (10)");
1312 checkComputeStatsStmt(
1313 "compute stats functional.alltypes tablesample system (55) repeatable(1)");
1314 AnalysisError("compute stats functional.alltypes tablesample system (101)",
1315 "Invalid percent of bytes value '101'. " +
1316 "The percent of bytes to sample must be between 0 and 100.");
1317 AnalysisError("compute stats functional_kudu.alltypes tablesample system (1)",
1318 "TABLESAMPLE is only supported on HDFS tables.");
1319 AnalysisError("compute stats functional_hbase.alltypes tablesample system (2)",
1320 "TABLESAMPLE is only supported on HDFS tables.");
1321 AnalysisError(
1322 "compute stats functional.alltypes_datasource tablesample system (3)",
1323 "TABLESAMPLE is only supported on HDFS tables.");
1324
1325 // Test TABLESAMPLE with a column whitelist.
1326 gflags.setEnable_stats_extrapolation(true);
1327 checkComputeStatsStmt(
1328 "compute stats functional.alltypes (int_col, double_col) tablesample " +
1329 "system (55) repeatable(1)",
1330 Lists.newArrayList("int_col", "double_col"));
1331 AnalysisError("compute stats functional.alltypes tablesample system (101)",
1332 "Invalid percent of bytes value '101'. " +
1333 "The percent of bytes to sample must be between 0 and 100.");
1334 AnalysisError("compute stats functional_kudu.alltypes tablesample system (1)",
1335 "TABLESAMPLE is only supported on HDFS tables.");
1336 AnalysisError("compute stats functional_hbase.alltypes tablesample system (2)",
1337 "TABLESAMPLE is only supported on HDFS tables.");
1338 AnalysisError(
1339 "compute stats functional.alltypes_datasource tablesample system (3)",
1340 "TABLESAMPLE is only supported on HDFS tables.");
1341
1342 // Test different values of COMPUTE_STATS_MIN_SAMPLE_SIZE.
1343 TQueryOptions queryOpts = new TQueryOptions();
1344
1345 // The default minimum sample size is greater than the total size of
1346 // 'functional.alltypes', so we expect TABLESAMPLE to be ignored.
1347 Preconditions.checkState(
1348 queryOpts.compute_stats_min_sample_size == 1024 * 1024 * 1024);
1349 ComputeStatsStmt noSamplingStmt = checkComputeStatsStmt(
1350 "compute stats functional.alltypes tablesample system (10) repeatable(1)",
1351 createAnalysisCtx(queryOpts),
1352 "Ignoring TABLESAMPLE because the effective sampling rate is 100%");
1353 Assert.assertEquals(1.0, noSamplingStmt.getEffectiveSamplingPerc(), 0.0);
1354 String tblStatsQuery = noSamplingStmt.getTblStatsQuery().toUpperCase();
1355 Assert.assertFalse(tblStatsQuery.contains("TABLESAMPLE"));
1356 Assert.assertFalse(tblStatsQuery.contains("SAMPLED_NDV"));
1357 String colStatsQuery = noSamplingStmt.getColStatsQuery().toUpperCase();
1358 Assert.assertFalse(colStatsQuery.contains("TABLESAMPLE"));
1359 Assert.assertFalse(colStatsQuery.contains("SAMPLED_NDV"));
1360
1361 // No minimum sample bytes.
1362 queryOpts.setCompute_stats_min_sample_size(0);
1363 checkComputeStatsStmt("compute stats functional.alltypes tablesample system (10)",
1364 createAnalysisCtx(queryOpts));
1365 checkComputeStatsStmt(
1366 "compute stats functional.alltypes tablesample system (55) repeatable(1)",
1367 createAnalysisCtx(queryOpts));
1368
1369 // Sample is adjusted based on the minimum sample bytes.
1370 // Assumes that functional.alltypes has 24 files of roughly 20KB each.
1371 // The baseline statement with no sampling minimum should select exactly one file
1372 // and have an effective sampling rate of ~0.04 (1/24).
1373 queryOpts.setCompute_stats_min_sample_size(0);
1374 ComputeStatsStmt baselineStmt = checkComputeStatsStmt(
1375 "compute stats functional.alltypes tablesample system (1) repeatable(1)",
1376 createAnalysisCtx(queryOpts));
1377 // Approximate validation of effective sampling rate.
1378 Assert.assertTrue(baselineStmt.getEffectiveSamplingPerc() > 0.03);
1379 Assert.assertTrue(baselineStmt.getEffectiveSamplingPerc() < 0.05);
1380 // The adjusted statement with a 100KB minimum should select ~5 files and have
1381 // an effective sampling rate of ~0.21 (5/24).
1382 queryOpts.setCompute_stats_min_sample_size(100 * 1024);
1383 ComputeStatsStmt adjustedStmt = checkComputeStatsStmt(
1384 "compute stats functional.alltypes tablesample system (1) repeatable(1)",
1385 createAnalysisCtx(queryOpts));
1386 // Approximate validation to avoid flakiness due to sampling and file size
1387 // changes. Expect a sample between 4 and 6 of the 24 total files.
1388 Assert.assertTrue(adjustedStmt.getEffectiveSamplingPerc() >= 4.0 / 24);
1389 Assert.assertTrue(adjustedStmt.getEffectiveSamplingPerc() <= 6.0 / 24);
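// Worked example of the adjustment above (assuming 24 files of ~20KB each,
// ~480KB total): system (1) targets 1% of ~480KB = ~4.8KB, which the first
// ~20KB file already covers (rate 1/24 ~= 0.04). Raising the target to the
// 100KB minimum requires ceil(100KB / 20KB) = 5 files, i.e. 5/24 ~= 0.21.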
1390 } finally {
1391 gflags.setEnable_stats_extrapolation(origEnableStatsExtrapolation);
1392 }
1393 }
1394
1395 @Test
1396 public void TestComputeIncrementalStats() throws AnalysisException {
1397 checkComputeStatsStmt("compute incremental stats functional.alltypes");
1398 checkComputeStatsStmt(
1399 "compute incremental stats functional.alltypes partition(year=2010, month=10)");
1400 checkComputeStatsStmt(
1401 "compute incremental stats functional.alltypes partition(year<=2010)");
1402
1403 AnalysisError(
1404 "compute incremental stats functional.alltypes partition(year=9999, month=10)",
1405 "No matching partition(s) found.");
1406 AnalysisError(
1407 "compute incremental stats functional.alltypes partition(year=2010, month)",
1408 "Partition expr requires return type 'BOOLEAN'. Actual type is 'INT'.");
1409
1410 // Test that NULL partition values generate a valid query.
1411 checkComputeStatsStmt("compute incremental stats functional.alltypesagg " +
1412 "partition(year=2010, month=1, day is NULL)");
1413
1414 AnalysisError("compute incremental stats functional_hbase.alltypes " +
1415 "partition(year=2010, month=1)", "COMPUTE INCREMENTAL ... PARTITION not " +
1416 "supported for non-HDFS table functional_hbase.alltypes");
1417
1418 AnalysisError("compute incremental stats functional.view_view",
1419 "COMPUTE STATS not supported for view: functional.view_view");
1420 }
1421
1423 @Test
1424 public void TestDropIncrementalStats() throws AnalysisException {
1425 AnalyzesOk(
1426 "drop incremental stats functional.alltypes partition(year=2010, month=10)");
1427 AnalyzesOk(
1428 "drop incremental stats functional.alltypes partition(year<=2010, month=10)");
1429 AnalysisError(
1430 "drop incremental stats functional.alltypes partition(year=9999, month=10)",
1431 "No matching partition(s) found.");
1432 }
1433
1435 @Test
1436 public void TestDropStats() throws AnalysisException {
1437 AnalyzesOk("drop stats functional.alltypes");
1438
1439 // Table does not exist
1440 AnalysisError("drop stats tbl_does_not_exist",
1441 "Could not resolve table reference: 'tbl_does_not_exist'");
1442 // Database does not exist
1443 AnalysisError("drop stats no_db.no_tbl",
1444 "Could not resolve table reference: 'no_db.no_tbl'");
1445
1446 AnalysisError("drop stats functional.alltypes partition(year=2010, month=10)",
1447 "Syntax error");
1448 AnalysisError("drop stats functional.alltypes partition(year, month)",
1449 "Syntax error");
1450 }
1451
1452 @Test
1453 public void TestDrop() throws AnalysisException {
1454 AnalyzesOk("drop database functional");
1455 AnalyzesOk("drop database functional cascade");
1456 AnalyzesOk("drop database functional restrict");
1457 AnalyzesOk("drop table functional.alltypes");
1458 AnalyzesOk("drop view functional.alltypes_view");
1459
1460 // If the database does not exist, and the user hasn't specified "IF EXISTS",
1461 // an analysis error should be thrown
1462 AnalysisError("drop database db_does_not_exist",
1463 "Database does not exist: db_does_not_exist");
1464 AnalysisError("drop database db_does_not_exist cascade",
1465 "Database does not exist: db_does_not_exist");
1466 AnalysisError("drop database db_does_not_exist restrict",
1467 "Database does not exist: db_does_not_exist");
1468 AnalysisError("drop table db_does_not_exist.alltypes",
1469 "Database does not exist: db_does_not_exist");
1470 AnalysisError("drop view db_does_not_exist.alltypes_view",
1471 "Database does not exist: db_does_not_exist");
1472 // Invalid name reports non-existence instead of invalidity.
1473 AnalysisError("drop database `???`",
1474 "Database does not exist: ???");
1475 AnalysisError("drop database `???` cascade",
1476 "Database does not exist: ???");
1477 AnalysisError("drop database `???` restrict",
1478 "Database does not exist: ???");
1479 AnalysisError("drop table functional.`%^&`",
1480 "Table does not exist: functional.%^&");
1481 AnalysisError("drop view functional.`@#$%`",
1482 "Table does not exist: functional.@#$%");
1483
1484 // If the database exists but the table doesn't, and the user hasn't specified
1485 // "IF EXISTS", an analysis error should be thrown
1486 AnalysisError("drop table functional.badtable",
1487 "Table does not exist: functional.badtable");
1488 AnalysisError("drop view functional.badview",
1489 "Table does not exist: functional.badview");
1490
1491 // No error is thrown if the user specifies IF EXISTS
1492 AnalyzesOk("drop database if exists db_does_not_exist");
1493 AnalyzesOk("drop database if exists db_does_not_exist cascade");
1494 AnalyzesOk("drop database if exists db_does_not_exist restrict");
1495
1496 // No error is thrown if the database does not exist
1497 AnalyzesOk("drop table if exists db_does_not_exist.alltypes");
1498 AnalyzesOk("drop view if exists db_does_not_exist.alltypes");
1499 // No error is thrown if the table does not exist and IF EXISTS is specified.
1501 AnalyzesOk("drop table if exists functional.notbl");
1502 AnalyzesOk("drop view if exists functional.notbl");
1503
1504 // Cannot drop a view with DROP TABLE.
1505 AnalysisError("drop table functional.alltypes_view",
1506 "DROP TABLE not allowed on a view: functional.alltypes_view");
1507 // Cannot drop a table with DROP VIEW.
1508 AnalysisError("drop view functional.alltypes",
1509 "DROP VIEW not allowed on a table: functional.alltypes");
1510
1511 // No analysis error for tables that can't be loaded.
1512 AnalyzesOk("drop table functional.unsupported_partition_types");
1513 }
1514
1515 @Test
1516 public void TestTruncate() throws AnalysisException {
1517 AnalyzesOk("truncate table functional.alltypes");
1518 AnalyzesOk("truncate table if exists functional.alltypes");
1519 AnalyzesOk("truncate functional.alltypes");
1520 AnalyzesOk("truncate if exists functional.alltypes");
1521
1522 // If the database does not exist, an analysis error should be thrown
1523 AnalysisError("truncate table db_does_not_exist.alltypes",
1524 "Database does not exist: db_does_not_exist");
1525
1526 // With IF EXISTS, analysis succeeds even though the database does not exist.
1527 AnalyzesOk("truncate table if exists db_does_not_exist.alltypes");
1528
1529 // Invalid name reports non-existence instead of invalidity.
1530 AnalysisError("truncate table functional.`%^&`",
1531 "Table does not exist: functional.%^&");
1532
1533 // If the database exists but the table doesn't, an analysis error should be thrown.
1534 AnalysisError("truncate table functional.badtable",
1535 "Table does not exist: functional.badtable");
1536
1537 // With IF EXISTS, analysis succeeds even though the table does not exist.
1538 AnalyzesOk("truncate if exists functional.badtable");
1539
1540 // Cannot truncate a non-HDFS table.
1541 AnalysisError("truncate table functional.alltypes_view",
1542 "TRUNCATE TABLE not supported on non-HDFS table: functional.alltypes_view");
1543 }
1544
1545 @Test
1546 public void TestCreateDataSource() {
1547 final String DATA_SOURCE_NAME = "TestDataSource1";
1548 final DataSource DATA_SOURCE = new DataSource(DATA_SOURCE_NAME, "/foo.jar",
1549 "foo.Bar", "V1");
1550 catalog_.addDataSource(DATA_SOURCE);
1551 AnalyzesOk("CREATE DATA SOURCE IF NOT EXISTS " + DATA_SOURCE_NAME +
1552 " LOCATION '/foo.jar' CLASS 'foo.Bar' API_VERSION 'V1'");
1553 AnalyzesOk("CREATE DATA SOURCE IF NOT EXISTS " + DATA_SOURCE_NAME.toLowerCase() +
1554 " LOCATION '/foo.jar' CLASS 'foo.Bar' API_VERSION 'V1'");
1555 AnalyzesOk("CREATE DATA SOURCE IF NOT EXISTS " + DATA_SOURCE_NAME +
1556 " LOCATION 'hdfs://localhost:20500/foo.jar' CLASS 'foo.Bar' API_VERSION 'V1'");
1557 AnalyzesOk("CREATE DATA SOURCE foo LOCATION '/' CLASS '' API_VERSION 'v1'");
1558 AnalyzesOk("CREATE DATA SOURCE foo LOCATION '/foo.jar' CLASS 'com.bar.Foo' " +
1559 "API_VERSION 'V1'");
1560 AnalyzesOk("CREATE DATA SOURCE foo LOCATION '/FOO.jar' CLASS 'COM.BAR.FOO' " +
1561 "API_VERSION 'v1'");
1562 AnalyzesOk("CREATE DATA SOURCE foo LOCATION \"/foo.jar\" CLASS \"com.bar.Foo\" " +
1563 "API_VERSION \"V1\"");
1564 AnalyzesOk("CREATE DATA SOURCE foo LOCATION '/x/foo@hi_^!#.jar' " +
1565 "CLASS 'com.bar.Foo' API_VERSION 'V1'");
1566 AnalyzesOk("CREATE DATA SOURCE foo LOCATION 'hdfs://localhost:20500/a/b/foo.jar' " +
1567 "CLASS 'com.bar.Foo' API_VERSION 'V1'");
1568 AnalyzesOk("CREATE DATA SOURCE foo LOCATION 's3a://bucket/a/b/foo.jar' " +
1569 "CLASS 'com.bar.Foo' API_VERSION 'V1'");
1570
1571 AnalysisError("CREATE DATA SOURCE foo LOCATION 'blah://localhost:20500/foo.jar' " +
1572 "CLASS 'com.bar.Foo' API_VERSION 'V1'",
1573 "No FileSystem for scheme: blah");
1574 AnalysisError("CREATE DATA SOURCE " + DATA_SOURCE_NAME + " LOCATION '/foo.jar' " +
1575 "CLASS 'foo.Bar' API_VERSION 'V1'",
1576 "Data source already exists: " + DATA_SOURCE_NAME.toLowerCase());
1577 AnalysisError("CREATE DATA SOURCE foo LOCATION '/foo.jar' " +
1578 "CLASS 'foo.Bar' API_VERSION 'V2'", "Invalid API version: 'V2'");
1579 }
1580
1581 @Test
1582 public void TestCreateDb() throws AnalysisException {
1583 AnalyzesOk("create database some_new_database");
1584 AnalysisError("create database functional", "Database already exists: functional");
1585 AnalyzesOk("create database if not exists functional");
1586 // Invalid database name.
1587 AnalysisError("create database `%^&`", "Invalid database name: %^&");
1588
1589 // Valid URIs.
1590 AnalyzesOk("create database new_db location " +
1591 "'/test-warehouse/new_db'");
1592 AnalyzesOk("create database new_db location " +
1593 "'hdfs://localhost:50200/test-warehouse/new_db'");
1594 AnalyzesOk("create database new_db location " +
1595 "'s3a://bucket/test-warehouse/new_db'");
1596 // Invalid URI.
1597 AnalysisError("create database new_db location " +
1598 "'blah://bucket/test-warehouse/new_db'",
1599 "No FileSystem for scheme: blah");
1600 }
1601
1602 @Test
1603 public void TestCreateTableLikeFile() throws AnalysisException {
1604 // check that we analyze all of the CREATE TABLE options
1605 AnalyzesOk("create table if not exists newtbl_DNE like parquet "
1606 + "'/test-warehouse/schemas/alltypestiny.parquet'");
1607 AnalyzesOk("create table newtbl_DNE like parquet "
1608 + "'/test-warehouse/schemas/zipcode_incomes.parquet'");
1609 AnalyzesOk("create table default.newtbl_DNE like parquet "
1610 + "'/test-warehouse/schemas/zipcode_incomes.parquet'");
1611 AnalyzesOk("create table newtbl_DNE like parquet "
1612 + "'/test-warehouse/schemas/zipcode_incomes.parquet' stored as parquet");
1613 AnalyzesOk("create external table newtbl_DNE like parquet "
1614 + "'/test-warehouse/schemas/zipcode_incomes.parquet' sort by (id,zip) "
1615 + "stored as parquet");
1616 AnalyzesOk("create table newtbl_DNE like parquet "
1617 + "'/test-warehouse/schemas/zipcode_incomes.parquet' sort by (id,zip)");
1618 AnalyzesOk("create table if not exists functional.zipcode_incomes like parquet "
1619 + "'/test-warehouse/schemas/zipcode_incomes.parquet'");
1620 AnalyzesOk("create table if not exists newtbl_DNE like parquet "
1621 + "'/test-warehouse/schemas/zipcode_incomes.parquet'");
1622 AnalyzesOk("create table if not exists newtbl_DNE like parquet "
1623 + "'/test-warehouse/schemas/decimal.parquet'");
1624 AnalyzesOk("create table if not exists newtbl_DNE like parquet'"
1625 + " /test-warehouse/schemas/enum/enum.parquet'");
1626
1627 // check we error in the same situations as standard create table
1628 AnalysisError("create table functional.zipcode_incomes like parquet "
1629 + "'/test-warehouse/schemas/zipcode_incomes.parquet'",
1630 "Table already exists: functional.zipcode_incomes");
1631 AnalysisError("create table database_DNE.newtbl_DNE like parquet "
1632 + "'/test-warehouse/schemas/zipcode_incomes.parquet'",
1633 "Database does not exist: database_DNE");
1634
1635 // check invalid paths
1636 AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
1637 + "'/test-warehouse'",
1638 "Cannot infer schema, path is not a file: hdfs://localhost:20500/test-warehouse");
1639 AnalysisError("create table newtbl_DNE like parquet 'foobar'",
1640 "URI path must be absolute: foobar");
1641 AnalysisError("create table newtbl_DNE like parquet '/not/a/file/path'",
1642 "Cannot infer schema, path is not a file: "
1643 + "hdfs://localhost:20500/not/a/file/path");
1644 AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
1645 + "'file:///tmp/foobar'",
1646 "Cannot infer schema, path is not a file: file:/tmp/foobar");
1647
1648 // check valid paths with bad file contents
1649 AnalysisError("create table database_DNE.newtbl_DNE like parquet "
1650 + "'/test-warehouse/zipcode_incomes_rc/000000_0'",
1651 "File is not a parquet file: "
1652 + "hdfs://localhost:20500/test-warehouse/zipcode_incomes_rc/000000_0");
1653
1654 // This is a decimal file without the required decimal annotations.
1655 AnalysisError("create table if not exists functional.zipcode_incomes like parquet "
1656 + "'/test-warehouse/schemas/malformed_decimal_tiny.parquet'",
1657 "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field c1");
1658
1659 // Invalid file format
1660 AnalysisError("create table newtbl_kudu like parquet " +
1661 "'/test-warehouse/schemas/alltypestiny.parquet' stored as kudu",
1662 "CREATE TABLE LIKE FILE statement is not supported for Kudu tables.");
1663 }
1664
1665 @Test
1666 public void TestCreateTableAsSelect() throws AnalysisException {
1667 // Constant select.
1668 AnalyzesOk("create table newtbl as select 1+2, 'abc'");
1669
1670 // Select from partitioned and unpartitioned tables using different
1671 // queries.
1672 AnalyzesOk("create table newtbl stored as textfile " +
1673 "as select * from functional.jointbl");
1674 AnalyzesOk("create table newtbl stored as parquetfile " +
1675 "as select * from functional.alltypes");
1676 AnalyzesOk("create table newtbl stored as parquet " +
1677 "as select * from functional.alltypes");
1678 AnalyzesOk("create table newtbl as select int_col from functional.alltypes");
1679
1680 AnalyzesOk("create table functional.newtbl " +
1681 "as select count(*) as CNT from functional.alltypes");
1682 AnalyzesOk("create table functional.tbl as select a.* from functional.alltypes a " +
1683 "join functional.alltypes b on (a.int_col=b.int_col) limit 1000");
1684 // CTAS with a select query that requires expression rewrite (IMPALA-6307)
1685 AnalyzesOk("create table functional.ctas_tbl partitioned by (year) as " +
1686 "with tmp as (select a.timestamp_col, a.year from functional.alltypes a " +
1687 "left join functional.alltypes b " +
1688 "on b.timestamp_col between a.timestamp_col and a.timestamp_col) " +
1689 "select a.timestamp_col, a.year from tmp a");
1690
1691 // Caching operations
1692 AnalyzesOk("create table functional.newtbl cached in 'testPool'" +
1693 " as select count(*) as CNT from functional.alltypes");
1694 AnalyzesOk("create table functional.newtbl uncached" +
1695 " as select count(*) as CNT from functional.alltypes");
1696
1697 // Table already exists with and without IF NOT EXISTS
1698 AnalysisError("create table functional.alltypes as select 1",
1699 "Table already exists: functional.alltypes");
1700 AnalyzesOk("create table if not exists functional.alltypes as select 1");
1701
1702 // Database does not exist
1703 AnalysisError("create table db_does_not_exist.new_table as select 1",
1704 "Database does not exist: db_does_not_exist");
1705
1706 // Analysis errors in the SELECT statement
1707 AnalysisError("create table newtbl as select * from tbl_does_not_exist",
1708 "Could not resolve table reference: 'tbl_does_not_exist'");
1709 AnalysisError("create table newtbl as select 1 as c1, 2 as c1",
1710 "Duplicate column name: c1");
1711
1712 // Unsupported file formats
1713 AnalysisError("create table foo stored as sequencefile as select 1",
1714 "CREATE TABLE AS SELECT does not support the (SEQUENCEFILE) file format. " +
1715 "Supported formats are: (PARQUET, TEXTFILE, KUDU)");
1716 AnalysisError("create table foo stored as RCFILE as select 1",
1717 "CREATE TABLE AS SELECT does not support the (RCFILE) file format. " +
1718 "Supported formats are: (PARQUET, TEXTFILE, KUDU)");
1719
1720 // CTAS with a WITH clause and inline view (IMPALA-1100)
1721 AnalyzesOk("create table test_with as with with_1 as (select 1 as int_col from " +
1722 "functional.alltypes as t1 right join (select 1 as int_col from " +
1723 "functional.alltypestiny as t1) as t2 on t2.int_col = t1.int_col) " +
1724 "select * from with_1 limit 10");
1725
1726 // CTAS with a correlated inline view.
1727 AnalyzesOk("create table test as select id, item " +
1728 "from functional.allcomplextypes b, (select item from b.int_array_col) v1");
1729 // Correlated inline view in WITH clause.
1730 AnalyzesOk("create table test as " +
1731 "with w as (select id, item from functional.allcomplextypes b, " +
1732 "(select item from b.int_array_col) v1) select * from w");
1733 // CTAS with illegal correlated inline views.
1734 AnalysisError("create table test as select id, item " +
1735 "from functional.allcomplextypes b, " +
1736 "(select item from b.int_array_col, functional.alltypes) v1",
1737 "Nested query is illegal because it contains a table reference " +
1738 "'b.int_array_col' correlated with an outer block as well as an " +
1739 "uncorrelated one 'functional.alltypes':\n" +
1740 "SELECT item FROM b.int_array_col, functional.alltypes");
1741 AnalysisError("create table test as " +
1742 "with w as (select id, item from functional.allcomplextypes b, " +
1743 "(select item from b.int_array_col, functional.alltypes) v1) select * from w",
1744 "Nested query is illegal because it contains a table reference " +
1745 "'b.int_array_col' correlated with an outer block as well as an " +
1746 "uncorrelated one 'functional.alltypes':\n" +
1747 "SELECT item FROM b.int_array_col, functional.alltypes");
1748
1749 // CTAS into partitioned table.
1750 AnalyzesOk("create table p partitioned by (int_col) as " +
1751 "select double_col, int_col from functional.alltypes");
1752 AnalyzesOk("create table p partitioned by (int_col) as " +
1753 "select sum(double_col), int_col from functional.alltypes group by int_col");
1754 // At least one non-partition column must be specified.
1755 AnalysisError("create table p partitioned by (int_col, tinyint_col) as " +
1756 "select int_col, tinyint_col from functional.alltypes",
1757 "Number of partition columns (2) must be smaller than the number of columns in " +
1758 "the select statement (2).");
1759 // Order of the columns is important and not automatically corrected.
1760 AnalysisError("create table p partitioned by (int_col) as " +
1761 "select double_col, int_col, tinyint_col from functional.alltypes",
1762 "Partition column name mismatch: int_col != tinyint_col");
1763 AnalysisError("create table p partitioned by (tinyint_col, int_col) as " +
1764 "select double_col, int_col, tinyint_col from functional.alltypes",
1765 "Partition column name mismatch: tinyint_col != int_col");
1766
1767 // CTAS into managed Kudu tables
1768 AnalyzesOk("create table t primary key (id) partition by hash (id) partitions 3" +
1769 " stored as kudu as select id, bool_col, tinyint_col, smallint_col, int_col, " +
1770 "bigint_col, float_col, double_col, date_string_col, string_col " +
1771 "from functional.alltypestiny");
1772 AnalyzesOk("create table t primary key (id) partition by range (id) " +
1773 "(partition values < 10, partition 20 <= values < 30, partition value = 50) " +
1774 "stored as kudu as select id, bool_col, tinyint_col, smallint_col, int_col, " +
1775 "bigint_col, float_col, double_col, date_string_col, string_col " +
1776 "from functional.alltypestiny");
1777 AnalyzesOk("create table t primary key (id) partition by hash (id) partitions 3, "+
1778 "range (id) (partition values < 10, partition 10 <= values < 20, " +
1779 "partition value = 30) stored as kudu as select id, bool_col, tinyint_col, " +
1780 "smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, " +
1781 "string_col from functional.alltypestiny");
1782 // Creating an unpartitioned table results in a warning.
1783 AnalyzesOk("create table t primary key(id) stored as kudu as select id, bool_col " +
1784 "from functional.alltypestiny",
1785 "Unpartitioned Kudu tables are inefficient for large data sizes.");
1786 // IMPALA-5796: CTAS into a Kudu table with expr rewriting.
1787 AnalyzesOk("create table t primary key(id) stored as kudu as select id, bool_col " +
1788 "from functional.alltypestiny where id between 0 and 10");
1789 // CTAS with a select query that requires expression rewrite (IMPALA-6307)
1790 AnalyzesOk("create table t primary key(year) stored as kudu as " +
1791 "with tmp as (select a.timestamp_col, a.year from functional.alltypes a " +
1792 "left join functional.alltypes b " +
1793 "on b.timestamp_col between a.timestamp_col and a.timestamp_col) " +
1794 "select a.timestamp_col, a.year from tmp a");
1795 // CTAS into an external Kudu table is not supported.
1796 AnalysisError("create external table t stored as kudu " +
1797 "tblproperties('kudu.table_name'='t') as select id, int_col from " +
1798 "functional.alltypestiny", "CREATE TABLE AS SELECT is not supported for " +
1799 "external Kudu tables.");
1800
1801 // CTAS into Kudu tables with unsupported types
1802 AnalysisError("create table t primary key (cs) partition by hash partitions 3" +
1803 " stored as kudu as select cs from functional.chars_tiny",
1804 "Cannot create table 't': Type CHAR(5) is not supported in Kudu");
1805 AnalysisError("create table t primary key (vc) partition by hash partitions 3" +
1806 " stored as kudu as select vc from functional.chars_tiny",
1807 "Cannot create table 't': Type VARCHAR(32) is not supported in Kudu");
1808 AnalysisError("create table t primary key (id) partition by hash partitions 3" +
1809 " stored as kudu as select c1 as id from functional.decimal_tiny",
1810 "Cannot create table 't': Type DECIMAL(10,4) is not supported in Kudu");
1811 AnalysisError("create table t primary key (id) partition by hash partitions 3" +
1812 " stored as kudu as select id, s from functional.complextypes_fileformat",
1813 "Expr 's' in select list returns a complex type 'STRUCT<f1:STRING,f2:INT>'.\n" +
1814 "Only scalar types are allowed in the select list.");
1815 AnalysisError("create table t primary key (id) partition by hash partitions 3" +
1816 " stored as kudu as select id, m from functional.complextypes_fileformat",
1817 "Expr 'm' in select list returns a complex type 'MAP<STRING,BIGINT>'.\n" +
1818 "Only scalar types are allowed in the select list.");
1819 AnalysisError("create table t primary key (id) partition by hash partitions 3" +
1820 " stored as kudu as select id, a from functional.complextypes_fileformat",
1821 "Expr 'a' in select list returns a complex type 'ARRAY<INT>'.\n" +
1822 "Only scalar types are allowed in the select list.");
1823
1824 // IMPALA-6454: CTAS into Kudu tables with primary key specified in upper case.
1825 AnalyzesOk("create table part_kudu_tbl primary key(INT_COL, SMALLINT_COL, ID)" +
1826 " partition by hash(INT_COL, SMALLINT_COL, ID) PARTITIONS 2" +
1827 " stored as kudu as SELECT INT_COL, SMALLINT_COL, ID, BIGINT_COL," +
1828 " DATE_STRING_COL, STRING_COL, TIMESTAMP_COL, YEAR, MONTH FROM " +
1829 " functional.alltypes");
1830 }
1831
1832 @Test
1833 public void TestCreateTableAsSelectWithHints() throws AnalysisException {
1834 // Test that CTAS hints are analyzed correctly and that conflicting hints
1835 // result in an error.
1836 // The tests here are minimal, because other tests already cover this logic:
1837 // - ParserTests#TestPlanHints tests that hints are set correctly during parsing.
1838 // - AnalyzeStmtsTest#TestInsertHints tests the analysis of insert hints, which
1839 // is the same as the analysis of CTAS hints.
1840 for (String[] hintStyle: hintStyles_) {
1841 String prefix = hintStyle[0];
1842 String suffix = hintStyle[1];
1843 // Test plan hints for partitioned Hdfs tables.
1844 AnalyzesOk(String.format("create %sshuffle%s table t " +
1845 "partitioned by (year, month) as select * from functional.alltypes",
1846 prefix, suffix));
1847 // Warn on unrecognized hints.
1848 AnalyzesOk(String.format("create %sbadhint%s table t " +
1849 "partitioned by (year, month) as select * from functional.alltypes",
1850 prefix, suffix),
1851 "INSERT hint not recognized: badhint");
1852 // Conflicting plan hints.
1853 AnalysisError(String.format("create %sshuffle,noshuffle%s table t " +
1854 "partitioned by (year, month) as " +
1855 "select * from functional.alltypes", prefix, suffix),
1856 "Conflicting INSERT hints: shuffle and noshuffle");
1857 }
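// For reference, the hint placements exercised above look roughly like the
// following (the exact styles come from the shared hintStyles_ fixture):
//   create /* +shuffle */ table t partitioned by (year, month) as select ...
//   create -- +shuffle
//     table t partitioned by (year, month) as select ...
//   create [shuffle] table t partitioned by (year, month) as select ...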
1858 }
1859
1860 @Test
1861 public void TestCreateTableLike() throws AnalysisException {
1862 AnalyzesOk("create table if not exists functional.new_tbl like functional.alltypes");
1863 AnalyzesOk("create table functional.like_view like functional.view_view");
1864 AnalyzesOk(
1865 "create table if not exists functional.alltypes like functional.alltypes");
1866 AnalysisError("create table functional.alltypes like functional.alltypes",
1867 "Table already exists: functional.alltypes");
1868 AnalysisError("create table functional.new_table like functional.tbl_does_not_exist",
1869 "Table does not exist: functional.tbl_does_not_exist");
1870 AnalysisError("create table functional.new_table like db_does_not_exist.alltypes",
1871 "Database does not exist: db_does_not_exist");
1872 // Invalid database name.
1873 AnalysisError("create table `???`.new_table like functional.alltypes",
1874 "Invalid database name: ???");
1875 // Invalid table/view name.
1876 AnalysisError("create table functional.`^&*` like functional.alltypes",
1877 "Invalid table/view name: ^&*");
1878 // Invalid source database/table name reports non-existence instead of invalidity.
1879 AnalysisError("create table functional.foo like `???`.alltypes",
1880 "Database does not exist: ???");
1881 AnalysisError("create table functional.foo like functional.`%^&`",
1882 "Table does not exist: functional.%^&");
1883 // Valid URI values.
1884 AnalyzesOk("create table tbl like functional.alltypes location " +
1885 "'/test-warehouse/new_table'");
1886 AnalyzesOk("create table tbl like functional.alltypes location " +
1887 "'hdfs://localhost:20500/test-warehouse/new_table'");
1888 // 'file' scheme does not take an authority, so file:/// is equivalent to file://
1889 // and file:/.
1890 AnalyzesOk("create table tbl like functional.alltypes location " +
1891 "'file:///test-warehouse/new_table'");
1892 AnalyzesOk("create table tbl like functional.alltypes location " +
1893 "'file://test-warehouse/new_table'");
1894 AnalyzesOk("create table tbl like functional.alltypes location " +
1895 "'file:/test-warehouse/new_table'");
1896 AnalyzesOk("create table tbl like functional.alltypes location " +
1897 "'s3a://bucket/test-warehouse/new_table'");
1898 // Invalid URI values.
1899 AnalysisError("create table tbl like functional.alltypes location " +
1900 "'foofs://test-warehouse/new_table'",
1901 "No FileSystem for scheme: foofs");
1902 AnalysisError("create table functional.baz like functional.alltypes location ' '",
1903 "URI path cannot be empty.");
1904
1905 // CREATE TABLE LIKE is not currently supported for Kudu tables (see IMPALA-4052)
1906 AnalysisError("create table kudu_tbl like functional.alltypestiny stored as kudu",
1907 "CREATE TABLE LIKE is not supported for Kudu tables");
1908 AnalysisError("create table tbl like functional_kudu.dimtbl", "Cloning a Kudu " +
1909 "table using CREATE TABLE LIKE is not supported.");
1910
1911 // Test sort columns.
1912 AnalyzesOk("create table tbl sort by (int_col,id) like functional.alltypes");
1913 AnalysisError("create table tbl sort by (int_col,foo) like functional.alltypes",
1914 "Could not find SORT BY column 'foo' in table.");
1915 }
1916
1917 @Test
1918 public void TestCreateTable() throws AnalysisException {
1919 AnalyzesOk("create table functional.new_table (i int)");
1920 AnalyzesOk("create table if not exists functional.alltypes (i int)");
1921 AnalysisError("create table functional.alltypes",
1922 "Table already exists: functional.alltypes");
1923 AnalysisError("create table functional.alltypes (i int)",
1924 "Table already exists: functional.alltypes");
1925 AnalyzesOk("create table functional.new_table (i int) row format delimited fields " +
1926 "terminated by '|'");
1927
1928 AnalyzesOk("create table new_table (i int) PARTITIONED BY (d decimal)");
1929 AnalyzesOk("create table new_table (i int) PARTITIONED BY (d decimal(3,1))");
1930 AnalyzesOk("create table new_table(d1 decimal, d2 decimal(10), d3 decimal(5, 2))");
1931 AnalysisError("create table new_table (i int) PARTITIONED BY (d decimal(40,1))",
1932 "Decimal precision must be <= 38: 40");
1933
1934 AnalyzesOk("create table new_table(s1 varchar(1), s2 varchar(32672), " +
1935 "s3 varchar(65535))");
1936 AnalysisError("create table new_table(s1 varchar(0))",
1937 "Varchar size must be > 0: 0");
1938 AnalysisError("create table new_table(s1 varchar(65536))",
1939 "Varchar size must be <= 65535: 65536");
1940 AnalysisError("create table new_table(s1 char(0))",
1941 "Char size must be > 0: 0");
1942 AnalysisError("create table new_table(s1 Char(256))",
1943 "Char size must be <= 255: 256");
1944 AnalyzesOk("create table new_table (i int) PARTITIONED BY (s varchar(3))");
1945 AnalyzesOk("create table functional.new_table (c char(250))");
1946 AnalyzesOk("create table new_table (i int) PARTITIONED BY (c char(3))");
1947
1948 {
1949 // Check that overly long properties fail at the analysis layer.
1950 String long_property_key =
1951 StringUtils.repeat('k', MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH);
1952 String long_property_value =
1953 StringUtils.repeat('v', MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH);
1958
1959 // At this point long_property_{key,value} are not quite long enough to
1960 // cause analysis to fail.
1961
1962 AnalyzesOk("create table new_table (i int) "
1963 + "with serdeproperties ('" + long_property_key + "'='" + long_property_value
1964 + "') "
1965 + "tblproperties ('" + long_property_key + "'='" + long_property_value + "')");
1966
1967 long_property_key += 'X';
1968 long_property_value += 'X';
1969 // Now that long_property_{key,value} are one character longer, they are too long
1970 // for the analyzer.
1971
1972 AnalysisError("create table new_table (i int) "
1973 + "tblproperties ('" + long_property_key + "'='value')",
1974 "Property key length must be <= " + MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + ": "
1975 + (MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + 1));
1976
1977 AnalysisError("create table new_table (i int) "
1978 + "tblproperties ('key'='" + long_property_value + "')",
1979 "Property value length must be <= " + MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH
1980 + ": " + (MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH + 1));
1981
1982 AnalysisError("create table new_table (i int) "
1983 + "with serdeproperties ('" + long_property_key + "'='value')",
1984 "Serde property key length must be <= " + MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH
1985 + ": " + (MetaStoreUtil.MAX_PROPERTY_KEY_LENGTH + 1));
1986
1987 AnalysisError("create table new_table (i int) "
1988 + "with serdeproperties ('key'='" + long_property_value + "')",
1989 "Serde property value length must be <= "
1990 + MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH + ": "
1991 + (MetaStoreUtil.MAX_PROPERTY_VALUE_LENGTH + 1));
1992 }
1993
1994 // Supported file formats. Exclude Avro since it is tested separately.
1995 String [] fileFormats =
1996 {"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE"};
1997 String [] fileFormatsStr =
1998 {"TEXT", "SEQUENCE_FILE", "PARQUET", "PARQUET", "RC_FILE"};
1999 int formatIndx = 0;
2000 for (String format: fileFormats) {
2001 for (String create: ImmutableList.of("create table", "create external table")) {
2002 AnalyzesOk(String.format("%s new_table (i int) " +
2003 "partitioned by (d decimal) comment 'c' stored as %s", create, format));
2004 // No column definitions.
2005 AnalysisError(String.format("%s new_table " +
2006 "partitioned by (d decimal) comment 'c' stored as %s", create, format),
2007 "Table requires at least 1 column");
2008 }
2009 AnalysisError(String.format("create table t (i int primary key) stored as %s",
2010 format), String.format("Unsupported column options for file format " +
2011 "'%s': 'i INT PRIMARY KEY'", fileFormatsStr[formatIndx]));
2012 AnalysisError(String.format("create table t (i int, primary key(i)) stored as %s",
2013 format), "Only Kudu tables can specify a PRIMARY KEY");
2014 formatIndx++;
2015 }
2016
2017 // Note: Backslashes need to be escaped twice - once for Java and once for Impala.
2018 // For example, if this were a real query the value '\' would be stored in the
2019 // metastore for the ESCAPED BY field.
2020 AnalyzesOk("create table functional.new_table (i int) row format delimited fields " +
2021 "terminated by '\\t' escaped by '\\\\' lines terminated by '\\n'");
2022 AnalyzesOk("create table functional.new_table (i int) row format delimited fields " +
2023 "terminated by '\\001' escaped by '\\002' lines terminated by '\\n'");
2024 AnalyzesOk("create table functional.new_table (i int) row format delimited fields " +
2025 "terminated by '-2' escaped by '-3' lines terminated by '\\n'");
2026 AnalyzesOk("create table functional.new_table (i int) row format delimited fields " +
2027 "terminated by '-128' escaped by '127' lines terminated by '40'");
2028
2029 AnalysisError("create table functional.new_table (i int) row format delimited " +
2030 "fields terminated by '-2' escaped by '128' lines terminated by '\\n'",
2031 "ESCAPED BY values and LINE/FIELD terminators must be specified as a single " +
2032 "character or as a decimal value in the range [-128:127]: 128");
2033 AnalysisError("create table functional.new_table (i int) row format delimited " +
2034 "fields terminated by '-2' escaped by '127' lines terminated by '255'",
2035 "ESCAPED BY values and LINE/FIELD terminators must be specified as a single " +
2036 "character or as a decimal value in the range [-128:127]: 255");
2037 AnalysisError("create table functional.new_table (i int) row format delimited " +
2038 "fields terminated by '-129' escaped by '127' lines terminated by '\\n'",
2039 "ESCAPED BY values and LINE/FIELD terminators must be specified as a single " +
2040 "character or as a decimal value in the range [-128:127]: -129");
2041 AnalysisError("create table functional.new_table (i int) row format delimited " +
2042 "fields terminated by '||' escaped by '\\\\' lines terminated by '\\n'",
2043 "ESCAPED BY values and LINE/FIELD terminators must be specified as a single " +
2044 "character or as a decimal value in the range [-128:127]: ||");
2045
2046 // IMPALA-2251: it should not be possible to create text tables with the same
2047 // delimiter character used for multiple purposes.
2048 AnalysisError("create table functional.broken_text_table (c int) " +
2049 "row format delimited fields terminated by '\001' lines terminated by '\001'",
2050 "Field delimiter and line delimiter have same value: byte 1");
2051 AnalysisError("create table functional.broken_text_table (c int) " +
2052 "row format delimited lines terminated by '\001'",
2053 "Field delimiter and line delimiter have same value: byte 1");
2054 AnalysisError("create table functional.broken_text_table (c int) " +
2055 "row format delimited fields terminated by '\012'",
2056 "Field delimiter and line delimiter have same value: byte 10");
2057 AnalyzesOk("create table functional.broken_text_table (c int) " +
2058 "row format delimited escaped by '\001'",
2059 "Field delimiter and escape character have same value: byte 1. " +
2060 "Escape character will be ignored");
2061 AnalyzesOk("create table functional.broken_text_table (c int) " +
2062 "row format delimited escaped by 'x' lines terminated by 'x'",
2063 "Line delimiter and escape character have same value: byte 120. " +
2064 "Escape character will be ignored");
2065
2066 AnalysisError("create table db_does_not_exist.new_table (i int)",
2067 "Database does not exist: db_does_not_exist");
2068 AnalysisError("create table new_table (i int, I string)",
2069 "Duplicate column name: i");
2070 AnalysisError("create table new_table (c1 double, col2 int, c1 double, c4 string)",
2071 "Duplicate column name: c1");
2072 AnalysisError("create table new_table (i int, s string) PARTITIONED BY (i int)",
2073 "Duplicate column name: i");
2074 AnalysisError("create table new_table (i int) PARTITIONED BY (C int, c2 int, c int)",
2075 "Duplicate column name: c");
2076
2077 // Unsupported partition-column types.
2078 AnalysisError("create table new_table (i int) PARTITIONED BY (t timestamp)",
2079 "Type 'TIMESTAMP' is not supported as partition-column type in column: t");
2080
2081 // Caching ops
2082 AnalyzesOk("create table cached_tbl(i int) partitioned by(j int) " +
2083 "cached in 'testPool'");
2084 AnalyzesOk("create table cached_tbl(i int) partitioned by(j int) uncached");
2085 AnalyzesOk("create table cached_tbl(i int) partitioned by(j int) " +
2086 "location '/test-warehouse/' cached in 'testPool'");
2087 AnalyzesOk("create table cached_tbl(i int) partitioned by(j int) " +
2088 "location '/test-warehouse/' uncached");
2089 AnalysisError("create table cached_tbl(i int) location " +
2090 "'file:///test-warehouse/cache_tbl' cached in 'testPool'",
2091 "Location 'file:/test-warehouse/cache_tbl' cannot be cached. " +
2092 "Please retry without caching: CREATE TABLE ... UNCACHED");
2093
2094 // Invalid database name.
2095 AnalysisError("create table `???`.new_table (x int) PARTITIONED BY (y int)",
2096 "Invalid database name: ???");
2097 // Invalid table/view name.
2098 AnalysisError("create table functional.`^&*` (x int) PARTITIONED BY (y int)",
2099 "Invalid table/view name: ^&*");
2100 // Invalid column names.
2101 AnalysisError("create table new_table (`???` int) PARTITIONED BY (i int)",
2102 "Invalid column/field name: ???");
2103 AnalysisError("create table new_table (i int) PARTITIONED BY (`^&*` int)",
2104 "Invalid column/field name: ^&*");
2105 // Test HMS constraint on comment length.
2106 AnalyzesOk(String.format("create table t (i int comment '%s')",
2107 StringUtils.repeat("c", MetaStoreUtil.CREATE_MAX_COMMENT_LENGTH)));
2108 AnalysisError(String.format("create table t (i int comment '%s')",
2109 StringUtils.repeat("c", MetaStoreUtil.CREATE_MAX_COMMENT_LENGTH + 1)),
2110 "Comment of column 'i' exceeds maximum length of 256 characters:");
2111
2112 // Valid URI values.
2113 AnalyzesOk("create table tbl (i int) location '/test-warehouse/new_table'");
2114 AnalyzesOk("create table tbl (i int) location " +
2115 "'hdfs://localhost:20500/test-warehouse/new_table'");
2116 AnalyzesOk("create table tbl (i int) location " +
2117 "'file:///test-warehouse/new_table'");
2118 AnalyzesOk("create table tbl (i int) location " +
2119 "'s3a://bucket/test-warehouse/new_table'");
2120 AnalyzesOk("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " +
2121 "'file://test-warehouse/new_table'");
2122
2123 // Invalid URI values.
2124 AnalysisError("create table functional.foo (x int) location " +
2125 "'foofs://test-warehouse/new_table'",
2126 "No FileSystem for scheme: foofs");
2127 AnalysisError("create table functional.foo (x int) location " +
2128 "' '", "URI path cannot be empty.");
2129 AnalysisError("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " +
2130 "'foofs://test-warehouse/new_table'",
2131 "No FileSystem for scheme: foofs");
2132 AnalysisError("ALTER TABLE functional_seq_snap.alltypes SET LOCATION " +
2133 "' '", "URI path cannot be empty.");
2134
2135 // Create table PRODUCED BY DATA SOURCE
2136 final String DATA_SOURCE_NAME = "TestDataSource1";
2137 catalog_.addDataSource(new DataSource(DATA_SOURCE_NAME, "/foo.jar",
2138 "foo.Bar", "V1"));
2139 AnalyzesOk("CREATE TABLE DataSrcTable1 (x int) PRODUCED BY DATA SOURCE " +
2140 DATA_SOURCE_NAME);
2141 AnalyzesOk("CREATE TABLE DataSrcTable1 (x int) PRODUCED BY DATA SOURCE " +
2142 DATA_SOURCE_NAME.toLowerCase());
2143 AnalyzesOk("CREATE TABLE DataSrcTable1 (x int) PRODUCED BY DATA SOURCE " +
2144 DATA_SOURCE_NAME + "(\"\")");
2145 AnalyzesOk("CREATE TABLE DataSrcTable1 (a tinyint, b smallint, c int, d bigint, " +
2146 "e float, f double, g boolean, h string) PRODUCED BY DATA SOURCE " +
2147 DATA_SOURCE_NAME);
2148 AnalysisError("CREATE TABLE DataSrcTable1 (x int) PRODUCED BY DATA SOURCE " +
2149 "not_a_data_src(\"\")", "Data source does not exist");
2150 for (Type t: Type.getSupportedTypes()) {
2151 PrimitiveType type = t.getPrimitiveType();
2152 if (DataSourceTable.isSupportedPrimitiveType(type) || t.isNull()) continue;
2153 String typeSpec = type.name();
2154 if (type == PrimitiveType.CHAR || type == PrimitiveType.DECIMAL ||
2155 type == PrimitiveType.VARCHAR) {
2156 typeSpec += "(10)";
2157 }
2158 AnalysisError("CREATE TABLE DataSrcTable1 (x " + typeSpec + ") PRODUCED " +
2159 "BY DATA SOURCE " + DATA_SOURCE_NAME,
2160 "Tables produced by an external data source do not support the column type: " +
2161 type.name());
2162 }
2163
2164 // Tables with sort columns
2165 AnalyzesOk("create table functional.new_table (i int, j int) sort by (i)");
2166 AnalyzesOk("create table functional.new_table (i int, j int) sort by (i, j)");
2167 AnalyzesOk("create table functional.new_table (i int, j int) sort by (j, i)");
2168
2169 // 'sort.columns' property not supported in table definition.
2170 AnalysisError("create table Foo (i int) sort by (i) " +
2171 "tblproperties ('sort.columns'='i')", "Table definition must not contain the " +
2172 "sort.columns table property. Use SORT BY (...) instead.");
2173
2174 // Column in sort by list must exist.
2175 AnalysisError("create table functional.new_table (i int) sort by (j)", "Could not " +
2176 "find SORT BY column 'j' in table.");
2177
2178 // Partitioned HDFS table
2179 AnalyzesOk("create table functional.new_table (i int) PARTITIONED BY (d decimal)" +
2180 "SORT BY (i)");
2181 // Column in sort by list must not be an HDFS partition column.
2182 AnalysisError("create table functional.new_table (i int) PARTITIONED BY (d decimal)" +
2183 "SORT BY (d)", "SORT BY column list must not contain partition column: 'd'");
2184 }
2185
2186 @Test
2187 public void TestAlterKuduTable() {
2188 TestUtils.assumeKuduIsSupported();
2189 // ALTER TABLE ADD/DROP range partitions
2190 String[] addDrop = {"add if not exists", "add", "drop if exists", "drop"};
2191 for (String kw: addDrop) {
2192 AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
2193 "partition 10 <= values < 20", kw));
2194 AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
2195 "partition value = 30", kw));
2196 AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
2197 "partition values < 100", kw));
2198 AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
2199 "partition 10 <= values", kw));
2200 AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
2201 "partition 1+1 <= values <= factorial(3)", kw));
2202 AnalysisError(String.format("alter table functional.alltypes %s range " +
2203 "partition 10 < values < 20", kw), "Table functional.alltypes does not " +
2204 "support range partitions: RANGE PARTITION 10 < VALUES < 20");
2205 AnalysisError(String.format("alter table functional_kudu.testtbl %s range " +
2206 "partition values < isnull(null, null)", kw), "Range partition values " +
2207 "cannot be NULL. Range partition: 'PARTITION VALUES < isnull(NULL, NULL)'");
2208 }
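// Summary of the range partition bound forms accepted above (illustrative):
//   RANGE PARTITION <lower> <= VALUES < <upper>
//   RANGE PARTITION VALUE = <expr>
//   RANGE PARTITION VALUES < <upper>     (unbounded below)
//   RANGE PARTITION <lower> <= VALUES    (unbounded above)
// Bounds must be constant, non-NULL expressions; they are folded during
// analysis, e.g. 1+1 <= VALUES <= factorial(3) above.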
2209
2210 // ALTER TABLE ADD COLUMNS
2211 // Columns with different supported data types
2212 AnalyzesOk("alter table functional_kudu.testtbl add columns (a1 tinyint null, a2 " +
2213 "smallint null, a3 int null, a4 bigint null, a5 string null, a6 float null, " +
2214 "a7 double null, a8 boolean null comment 'boolean')");
2215 // Complex types
2216 AnalysisError("alter table functional_kudu.testtbl add columns ( "+
2217 "a struct<f1:int>)", "Kudu tables do not support complex types: " +
2218 "a STRUCT<f1:INT>");
2219 // Add primary key
2220 AnalysisError("alter table functional_kudu.testtbl add columns (a int primary key)",
2221 "Cannot add a primary key using an ALTER TABLE ADD COLUMNS statement: " +
2222 "a INT PRIMARY KEY");
2223 // Columns requiring a default value
2224 AnalyzesOk("alter table functional_kudu.testtbl add columns (a1 int not null " +
2225 "default 10)");
2226 AnalyzesOk("alter table functional_kudu.testtbl add columns (a1 int null " +
2227 "default 10)");
2228 // Other Kudu column options
2229 AnalyzesOk("alter table functional_kudu.testtbl add columns (a int encoding rle)");
2230 AnalyzesOk("alter table functional_kudu.testtbl add columns (a int compression lz4)");
2231 AnalyzesOk("alter table functional_kudu.testtbl add columns (a int block_size 10)");
2232
2233 // REPLACE columns is not supported for Kudu tables
2234 AnalysisError("alter table functional_kudu.testtbl replace columns (a int null)",
2235 "ALTER TABLE REPLACE COLUMNS is not supported on Kudu tables");
2236 // Conflict with existing column
2237 AnalysisError("alter table functional_kudu.testtbl add columns (zip int)",
2238 "Column already exists: zip");
2239 // Kudu column options on an HDFS table
2240 AnalysisError("alter table functional.alltypes add columns (a int not null)",
2241 "The specified column options are only supported in Kudu tables: a INT NOT NULL");
2242
2243 // ALTER TABLE DROP COLUMN
2244 AnalyzesOk("alter table functional_kudu.testtbl drop column name");
2245 AnalysisError("alter table functional_kudu.testtbl drop column no_col",
2246 "Column 'no_col' does not exist in table: functional_kudu.testtbl");
2247
2248 // ALTER TABLE CHANGE COLUMN on Kudu tables
2249 AnalyzesOk("alter table functional_kudu.testtbl change column name new_name string");
2250 // Unsupported column options
2251 AnalysisError("alter table functional_kudu.testtbl change column zip zip_code int " +
2252 "encoding rle compression lz4 default 90000", "Unsupported column options in " +
2253 "ALTER TABLE CHANGE COLUMN statement: 'zip_code INT ENCODING RLE COMPRESSION " +
2254 "LZ4 DEFAULT 90000'. Use ALTER TABLE ALTER COLUMN instead.");
2255 AnalysisError(
2256 "alter table functional_kudu.testtbl change column zip zip int comment 'comment'",
2257 "Kudu does not support column comments.");
2258 // Changing the column type is not supported for Kudu tables
2259 AnalysisError("alter table functional_kudu.testtbl change column zip zip bigint",
2260 "Cannot change the type of a Kudu column using an ALTER TABLE CHANGE COLUMN " +
2261 "statement: (INT vs BIGINT)");
2262
2263 // Renaming the underlying Kudu table is not supported for managed Kudu tables
2264 AnalysisError("ALTER TABLE functional_kudu.testtbl SET " +
2265 "TBLPROPERTIES ('kudu.table_name' = 'Hans')",
2266 "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
2267
2268 // TODO IMPALA-6375: Allow setting kudu.table_name for managed Kudu tables
2269 // if the 'EXTERNAL' property is set to TRUE in the same step.
2270 AnalysisError("ALTER TABLE functional_kudu.testtbl SET " +
2271 "TBLPROPERTIES ('EXTERNAL' = 'TRUE','kudu.table_name' = 'Hans')",
2272 "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
2273
2274 // ALTER TABLE RENAME TO
2275 AnalyzesOk("ALTER TABLE functional_kudu.testtbl RENAME TO new_testtbl");
2276
2277 // ALTER TABLE SORT BY
2278 AnalysisError("alter table functional_kudu.alltypes sort by (int_col)",
2279 "ALTER TABLE SORT BY not supported on Kudu tables.");
2280
2281 // ALTER TABLE SET TBLPROPERTIES for sort.columns
2282 AnalysisError("alter table functional_kudu.alltypes set tblproperties(" +
2283 "'sort.columns'='int_col')",
2284 "'sort.columns' table property is not supported for Kudu tables.");
2285 }
2286
2287 @Test
2288 public void TestCreateManagedKuduTable() {
2289 TestUtils.assumeKuduIsSupported();
2290 // Test primary keys and partition by clauses
2291 AnalyzesOk("create table tab (x int primary key) partition by hash(x) " +
2292 "partitions 8 stored as kudu");
2293 AnalyzesOk("create table tab (x int, primary key(x)) partition by hash(x) " +
2294 "partitions 8 stored as kudu");
2295 AnalyzesOk("create table tab (x int, y int, primary key (x, y)) " +
2296 "partition by hash(x, y) partitions 8 stored as kudu");
2297 AnalyzesOk("create table tab (x int, y int, primary key (x)) " +
2298 "partition by hash(x) partitions 8 stored as kudu");
2299 AnalyzesOk("create table tab (x int, y int, primary key(x, y)) " +
2300 "partition by hash(y) partitions 8 stored as kudu");
2301 AnalyzesOk("create table tab (x timestamp, y timestamp, primary key(x)) " +
2302 "partition by hash(x) partitions 8 stored as kudu");
2303 AnalyzesOk("create table tab (x int, y string, primary key (x)) partition by " +
2304 "hash (x) partitions 3, range (x) (partition values < 1, partition " +
2305 "1 <= values < 10, partition 10 <= values < 20, partition value = 30) " +
2306 "stored as kudu");
2307 AnalyzesOk("create table tab (x int, y int, primary key (x, y)) partition by " +
2308 "range (x, y) (partition value = (2001, 1), partition value = (2002, 1), " +
2309 "partition value = (2003, 2)) stored as kudu");
2310 // Non-literal boundary values in range partitions
2311 AnalyzesOk("create table tab (x int, y int, primary key (x)) partition by " +
2312 "range (x) (partition values < 1 + 1, partition (1+3) + 2 < values < 10, " +
2313 "partition factorial(4) < values < factorial(5), " +
2314 "partition value = factorial(6)) stored as kudu");
2315 AnalyzesOk("create table tab (x int, y int, primary key(x, y)) partition by " +
2316 "range(x, y) (partition value = (1+1, 2+2), partition value = ((1+1+1)+1, 10), " +
2317 "partition value = (cast (30 as int), factorial(5))) stored as kudu");
2318 AnalysisError("create table tab (x int primary key) partition by range (x) " +
2319 "(partition values < x + 1) stored as kudu", "Only constant values are allowed " +
2320 "for range-partition bounds: x + 1");
2321 AnalysisError("create table tab (x int primary key) partition by range (x) " +
2322 "(partition values <= isnull(null, null)) stored as kudu", "Range partition " +
2323 "values cannot be NULL. Range partition: 'PARTITION VALUES <= " +
2324 "isnull(NULL, NULL)'");
2325 AnalysisError("create table tab (x int primary key) partition by range (x) " +
2326 "(partition values <= (select count(*) from functional.alltypestiny)) " +
2327 "stored as kudu", "Only constant values are allowed for range-partition " +
2328 "bounds: (SELECT count(*) FROM functional.alltypestiny)");
2329 // Multilevel partitioning. Data is split into 3 buckets based on 'x' and each
2330 // bucket is partitioned into 4 tablets based on the range partitions of 'y'.
2331 AnalyzesOk("create table tab (x int, y string, primary key(x, y)) " +
2332 "partition by hash(x) partitions 3, range(y) " +
2333 "(partition values < 'aa', partition 'aa' <= values < 'bb', " +
2334 "partition 'bb' <= values < 'cc', partition 'cc' <= values) " +
2335 "stored as kudu");
2336 // Key column in upper case
2337 AnalyzesOk("create table tab (x int, y int, primary key (X)) " +
2338 "partition by hash (x) partitions 8 stored as kudu");
2339 // Flexible Partitioning
2340 AnalyzesOk("create table tab (a int, b int, c int, d int, primary key (a, b, c))" +
2341 "partition by hash (a, b) partitions 8, hash(c) partitions 2 stored as " +
2342 "kudu");
2343 // No columns specified in the PARTITION BY HASH clause
2344 AnalyzesOk("create table tab (a int primary key, b int, c int, d int) " +
2345 "partition by hash partitions 8 stored as kudu");
2346 // Range partition data types are picked up during analysis and forwarded to Kudu.
2347 // Column names in partition params should also be case-insensitive.
2348 AnalyzesOk("create table tab (a int, b int, c int, d int, primary key(a, b, c, d))" +
2349 "partition by hash (a, B, c) partitions 8, " +
2350 "range (A) (partition values < 1, partition 1 <= values < 2, " +
2351 "partition 2 <= values < 3, partition 3 <= values < 4, partition 4 <= values) " +
2352 "stored as kudu");
2353 // Allowing range partitioning on a subset of the primary keys
2354 AnalyzesOk("create table tab (id int, name string, valf float, vali bigint, " +
2355 "primary key (id, name)) partition by range (name) " +
2356 "(partition 'aa' < values <= 'bb') stored as kudu");
2357 // Null values in range partition values
2358 AnalysisError("create table tab (id int, name string, primary key(id, name)) " +
2359 "partition by hash (id) partitions 3, range (name) " +
2360 "(partition value = null, partition value = 1) stored as kudu",
2361 "Range partition values cannot be NULL. Range partition: 'PARTITION " +
2362 "VALUE = NULL'");
2363 // Primary key specified in tblproperties
2364 AnalysisError(String.format("create table tab (x int) partition by hash (x) " +
2365 "partitions 8 stored as kudu tblproperties ('%s' = 'x')",
2366 KuduTable.KEY_KEY_COLUMNS), "PRIMARY KEY must be used instead of the table " +
2367 "property");
2368 // Primary key column that doesn't exist
2369 AnalysisError("create table tab (x int, y int, primary key (z)) " +
2370 "partition by hash (x) partitions 8 stored as kudu",
2371 "PRIMARY KEY column 'z' does not exist in the table");
2372 // Invalid composite primary key
2373 AnalysisError("create table tab (x int primary key, primary key(x)) stored " +
2374 "as kudu", "Multiple primary keys specified. Composite primary keys can " +
2375 "be specified using the PRIMARY KEY (col1, col2, ...) syntax at the end " +
2376 "of the column definition.");
2377 AnalysisError("create table tab (x int primary key, y int primary key) stored " +
2378 "as kudu", "Multiple primary keys specified. Composite primary keys can " +
2379 "be specified using the PRIMARY KEY (col1, col2, ...) syntax at the end " +
2380 "of the column definition.");
2381 // Specifying the same primary key column multiple times
2382 AnalysisError("create table tab (x int, primary key (x, x)) partition by hash (x) " +
2383 "partitions 8 stored as kudu",
2384 "Column 'x' is listed multiple times as a PRIMARY KEY.");
2385 // Number of range partition boundary values should be equal to the number of range
2386 // columns.
2387 AnalysisError("create table tab (a int, b int, c int, d int, primary key(a, b, c)) " +
2388 "partition by range(a) (partition value = (1, 2), " +
2389 "partition value = 3, partition value = 4) stored as kudu",
2390 "Number of specified range partition values is different than the number of " +
2391 "partitioning columns: (2 vs 1). Range partition: 'PARTITION VALUE = (1,2)'");
2392 // Key ranges must match the column types.
2393 AnalysisError("create table tab (a int, b int, c int, d int, primary key(a, b, c)) " +
2394 "partition by hash (a, b, c) partitions 8, range (a) " +
2395 "(partition value = 1, partition value = 'abc', partition 3 <= values) " +
2396 "stored as kudu", "Range partition value 'abc' (type: STRING) is not type " +
2397 "compatible with partitioning column 'a' (type: INT).");
2398 AnalysisError("create table tab (a tinyint primary key) partition by range (a) " +
2399 "(partition value = 128) stored as kudu", "Range partition value 128 " +
2400 "(type: SMALLINT) is not type compatible with partitioning column 'a' " +
2401 "(type: TINYINT)");
2402 AnalysisError("create table tab (a smallint primary key) partition by range (a) " +
2403 "(partition value = 32768) stored as kudu", "Range partition value 32768 " +
2404 "(type: INT) is not type compatible with partitioning column 'a' " +
2405 "(type: SMALLINT)");
2406 AnalysisError("create table tab (a int primary key) partition by range (a) " +
2407 "(partition value = 2147483648) stored as kudu", "Range partition value " +
2408 "2147483648 (type: BIGINT) is not type compatible with partitioning column 'a' " +
2409 "(type: INT)");
2410 AnalysisError("create table tab (a bigint primary key) partition by range (a) " +
2411 "(partition value = 9223372036854775808) stored as kudu", "Range partition " +
2412 "value 9223372036854775808 (type: DECIMAL(19,0)) is not type compatible with " +
2413 "partitioning column 'a' (type: BIGINT)");
2414 // Test implicit casting/folding of partition values.
2415 AnalyzesOk("create table tab (a int primary key) partition by range (a) " +
2416 "(partition value = false, partition value = true) stored as kudu");
2417 // Non-key column used in PARTITION BY
2418 AnalysisError("create table tab (a int, b string, c bigint, primary key (a)) " +
2419 "partition by range (b) (partition value = 'abc') stored as kudu",
2420 "Column 'b' in 'RANGE (b) (PARTITION VALUE = 'abc')' is not a key column. " +
2421 "Only key columns can be used in PARTITION BY.");
2422 // No float range partition values
2423 AnalysisError("create table tab (a int, b int, c int, d int, primary key (a, b, c))" +
2424 "partition by hash (a, b, c) partitions 8, " +
2425 "range (a) (partition value = 1.2, partition value = 2) stored as kudu",
2426 "Range partition value 1.2 (type: DECIMAL(2,1)) is not type compatible with " +
2427 "partitioning column 'a' (type: INT).");
2428 // Non-existing column used in PARTITION BY
2429 AnalysisError("create table tab (a int, b int, primary key (a, b)) " +
2430 "partition by range(unknown_column) (partition value = 'abc') stored as kudu",
2431 "Column 'unknown_column' in 'RANGE (unknown_column) (PARTITION VALUE = 'abc')' " +
2432 "is not a key column. Only key columns can be used in PARTITION BY");
2433 // Kudu num_tablet_replicas is specified in tblproperties
2434 AnalyzesOk("create table tab (x int primary key) partition by hash (x) " +
2435 "partitions 8 stored as kudu tblproperties ('kudu.num_tablet_replicas'='1'," +
2436 "'kudu.master_addresses' = '127.0.0.1:8080, 127.0.0.1:8081')");
2437 // Kudu table name is specified in tblproperties resulting in an error
2438 AnalysisError("create table tab (x int primary key) partition by hash (x) " +
2439 "partitions 8 stored as kudu tblproperties ('kudu.table_name'='tab')",
2440 "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
2441 // No port is specified in kudu master address
2442 AnalyzesOk("create table tdata_no_port (id int primary key, name string, " +
2443 "valf float, vali bigint) partition by range(id) (partition values <= 10, " +
2444 "partition 10 < values <= 30, partition 30 < values) " +
2445 "stored as kudu tblproperties('kudu.master_addresses'='127.0.0.1')");
2446 // Not using the STORED AS KUDU syntax to specify a Kudu table
2447 AnalysisError("create table tab (x int) tblproperties (" +
2448 "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')",
2449 CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
2450 // Creating unpartitioned table results in a warning.
2451 AnalyzesOk("create table tab (x int primary key) stored as kudu tblproperties (" +
2452 "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')",
2453 "Unpartitioned Kudu tables are inefficient for large data sizes.");
2454 // Invalid value for number of replicas
2455 AnalysisError("create table t (x int primary key) stored as kudu tblproperties (" +
2456 "'kudu.num_tablet_replicas'='1.1')",
2457 "Table property 'kudu.num_tablet_replicas' must be an integer.");
2458 // Don't allow caching
2459 AnalysisError("create table tab (x int primary key) stored as kudu cached in " +
2460 "'testPool'", "A Kudu table cannot be cached in HDFS.");
2461 // LOCATION cannot be used with Kudu tables
2462 AnalysisError("create table tab (a int primary key) partition by hash (a) " +
2463 "partitions 3 stored as kudu location '/test-warehouse/'",
2464 "LOCATION cannot be specified for a Kudu table.");
2465 // Creating unpartitioned table results in a warning.
2466 AnalyzesOk("create table tab (a int, primary key (a)) stored as kudu",
2467 "Unpartitioned Kudu tables are inefficient for large data sizes.");
2468 AnalysisError("create table tab (a int) stored as kudu",
2469 "A primary key is required for a Kudu table.");
2470 // Using ROW FORMAT with a Kudu table
2471 AnalysisError("create table tab (x int primary key) " +
2472 "row format delimited escaped by 'X' stored as kudu",
2473 "ROW FORMAT cannot be specified for file format KUDU.");
2474 // Using PARTITIONED BY with a Kudu table
2475 AnalysisError("create table tab (x int primary key) " +
2476 "partitioned by (y int) stored as kudu", "PARTITIONED BY cannot be used " +
2477 "in Kudu tables.");
2478
2479 // Test unsupported Kudu types
2480 List<String> unsupportedTypes = Lists.newArrayList(
2481 "DECIMAL(9,0)", "VARCHAR(20)", "CHAR(20)",
2482 "STRUCT<f1:INT,f2:STRING>", "ARRAY<INT>", "MAP<STRING,STRING>");
2483 for (String t: unsupportedTypes) {
2484 String expectedError = String.format(
2485 "Cannot create table 'tab': Type %s is not supported in Kudu", t);
2486
2487 // Unsupported type is PK and partition col
2488 String stmt = String.format("create table tab (x %s primary key) " +
2489 "partition by hash(x) partitions 3 stored as kudu", t);
2490 AnalysisError(stmt, expectedError);
2491
2492 // Unsupported type is not PK/partition col
2493 stmt = String.format("create table tab (x int primary key, y %s) " +
2494 "partition by hash(x) partitions 3 stored as kudu", t);
2495 AnalysisError(stmt, expectedError);
2496 }
2497
2498 // Test column options
2499 String[] nullability = {"not null", "null", ""};
2500 String[] defaultVal = {"default 10", ""};
2501 String[] blockSize = {"block_size 4096", ""};
2502 for (Encoding enc: Encoding.values()) {
2503 for (CompressionAlgorithm comp: CompressionAlgorithm.values()) {
2504 for (String nul: nullability) {
2505 for (String def: defaultVal) {
2506 for (String block: blockSize) {
2507 // Test analysis for a non-key column
2508 AnalyzesOk(String.format("create table tab (x int primary key " +
2509 "not null encoding %s compression %s %s %s, y int encoding %s " +
2510 "compression %s %s %s %s) partition by hash (x) " +
2511 "partitions 3 stored as kudu", enc, comp, def, block, enc,
2512 comp, def, nul, block));
2513
2514 // For a key column
2515 String createTblStr = String.format("create table tab (x int primary key " +
2516 "%s encoding %s compression %s %s %s) partition by hash (x) " +
2517 "partitions 3 stored as kudu", nul, enc, comp, def, block);
2518 if (nul.equals("null")) {
2519 AnalysisError(createTblStr, "Primary key columns cannot be nullable");
2520 } else {
2521 AnalyzesOk(createTblStr);
2522 }
2523 }
2524 }
2525 }
2526 }
2527 }
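// The nested loops above analyze the full cross product of column options:
// |Encoding| x |CompressionAlgorithm| x 3 nullabilities x 2 defaults x
// 2 block sizes, once for a non-key column and once for a key column.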
2528 // Use NULL as the default value
2529 AnalyzesOk("create table tab (x int primary key, i1 tinyint default null, " +
2530 "i2 smallint default null, i3 int default null, i4 bigint default null, " +
2531 "vals string default null, valf float default null, vald double default null, " +
2532 "valb boolean default null) partition by hash (x) partitions 3 stored as kudu");
2533 // Use NULL as a default value on a non-nullable column
2534 AnalysisError("create table tab (x int primary key, y int not null default null) " +
2535 "partition by hash (x) partitions 3 stored as kudu", "Default value of NULL " +
2536 "not allowed on non-nullable column: 'y'");
2537 // Primary key specified using the PRIMARY KEY clause
2538 AnalyzesOk("create table tab (x int not null encoding plain_encoding " +
2539 "compression snappy block_size 1, y int null encoding rle compression lz4 " +
2540 "default 1, primary key(x)) partition by hash (x) partitions 3 " +
2541 "stored as kudu");
2542 // Primary keys can't be null
2543 AnalysisError("create table tab (x int primary key null, y int not null) " +
2544 "partition by hash (x) partitions 3 stored as kudu", "Primary key columns " +
2545 "cannot be nullable: x INT PRIMARY KEY NULL");
2546 AnalysisError("create table tab (x int not null, y int null, primary key (x, y)) " +
2547 "partition by hash (x) partitions 3 stored as kudu", "Primary key columns " +
2548 "cannot be nullable: y INT NULL");
2549 // Unsupported encoding value
2550 AnalysisError("create table tab (x int primary key, y int encoding invalid_enc) " +
2551 "partition by hash (x) partitions 3 stored as kudu", "Unsupported encoding " +
2552 "value 'INVALID_ENC'. Supported encoding values are: " +
2553 Joiner.on(", ").join(Encoding.values()));
2554 // Unsupported compression algorithm
2555 AnalysisError("create table tab (x int primary key, y int compression " +
2556 "invalid_comp) partition by hash (x) partitions 3 stored as kudu",
2557 "Unsupported compression algorithm 'INVALID_COMP'. Supported compression " +
2558 "algorithms are: " + Joiner.on(", ").join(CompressionAlgorithm.values()));
2559 // Default values
2560 AnalyzesOk("create table tab (i1 tinyint default 1, i2 smallint default 10, " +
2561 "i3 int default 100, i4 bigint default 1000, vals string default 'test', " +
2562 "valf float default cast(1.2 as float), vald double default " +
2563 "cast(3.1452 as double), valb boolean default true, " +
2564 "primary key (i1, i2, i3, i4, vals)) partition by hash (i1) partitions 3 " +
2565 "stored as kudu");
2566 AnalyzesOk("create table tab (i int primary key default 1+1+1) " +
2567 "partition by hash (i) partitions 3 stored as kudu");
2568 AnalyzesOk("create table tab (i int primary key default factorial(5)) " +
2569 "partition by hash (i) partitions 3 stored as kudu");
2570 AnalyzesOk("create table tab (i int primary key, x int null default " +
2571 "isnull(null, null)) partition by hash (i) partitions 3 stored as kudu");
2572 // Invalid default values
2573 AnalysisError("create table tab (i int primary key default 'string_val') " +
2574 "partition by hash (i) partitions 3 stored as kudu", "Default value " +
2575 "'string_val' (type: STRING) is not compatible with column 'i' (type: INT).");
2576 AnalysisError("create table tab (i int primary key, x int default 1.1) " +
2577 "partition by hash (i) partitions 3 stored as kudu",
2578 "Default value 1.1 (type: DECIMAL(2,1)) is not compatible with column " +
2579 "'x' (type: INT).");
2580 AnalysisError("create table tab (i tinyint primary key default 128) " +
2581 "partition by hash (i) partitions 3 stored as kudu", "Default value " +
2582 "128 (type: SMALLINT) is not compatible with column 'i' (type: TINYINT).");
2583 AnalysisError("create table tab (i int primary key default isnull(null, null)) " +
2584 "partition by hash (i) partitions 3 stored as kudu", "Default value of " +
2585 "NULL not allowed on non-nullable column: 'i'");
2586 AnalysisError("create table tab (i int primary key, x int not null " +
2587 "default isnull(null, null)) partition by hash (i) partitions 3 " +
2588 "stored as kudu", "Default value of NULL not allowed on non-nullable column: " +
2589 "'x'");
2590 // Invalid block_size values
2591 AnalysisError("create table tab (i int primary key block_size 1.1) " +
2592 "partition by hash (i) partitions 3 stored as kudu", "Invalid value " +
2593 "for BLOCK_SIZE: 1.1. A positive INTEGER value is expected.");
2594 AnalysisError("create table tab (i int primary key block_size 'val') " +
2595 "partition by hash (i) partitions 3 stored as kudu", "Invalid value " +
2596 "for BLOCK_SIZE: 'val'. A positive INTEGER value is expected.");
2597
2598 // Sort columns are not supported for Kudu tables.
2599 AnalysisError("create table tab (i int, x int primary key) partition by hash(x) " +
2600 "partitions 8 sort by(i) stored as kudu", "SORT BY is not supported for Kudu " +
2601 "tables.");
2602
2603 // Range partitions with TIMESTAMP
2604 AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
2605 "partition by range (partition cast('2009-01-01 00:00:00' as timestamp) " +
2606 "<= VALUES < '2009-01-02 00:00:00') stored as kudu");
2607 AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
2608 "partition by range (partition value = cast('2009-01-01 00:00:00' as timestamp" +
2609 ")) stored as kudu");
2610 AnalyzesOk("create table ts_ranges (ts timestamp primary key) " +
2611 "partition by range (partition value = '2009-01-01 00:00:00') " +
2612 "stored as kudu");
2613 AnalyzesOk("create table ts_ranges (id int, ts timestamp, primary key(id, ts))" +
2614 "partition by range (partition value = (9, cast('2009-01-01 00:00:00' as " +
2615 "timestamp))) stored as kudu");
2616 AnalyzesOk("create table ts_ranges (id int, ts timestamp, primary key(id, ts))" +
2617 "partition by range (partition value = (9, '2009-01-01 00:00:00')) " +
2618 "stored as kudu");
2619 AnalysisError("create table ts_ranges (ts timestamp primary key, i int)" +
2620 "partition by range (partition '2009-01-01 00:00:00' <= VALUES < " +
2621 "'NOT A TIMESTAMP') stored as kudu",
2622 "Range partition value 'NOT A TIMESTAMP' cannot be cast to target TIMESTAMP " +
2623 "partitioning column.");
2624 AnalysisError("create table ts_ranges (ts timestamp primary key, i int)" +
2625 "partition by range (partition 100 <= VALUES < 200) stored as kudu",
2626 "Range partition value 100 (type: TINYINT) is not type " +
2627 "compatible with partitioning column 'ts' (type: TIMESTAMP).");
2628
2629 // TIMESTAMP columns with default values
2630 AnalyzesOk("create table tdefault (id int primary key, ts timestamp default now())" +
2631 "partition by hash(id) partitions 3 stored as kudu");
2632 AnalyzesOk("create table tdefault (id int primary key, ts timestamp default " +
2633 "unix_micros_to_utc_timestamp(1230768000000000)) partition by hash(id) " +
2634 "partitions 3 stored as kudu");
2635 AnalyzesOk("create table tdefault (id int primary key, " +
2636 "ts timestamp not null default '2009-01-01 00:00:00') " +
2637 "partition by hash(id) partitions 3 stored as kudu");
2638 AnalyzesOk("create table tdefault (id int primary key, " +
2639 "ts timestamp not null default cast('2009-01-01 00:00:00' as timestamp)) " +
2640 "partition by hash(id) partitions 3 stored as kudu");
2641 AnalysisError("create table tdefault (id int primary key, ts timestamp " +
2642 "default null) partition by hash(id) partitions 3 stored as kudu",
2643 "NULL cannot be cast to a TIMESTAMP literal.");
2644 AnalysisError("create table tdefault (id int primary key, " +
2645 "ts timestamp not null default cast('00:00:00' as timestamp)) " +
2646 "partition by hash(id) partitions 3 stored as kudu",
2647 "CAST('00:00:00' AS TIMESTAMP) cannot be cast to a TIMESTAMP literal.");
2648 AnalysisError("create table tdefault (id int primary key, " +
2649 "ts timestamp not null default '2009-1 foo') " +
2650 "partition by hash(id) partitions 3 stored as kudu",
2651 "String '2009-1 foo' cannot be cast to a TIMESTAMP literal.");
2652 }
2653
2654 @Test
2655 public void TestCreateExternalKuduTable() {
2656 AnalyzesOk("create external table t stored as kudu " +
2657 "tblproperties('kudu.table_name'='t')");
2658 // Use all allowed optional table props.
2659 AnalyzesOk("create external table t stored as kudu tblproperties (" +
2660 "'kudu.table_name'='tab'," +
2661 "'kudu.master_addresses' = '127.0.0.1:8080, 127.0.0.1:8081')");
2662 // Kudu table should be specified using the STORED AS KUDU syntax.
2663 AnalysisError("create external table t tblproperties (" +
2664 "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
2665 "'kudu.table_name'='t')",
2666 CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
2667 // Columns should not be specified in an external Kudu table
2668 AnalysisError("create external table t (x int) stored as kudu " +
2669 "tblproperties('kudu.table_name'='t')",
2670 "Columns cannot be specified with an external Kudu table.");
2671 // Primary keys cannot be specified in an external Kudu table
2672 AnalysisError("create external table t (x int primary key) stored as kudu " +
2673 "tblproperties('kudu.table_name'='t')", "Primary keys cannot be specified " +
2674 "for an external Kudu table");
2675 // Invalid syntax for specifying a Kudu table
2676 AnalysisError("create external table t (x int) stored as parquet tblproperties (" +
2677 "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler'," +
2678 "'kudu.table_name'='t')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
2679 AnalysisError("create external table t stored as kudu tblproperties (" +
2680 "'storage_handler'='foo', 'kudu.table_name'='t')",
2681 "Invalid storage handler specified for Kudu table: foo");
2682 // Cannot specify the number of replicas for external Kudu tables
2683 AnalysisError("create external table tab (x int) stored as kudu " +
2684 "tblproperties ('kudu.num_tablet_replicas' = '1', " +
2685 "'kudu.table_name'='tab')",
2686 "Table property 'kudu.num_tablet_replicas' cannot be used with an external " +
2687 "Kudu table.");
2688 // Don't allow caching
2689 AnalysisError("create external table t stored as kudu cached in 'testPool' " +
2690 "tblproperties('kudu.table_name'='t')", "A Kudu table cannot be cached in HDFS.");
2691 // LOCATION cannot be used for a Kudu table
2692 AnalysisError("create external table t stored as kudu " +
2693 "location '/test-warehouse' tblproperties('kudu.table_name'='t')",
2694 "LOCATION cannot be specified for a Kudu table.");
2695 }
2696
2697 @Test
2698 public void TestCreateAvroTest() {
2699 String alltypesSchemaLoc =
2700 "hdfs:///test-warehouse/avro_schemas/functional/alltypes.json";
2701
2702 // Analysis of Avro schemas. Column definitions match the Avro schema exactly.
2703 // Note: Avro does not have tinyint or smallint types.
2704 AnalyzesOk(String.format(
2705 "create table foo_avro (id int, bool_col boolean, tinyint_col int, " +
2706 "smallint_col int, int_col int, bigint_col bigint, float_col float," +
2707 "double_col double, date_string_col string, string_col string, " +
2708 "timestamp_col timestamp) with serdeproperties ('avro.schema.url'='%s')" +
2709 "stored as avro", alltypesSchemaLoc));
2710 AnalyzesOk(String.format(
2711 "create table foo_avro (id int, bool_col boolean, tinyint_col int, " +
2712 "smallint_col int, int_col int, bigint_col bigint, float_col float," +
2713 "double_col double, date_string_col string, string_col string, " +
2714 "timestamp_col timestamp) stored as avro tblproperties ('avro.schema.url'='%s')",
2715 alltypesSchemaLoc));
2716 AnalyzesOk("create table foo_avro (string1 string) stored as avro tblproperties " +
2717 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2718 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}]}')");
2719
2720 // No column definitions.
2721 AnalyzesOk(String.format(
2722 "create table foo_avro with serdeproperties ('avro.schema.url'='%s')" +
2723 "stored as avro", alltypesSchemaLoc));
2724 AnalyzesOk(String.format(
2725 "create table foo_avro stored as avro tblproperties ('avro.schema.url'='%s')",
2726 alltypesSchemaLoc));
2727 AnalyzesOk("create table foo_avro stored as avro tblproperties " +
2728 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2729 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}]}')");
2730
2731 // Analysis of Avro schemas. Column definitions do not match Avro schema.
2732 AnalyzesOk(String.format(
2733 "create table foo_avro (id int) with serdeproperties ('avro.schema.url'='%s')" +
2734 "stored as avro", alltypesSchemaLoc),
2735 "Ignoring column definitions in favor of Avro schema.\n" +
2736 "The Avro schema has 11 column(s) but 1 column definition(s) were given.");
2737 AnalyzesOk(String.format(
2738 "create table foo_avro (bool_col boolean, string_col string) " +
2739 "stored as avro tblproperties ('avro.schema.url'='%s')",
2740 alltypesSchemaLoc),
2741 "Ignoring column definitions in favor of Avro schema.\n" +
2742 "The Avro schema has 11 column(s) but 2 column definition(s) were given.");
2743 AnalyzesOk("create table foo_avro (string1 string) stored as avro tblproperties " +
2744 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2745 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," +
2746 "{\"name\": \"string2\", \"type\": \"string\"}]}')",
2747 "Ignoring column definitions in favor of Avro schema.\n" +
2748 "The Avro schema has 2 column(s) but 1 column definition(s) were given.");
2749 // Mismatched name.
2750 AnalyzesOk(String.format(
2751 "create table foo_avro (id int, bool_col boolean, tinyint_col int, " +
2752 "smallint_col int, bad_int_col int, bigint_col bigint, float_col float," +
2753 "double_col double, date_string_col string, string_col string, " +
2754 "timestamp_col timestamp) with serdeproperties ('avro.schema.url'='%s')" +
2755 "stored as avro", alltypesSchemaLoc),
2756 "Resolved the following name and/or type inconsistencies between the column " +
2757 "definitions and the Avro schema.\n" +
2758 "Column definition at position 4: bad_int_col INT\n" +
2759 "Avro schema column at position 4: int_col INT\n" +
2760 "Resolution at position 4: int_col INT\n" +
2761 "Column definition at position 10: timestamp_col TIMESTAMP\n" +
2762 "Avro schema column at position 10: timestamp_col STRING\n" +
2763 "Resolution at position 10: timestamp_col STRING");
2764 // Mismatched type.
2765 AnalyzesOk(String.format(
2766 "create table foo_avro (id int, bool_col boolean, tinyint_col int, " +
2767 "smallint_col int, int_col int, bigint_col bigint, float_col float," +
2768 "double_col bigint, date_string_col string, string_col string, " +
2769 "timestamp_col timestamp) stored as avro tblproperties ('avro.schema.url'='%s')",
2770 alltypesSchemaLoc),
2771 "Resolved the following name and/or type inconsistencies between the column " +
2772 "definitions and the Avro schema.\n" +
2773 "Column definition at position 7: double_col BIGINT\n" +
2774 "Avro schema column at position 7: double_col DOUBLE\n" +
2775 "Resolution at position 7: double_col DOUBLE\n" +
2776 "Column definition at position 10: timestamp_col TIMESTAMP\n" +
2777 "Avro schema column at position 10: timestamp_col STRING\n" +
2778 "Resolution at position 10: timestamp_col STRING");
2779
2780 // Avro schema is inferred from column definitions.
2781 AnalyzesOk("create table foo_avro (c1 tinyint, c2 smallint, c3 int, c4 bigint, " +
2782 "c5 float, c6 double, c7 timestamp, c8 string, c9 char(10), c10 varchar(20)," +
2783 "c11 decimal(10, 5), c12 struct<f1:int,f2:string>, c13 array<int>," +
2784 "c14 map<string,string>) stored as avro");
2785 AnalyzesOk("create table foo_avro (c1 tinyint, c2 smallint, c3 int, c4 bigint, " +
2786 "c5 float, c6 double, c7 timestamp, c8 string, c9 char(10), c10 varchar(20)," +
2787 "c11 decimal(10, 5), c12 struct<f1:int,f2:string>, c13 array<int>," +
2788 "c14 map<string,string>) partitioned by (year int, month int) stored as avro");
2789 // Neither Avro schema nor column definitions.
2790 AnalysisError("create table foo_avro stored as avro tblproperties ('a'='b')",
2791 "An Avro table requires column definitions or an Avro schema.");
2792
2793 // Invalid schema URL
2794 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2795 "('avro.schema.url'='')",
2796 "Invalid avro.schema.url: . Can not create a Path from an empty string");
2797 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2798 "('avro.schema.url'='schema.avsc')",
2799 "Invalid avro.schema.url: schema.avsc. Path does not exist.");
2800 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2801 "('avro.schema.url'='hdfs://invalid*host/schema.avsc')",
2802 "Failed to read Avro schema at: hdfs://invalid*host/schema.avsc. " +
2803 "Incomplete HDFS URI, no host: hdfs://invalid*host/schema.avsc");
2804 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2805 "('avro.schema.url'='foo://bar/schema.avsc')",
2806 "Failed to read Avro schema at: foo://bar/schema.avsc. " +
2807 "No FileSystem for scheme: foo");
2808
2809 // Decimal parsing
2810 AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " +
2811 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2812 "\"fields\": [{\"name\":\"value\",\"type\":{\"type\":\"bytes\", " +
2813 "\"logicalType\":\"decimal\",\"precision\":5,\"scale\":2}}]}')");
2814 // Scale not required (defaults to zero).
2815 AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " +
2816 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2817 "\"fields\": [{\"name\":\"value\",\"type\":{\"type\":\"bytes\", " +
2818 "\"logicalType\":\"decimal\",\"precision\":5}}]}')");
2819 // Precision is always required
2820 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2821 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2822 "\"fields\": [{\"name\":\"value\",\"type\":{\"type\":\"bytes\", " +
2823 "\"logicalType\":\"decimal\",\"scale\":5}}]}')",
2824 "Error parsing Avro schema for table 'default.foo_avro': " +
2825 "No 'precision' property specified for 'decimal' logicalType");
2826 // Precision must be a positive integer; scale must be a non-negative integer.
2827 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2828 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2829 "\"fields\": [{\"name\":\"value\",\"type\":{\"type\":\"bytes\", " +
2830 "\"logicalType\":\"decimal\",\"scale\":5, \"precision\":-20}}]}')",
2831 "Error parsing Avro schema for table 'default.foo_avro': " +
2832 "Invalid decimal 'precision' property value: -20");
2833 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2834 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2835 "\"fields\": [{\"name\":\"value\",\"type\":{\"type\":\"bytes\", " +
2836 "\"logicalType\":\"decimal\",\"scale\":-1, \"precision\":20}}]}')",
2837 "Error parsing Avro schema for table 'default.foo_avro': " +
2838 "Invalid decimal 'scale' property value: -1");
2839
2840 // Invalid schema (bad JSON - missing opening bracket for the "fields" array)
2841 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2842 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2843 "\"fields\": {\"name\": \"string1\", \"type\": \"string\"}]}')",
2844 "Error parsing Avro schema for table 'default.foo_avro': " +
2845 "org.codehaus.jackson.JsonParseException: Unexpected close marker ']': "+
2846 "expected '}'");
2847
2848 // Map/Array types in Avro schema.
2849 AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " +
2850 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2851 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," +
2852 "{\"name\": \"list1\", \"type\": {\"type\":\"array\", \"items\": \"int\"}}]}')");
2853 AnalyzesOk("create table foo_avro (i int) stored as avro tblproperties " +
2854 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2855 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," +
2856 "{\"name\": \"map1\", \"type\": {\"type\":\"map\", \"values\": \"int\"}}]}')");
2857
2858 // Union is not supported
2859 AnalysisError("create table foo_avro (i int) stored as avro tblproperties " +
2860 "('avro.schema.literal'='{\"name\": \"my_record\", \"type\": \"record\", " +
2861 "\"fields\": [{\"name\": \"string1\", \"type\": \"string\"}," +
2862 "{\"name\": \"union1\", \"type\": [\"float\", \"boolean\"]}]}')",
2863 "Unsupported type 'union' of column 'union1'");
2864
2865 // TODO: Add COLLECTION ITEMS TERMINATED BY and MAP KEYS TERMINATED BY clauses.
2866 // Test struct complex type.
2867 AnalyzesOk("create table functional.new_table (" +
2868 "a struct<f1: int, f2: string, f3: timestamp, f4: boolean>, " +
2869 "b struct<f1: struct<f11: int>, f2: struct<f21: struct<f22: string>>>, " +
2870 "c struct<f1: map<int, string>, f2: array<bigint>>," +
2871 "d struct<f1: struct<f11: map<int, string>, f12: array<bigint>>>)");
2872 // Test array complex type.
2873 AnalyzesOk("create table functional.new_table (" +
2874 "a array<int>, b array<timestamp>, c array<string>, d array<boolean>, " +
2875 "e array<array<int>>, f array<array<array<string>>>, " +
2876 "g array<struct<f1: int, f2: string>>, " +
2877 "h array<map<string,int>>)");
2878 // Test map complex type.
2879 AnalyzesOk("create table functional.new_table (" +
2880 "a map<string, int>, b map<timestamp, boolean>, c map<bigint, float>, " +
2881 "d array<array<int>>, e array<array<array<string>>>, " +
2882 "f array<struct<f1: int, f2: string>>," +
2883 "g array<map<string,int>>)");
2884 // Cannot partition by a complex column.
2885 AnalysisError("create table functional.new_table (i int) " +
2886 "partitioned by (x array<int>)",
2887 "Type 'ARRAY<INT>' is not supported as partition-column type in column: x");
2888 AnalysisError("create table functional.new_table (i int) " +
2889 "partitioned by (x map<int,int>)",
2890 "Type 'MAP<INT,INT>' is not supported as partition-column type in column: x");
2891 AnalysisError("create table functional.new_table (i int) " +
2892 "partitioned by (x struct<f1:int>)",
2893 "Type 'STRUCT<f1:INT>' is not supported as partition-column type in column: x");
2894
2895 // Kudu-specific clauses used in an Avro table.
2896 AnalysisError("create table functional.new_table (i int) " +
2897 "partition by hash(i) partitions 3 stored as avro",
2898 "Only Kudu tables can use the PARTITION BY clause.");
2899 AnalysisError("create table functional.new_table (i int primary key) " +
2900 "stored as avro", "Unsupported column options for file format 'AVRO': " +
2901 "'i INT PRIMARY KEY'");
2902 }
2903
2904 @Test
2905 public void TestCreateView() throws AnalysisException {
2906 AnalyzesOk(
2907 "create view foo_new as select int_col, string_col from functional.alltypes");
2908 AnalyzesOk("create view functional.foo as select * from functional.alltypes");
2909 AnalyzesOk("create view if not exists foo as select * from functional.alltypes");
2910 AnalyzesOk("create view foo (a, b) as select int_col, string_col " +
2911 "from functional.alltypes");
2912 AnalyzesOk("create view functional.foo (a, b) as select int_col x, double_col y " +
2913 "from functional.alltypes");
2914
2915 // Creating a view on a view is ok (alltypes_view is a view on alltypes).
2916 AnalyzesOk("create view foo as select * from functional.alltypes_view");
2917 AnalyzesOk("create view foo (aaa, bbb) as select * from functional.complex_view");
2918
2919 // Create a view resulting in Hive-style auto-generated column names.
2920 AnalyzesOk("create view foo as select trim('abc'), 17 * 7");
2921
2922 // Creating a view on an HBase table is ok.
2923 AnalyzesOk("create view foo as select * from functional_hbase.alltypesagg");
2924
2925 // Complex view definition with joins and aggregates.
2926 AnalyzesOk("create view foo (cnt) as " +
2927 "select count(distinct x.int_col) from functional.alltypessmall x " +
2928 "inner join functional.alltypessmall y on (x.id = y.id) group by x.bigint_col");
2929
2930 // Test different query-statement types as view definition.
2931 AnalyzesOk("create view foo (a, b) as values(1, 'a'), (2, 'b')");
2932 AnalyzesOk("create view foo (a, b) as select 1, 'a' union all select 2, 'b'");
2933
2934 // View with a subquery
2935 AnalyzesOk("create view test_view_with_subquery as " +
2936 "select * from functional.alltypestiny t where exists " +
2937 "(select * from functional.alltypessmall s where s.id = t.id)");
2938
2939 // Mismatching number of columns in column definition and view-definition statement.
2940 AnalysisError("create view foo (a) as select int_col, string_col " +
2941 "from functional.alltypes",
2942 "Column-definition list has fewer columns (1) than the " +
2943 "view-definition query statement returns (2).");
2944 AnalysisError("create view foo (a, b, c) as select int_col " +
2945 "from functional.alltypes",
2946 "Column-definition list has more columns (3) than the " +
2947 "view-definition query statement returns (1).");
2948 // Duplicate columns in the view-definition statement.
2949 AnalysisError("create view foo as select * from functional.alltypessmall a " +
2950 "inner join functional.alltypessmall b on a.id = b.id",
2951 "Duplicate column name: id");
2952 // Duplicate columns in the column definition.
2953 AnalysisError("create view foo (a, b, a) as select int_col, int_col, int_col " +
2954 "from functional.alltypes",
2955 "Duplicate column name: a");
2956
2957 // Invalid database/view/column names.
2958 AnalysisError("create view `???`.new_view as select 1, 2, 3",
2959 "Invalid database name: ???");
2960 AnalysisError("create view `^%&` as select 1, 2, 3",
2961 "Invalid table/view name: ^%&");
2962 AnalysisError("create view foo as select 1 as `???`",
2963 "Invalid column/field name: ???");
2964 AnalysisError("create view foo(`%^&`) as select 1",
2965 "Invalid column/field name: %^&");
2966
2967 // Table/view already exists.
2968 AnalysisError("create view functional.alltypes as " +
2969 "select * from functional.alltypessmall ",
2970 "Table already exists: functional.alltypes");
2971 // Target database does not exist.
2972 AnalysisError("create view wrongdb.test as " +
2973 "select * from functional.alltypessmall ",
2974 "Database does not exist: wrongdb");
2975 // Source database does not exist.
2976 AnalysisError("create view foo as " +
2977 "select * from wrongdb.alltypessmall ",
2978 "Could not resolve table reference: 'wrongdb.alltypessmall'");
2979 // Source table does not exist.
2980 AnalysisError("create view foo as " +
2981 "select * from functional.badtbl ",
2982 "Could not resolve table reference: 'functional.badtbl'");
2983 // Analysis error in view-definition statement.
2984 AnalysisError("create view foo as " +
2985 "select int_col from functional.alltypessmall union all " +
2986 "select string_col from functional.alltypes",
2987 "Incompatible return types 'INT' and 'STRING' of exprs " +
2988 "'int_col' and 'string_col'.");
2989
2990 // View cannot have complex-typed columns because complex-typed exprs are
2991 // not supported in the select list.
2992 AnalysisError("create view functional.foo (a, b, c) as " +
2993 "select int_array_col, int_map_col, int_struct_col " +
2994 "from functional.allcomplextypes",
2995 "Expr 'int_array_col' in select list returns a complex type 'ARRAY<INT>'.\n" +
2996 "Only scalar types are allowed in the select list.");
2997 }
2998
2999 @Test
3000 public void TestUdf() throws AnalysisException {
3001 final String symbol =
3002 "'_Z8IdentityPN10impala_udf15FunctionContextERKNS_10BooleanValE'";
3003 final String udfSuffix = " LOCATION '/test-warehouse/libTestUdfs.so' " +
3004 "SYMBOL=" + symbol;
3005 final String udfSuffixIr = " LOCATION '/test-warehouse/test-udfs.ll' " +
3006 "SYMBOL=" + symbol;
3007 final String hdfsPath = "hdfs://localhost:20500/test-warehouse/libTestUdfs.so";
3008 final String javaFnSuffix = " LOCATION '/test-warehouse/impala-hive-udfs.jar' " +
3009 "SYMBOL='org.apache.impala.TestUdf'";
3010
3011 AnalyzesOk("create function foo() RETURNS int" + udfSuffix);
3012 AnalyzesOk("create function foo(int, int, string) RETURNS int" + udfSuffix);
3013 AnalyzesOk("create function foo" + javaFnSuffix);
3014 AnalyzesOk("create function foo(INT) returns INT" + javaFnSuffix);
3015
3016 // Try some fully qualified function names
3017 AnalyzesOk("create function functional.B() RETURNS int" + udfSuffix);
3018 AnalyzesOk("create function functional.B1() RETURNS int" + udfSuffix);
3019 AnalyzesOk("create function functional.`B1C`() RETURNS int" + udfSuffix);
3020
3021 // Name with underscore
3022 AnalyzesOk("create function A_B() RETURNS int" + udfSuffix);
3023
3024 // Locations for all the UDF binary types.
3025 AnalyzesOk("create function foo() RETURNS int LOCATION " +
3026 "'/test-warehouse/libTestUdfs.so' " +
3027 "SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_10BooleanValE'");
3028 AnalysisError("create function foo() RETURNS int LOCATION " +
3029 "'/test-warehouse/libTestUdfs.ll' " +
3030 "SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_10BooleanValE'",
3031 "Could not load binary: /test-warehouse/libTestUdfs.ll");
3032 AnalyzesOk("create function foo() RETURNS int LOCATION " +
3033 "'/test-warehouse/test-udfs.ll' " +
3034 "SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_10BooleanValE'");
3035 AnalyzesOk("create function foo(int) RETURNS int LOCATION " +
3036 "'/test-warehouse/test-udfs.ll' SYMBOL='Identity'");
3037
3038 AnalyzesOk("create function foo() RETURNS int LOCATION " +
3039 "'/test-warehouse/libTestUdfs.SO' " +
3040 "SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_10BooleanValE'");
3041 AnalyzesOk("create function foo() RETURNS int LOCATION " +
3042 "'/test-warehouse/hive-exec.jar' SYMBOL='a'");
3043
3044 // Test Java UDFs for unsupported types
3045 AnalysisError("create function foo() RETURNS timestamp LOCATION '/test-warehouse/hive-exec.jar' SYMBOL='a'",
3046 "Type TIMESTAMP is not supported for Java UDFs.");
3047 AnalysisError("create function foo(timestamp) RETURNS int LOCATION '/a.jar'",
3048 "Type TIMESTAMP is not supported for Java UDFs.");
3049 AnalysisError("create function foo() RETURNS decimal LOCATION '/a.jar'",
3050 "Type DECIMAL(9,0) is not supported for Java UDFs.");
3051 AnalysisError("create function foo(Decimal) RETURNS int LOCATION '/a.jar'",
3052 "Type DECIMAL(9,0) is not supported for Java UDFs.");
3053 AnalysisError("create function foo(char(5)) RETURNS int LOCATION '/a.jar'",
3054 "Type CHAR(5) is not supported for Java UDFs.");
3055 AnalysisError("create function foo(varchar(5)) RETURNS int LOCATION '/a.jar'",
3056 "Type VARCHAR(5) is not supported for Java UDFs.");
3057 AnalysisError("create function foo() RETURNS CHAR(5) LOCATION '/a.jar'",
3058 "Type CHAR(5) is not supported for Java UDFs.");
3059 AnalysisError("create function foo() RETURNS VARCHAR(5) LOCATION '/a.jar'",
3060 "Type VARCHAR(5) is not supported for Java UDFs.");
3061 AnalysisError("create function foo() RETURNS CHAR(5)" + udfSuffix,
3062 "UDFs that use CHAR are not yet supported.");
3063 AnalysisError("create function foo() RETURNS VARCHAR(5)" + udfSuffix,
3064 "UDFs that use VARCHAR are not yet supported.");
3065 AnalysisError("create function foo(CHAR(5)) RETURNS int" + udfSuffix,
3066 "UDFs that use CHAR are not yet supported.");
3067 AnalysisError("create function foo(VARCHAR(5)) RETURNS int" + udfSuffix,
3068 "UDFs that use VARCHAR are not yet supported.");
3069
3070 AnalyzesOk("create function foo() RETURNS decimal" + udfSuffix);
3071 AnalyzesOk("create function foo() RETURNS decimal(38,10)" + udfSuffix);
3072 AnalyzesOk("create function foo(Decimal, decimal(10, 2)) RETURNS int" + udfSuffix);
3073 AnalysisError("create function foo() RETURNS decimal(100)" + udfSuffix,
3074 "Decimal precision must be <= 38: 100");
3075 AnalysisError("create function foo(Decimal(2, 3)) RETURNS int" + udfSuffix,
3076 "Decimal scale (3) must be <= precision (2)");
3077
3078 // Varargs
3079 AnalyzesOk("create function foo(INT...) RETURNS int" + udfSuffix);
3080
3081 // Prepare/Close functions
3082 AnalyzesOk("create function foo() returns int" + udfSuffix
3083 + " prepare_fn='ValidateOpenPrepare'" + " close_fn='ValidateOpenClose'");
3084 AnalyzesOk("create function foo() returns int" + udfSuffixIr
3085 + " prepare_fn='ValidateOpenPrepare'" + " close_fn='ValidateOpenClose'");
3086 AnalyzesOk("create function foo() returns int" + udfSuffixIr
3087 + " prepare_fn='_Z19ValidateOpenPreparePN10impala_udf15FunctionContextENS0_18FunctionStateScopeE'"
3088 + " close_fn='_Z17ValidateOpenClosePN10impala_udf15FunctionContextENS0_18FunctionStateScopeE'");
3089 AnalysisError("create function foo() returns int" + udfSuffix + " prepare_fn=''",
3090 "Could not find symbol ''");
3091 AnalysisError("create function foo() returns int" + udfSuffix + " close_fn=''",
3092 "Could not find symbol ''");
3093 AnalysisError("create function foo() returns int" + udfSuffix +
3094 " prepare_fn='FakePrepare'",
3095 "Could not find function FakePrepare(impala_udf::FunctionContext*, "+
3096 "impala_udf::FunctionContext::FunctionStateScope) in: ");
3097
3098 // Try to create a function with the same name as a builtin
3099 AnalysisError("create function sin(double) RETURNS double" + udfSuffix,
3100 "Function cannot have the same name as a builtin: sin");
3101 AnalysisError("create function sin() RETURNS double" + udfSuffix,
3102 "Function cannot have the same name as a builtin: sin");
3103
3104 // Try to create with a bad location
3105 AnalysisError("create function foo() RETURNS int LOCATION 'bad-location' SYMBOL='c'",
3106 "URI path must be absolute: bad-location");
3107 AnalysisError("create function foo LOCATION 'bad-location' SYMBOL='c'",
3108 "URI path must be absolute: bad-location");
3109 AnalysisError("create function foo() RETURNS int LOCATION " +
3110 "'blah://localhost:50200/bad-location' SYMBOL='c'",
3111 "No FileSystem for scheme: blah");
3112 AnalysisError("create function foo LOCATION " +
3113 "'blah://localhost:50200/bad-location' SYMBOL='c'",
3114 "No FileSystem for scheme: blah");
3115 AnalysisError("create function foo() RETURNS int LOCATION " +
3116 "'file:///foo.jar' SYMBOL='c'",
3117 "Could not load binary: file:///foo.jar");
3118 AnalysisError("create function foo LOCATION " +
3119 "'file:///foo.jar' SYMBOL='c'",
3120 "Could not load binary: file:///foo.jar");
3121
3122 // Try creating UDFs with unknown extensions
3123 AnalysisError("create function foo() RETURNS int LOCATION '/binary' SYMBOL='a'",
3124 "Unknown binary type: '/binary'. Binary must end in .jar, .so or .ll");
3125 AnalysisError("create function foo() RETURNS int LOCATION '/binary.a' SYMBOL='a'",
3126 "Unknown binary type: '/binary.a'. Binary must end in .jar, .so or .ll");
3127 AnalysisError("create function foo() RETURNS int LOCATION '/binary.so.' SYMBOL='a'",
3128 "Unknown binary type: '/binary.so.'. Binary must end in .jar, .so or .ll");
3129
3130 // Try with missing symbol
3131 AnalysisError("create function foo() RETURNS int LOCATION '/binary.so'",
3132 "Argument 'SYMBOL' must be set.");
3133
3134 // Try with missing binaries and with symbols missing from the binary
3135 AnalysisError("create function foo() RETURNS int LOCATION '/blah.so' " +
3136 "SYMBOL='ab'", "Could not load binary: /blah.so");
3137 AnalysisError("create function foo() RETURNS int LOCATION '/binary.JAR' SYMBOL='a'",
3138 "Could not load binary: /binary.JAR");
3139 AnalysisError("create function foo LOCATION '/binary.JAR' SYMBOL='a'",
3140 "Could not load binary: /binary.JAR");
3141 AnalysisError("create function foo() RETURNS int " +
3142 "LOCATION '/test-warehouse/libTestUdfs.so' " +
3143 "SYMBOL='b'", "Could not find function b() in: " + hdfsPath);
3144 AnalysisError("create function foo() RETURNS int " +
3145 "LOCATION '/test-warehouse/libTestUdfs.so' " +
3146 "SYMBOL=''", "Could not find symbol ''");
3147 AnalysisError("create function foo() RETURNS int " +
3148 "LOCATION '/test-warehouse/libTestUdfs.so' " +
3149 "SYMBOL='_ZAB'",
3150 "Could not find symbol '_ZAB' in: " + hdfsPath);
3151
3152 // Infer the fully mangled symbol from the signature
3153 AnalyzesOk("create function foo() RETURNS int " +
3154 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='NoArgs'");
3155 // We can't get the return type from the symbol, so any return type will match
3156 AnalyzesOk("create function foo() RETURNS double " +
3157 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='NoArgs'");
3158 // The part the user specifies is case sensitive
3159 AnalysisError("create function foo() RETURNS int " +
3160 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='noArgs'",
3161 "Could not find function noArgs() in: " + hdfsPath);
3162 // Types no longer match
3163 AnalysisError("create function foo(int) RETURNS int " +
3164 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='NoArgs'",
3165 "Could not find function NoArgs(INT) in: " + hdfsPath);
3166
3167 // Check we can match identity for all types
3168 AnalyzesOk("create function identity(boolean) RETURNS int " +
3169 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3170 AnalyzesOk("create function identity(tinyint) RETURNS int " +
3171 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3172 AnalyzesOk("create function identity(smallint) RETURNS int " +
3173 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3174 AnalyzesOk("create function identity(int) RETURNS int " +
3175 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3176 AnalyzesOk("create function identity(bigint) RETURNS int " +
3177 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3178 AnalyzesOk("create function identity(float) RETURNS int " +
3179 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3180 AnalyzesOk("create function identity(double) RETURNS int " +
3181 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3182 AnalyzesOk("create function identity(string) RETURNS int " +
3183 "LOCATION '/test-warehouse/libTestUdfs.so' " + "SYMBOL='Identity'");
3184 AnalyzesOk("create function all_types_fn(string, boolean, tinyint, " +
3185 "smallint, int, bigint, float, double, decimal) returns int " +
3186 "location '/test-warehouse/libTestUdfs.so' symbol='AllTypes'");
3187
3188 // Try creating functions with illegal function names.
3189 AnalysisError("create function 123A() RETURNS int" + udfSuffix,
3190 "Function cannot start with a digit: 123a");
3191 AnalysisError("create function A.`1A`() RETURNS int" + udfSuffix,
3192 "Function cannot start with a digit: 1a");
3193 AnalysisError("create function A.`ABC-D`() RETURNS int" + udfSuffix,
3194 "Function names must be all al