IMPALA-6571: NullPointerException in SHOW CREATE TABLE for HBase tables
[impala.git] / fe / src / main / java / org / apache / impala / analysis / ToSqlUtils.java
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 package org.apache.impala.analysis;
19
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.Token;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.parse.HiveLexer;
import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.Function;
import org.apache.impala.catalog.HBaseTable;
import org.apache.impala.catalog.HdfsCompression;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.catalog.KuduColumn;
import org.apache.impala.catalog.KuduTable;
import org.apache.impala.catalog.RowFormat;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.View;
import org.apache.impala.util.KuduUtil;
55
56 /**
57 * Contains utility methods for creating SQL strings, for example,
58 * for creating identifier strings that are compatible with Hive or Impala.
59 */
public class ToSqlUtils {
  // Table properties to hide when generating the toSql() statement.
  // EXTERNAL, SORT BY, and comment are hidden because they are rendered as dedicated
  // SQL clauses rather than TBLPROPERTIES entries, e.g.,
  // "CREATE EXTERNAL TABLE <name> ... SORT BY (...) ... COMMENT <comment> ..."
  private static final ImmutableSet<String> HIDDEN_TABLE_PROPERTIES = ImmutableSet.of(
      "EXTERNAL", "comment", AlterTableSortByStmt.TBL_PROP_SORT_COLUMNS);

  /**
   * Removes all hidden properties from the given 'tblProperties' map (in place).
   */
  private static void removeHiddenTableProperties(Map<String, String> tblProperties) {
    for (String key: HIDDEN_TABLE_PROPERTIES) tblProperties.remove(key);
  }
73
74 /**
75 * Returns the list of sort columns from 'properties' or 'null' if 'properties' doesn't
76 * contain 'sort.columns'.
77 */
78 private static List<String> getSortColumns(Map<String, String> properties) {
79 String sortByKey = AlterTableSortByStmt.TBL_PROP_SORT_COLUMNS;
80 if (!properties.containsKey(sortByKey)) return null;
81 return Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(
82 properties.get(sortByKey)));
83 }
84
85 /**
86 * Returns a comma-delimited string of Kudu 'partition by' parameters on a
87 * CreateTableStmt, or null if this isn't a CreateTableStmt for a Kudu table.
88 */
89 private static String getKuduPartitionByParams(CreateTableStmt stmt) {
90 List<KuduPartitionParam> partitionParams = stmt.getKuduPartitionParams();
91 Preconditions.checkNotNull(partitionParams);
92 if (partitionParams.isEmpty()) return null;
93 List<String> paramStrings = Lists.newArrayList();
94 for (KuduPartitionParam p : partitionParams) {
95 paramStrings.add(p.toSql());
96 }
97 return Joiner.on(", ").join(paramStrings);
98 }
99
100 /**
101 * Given an unquoted identifier string, returns an identifier lexable by
102 * Impala and Hive, possibly by enclosing the original identifier in "`" quotes.
103 * For example, Hive cannot parse its own auto-generated column
104 * names "_c0", "_c1" etc. unless they are quoted. Impala and Hive keywords
105 * must also be quoted.
106 *
107 * Impala's lexer recognizes a superset of the unquoted identifiers that Hive can.
108 * At the same time, Impala's and Hive's list of keywords differ.
109 * This method always returns an identifier that Impala and Hive can recognize,
110 * although for some identifiers the quotes may not be strictly necessary for
111 * one or the other system.
112 */
113 public static String getIdentSql(String ident) {
114 boolean hiveNeedsQuotes = true;
115 HiveLexer hiveLexer = new HiveLexer(new ANTLRStringStream(ident));
116 try {
117 Token t = hiveLexer.nextToken();
118 // Check that the lexer recognizes an identifier and then EOF.
119 boolean identFound = t.getType() == HiveLexer.Identifier;
120 t = hiveLexer.nextToken();
121 // No enclosing quotes are necessary for Hive.
122 hiveNeedsQuotes = !(identFound && t.getType() == HiveLexer.EOF);
123 } catch (Exception e) {
124 // Ignore exception and just quote the identifier to be safe.
125 }
126 boolean isImpalaReserved = SqlScanner.isReserved(ident.toUpperCase());
127 // Impala's scanner recognizes the ".123" portion of "db.123_tbl" as a decimal,
128 // so while the quoting is not necessary for the given identifier itself, the quotes
129 // are needed if this identifier will be preceded by a ".".
130 boolean startsWithNumber = false;
131 if (!hiveNeedsQuotes && !isImpalaReserved) {
132 startsWithNumber = Character.isDigit(ident.charAt(0));
133 }
134 if (hiveNeedsQuotes || isImpalaReserved || startsWithNumber) return "`" + ident + "`";
135 return ident;
136 }
137
138 public static List<String> getIdentSqlList(List<String> identList) {
139 List<String> identSqlList = Lists.newArrayList();
140 for (String ident: identList) {
141 identSqlList.add(getIdentSql(ident));
142 }
143 return identSqlList;
144 }
145
146 public static String getPathSql(List<String> path) {
147 StringBuilder result = new StringBuilder();
148 for (String p: path) {
149 if (result.length() > 0) result.append(".");
150 result.append(getIdentSql(p));
151 }
152 return result.toString();
153 }
154
155 /**
156 * Returns the "CREATE TABLE" SQL string corresponding to the given CreateTableStmt
157 * statement.
158 */
159 public static String getCreateTableSql(CreateTableStmt stmt) {
160 ArrayList<String> colsSql = Lists.newArrayList();
161 for (ColumnDef col: stmt.getColumnDefs()) {
162 colsSql.add(col.toString());
163 }
164 ArrayList<String> partitionColsSql = Lists.newArrayList();
165 for (ColumnDef col: stmt.getPartitionColumnDefs()) {
166 partitionColsSql.add(col.toString());
167 }
168 String kuduParamsSql = getKuduPartitionByParams(stmt);
169 // TODO: Pass the correct compression, if applicable.
170 return getCreateTableSql(stmt.getDb(), stmt.getTbl(), stmt.getComment(), colsSql,
171 partitionColsSql, stmt.getTblPrimaryKeyColumnNames(), kuduParamsSql,
172 stmt.getSortColumns(), stmt.getTblProperties(), stmt.getSerdeProperties(),
173 stmt.isExternal(), stmt.getIfNotExists(), stmt.getRowFormat(),
174 HdfsFileFormat.fromThrift(stmt.getFileFormat()), HdfsCompression.NONE, null,
175 stmt.getLocation());
176 }
177
178 /**
179 * Returns the "CREATE TABLE" SQL string corresponding to the given
180 * CreateTableAsSelectStmt statement.
181 */
182 public static String getCreateTableSql(CreateTableAsSelectStmt stmt) {
183 CreateTableStmt innerStmt = stmt.getCreateStmt();
184 // Only add partition column labels to output. Table columns must not be specified as
185 // they are deduced from the select statement.
186 ArrayList<String> partitionColsSql = Lists.newArrayList();
187 for (ColumnDef col: innerStmt.getPartitionColumnDefs()) {
188 partitionColsSql.add(col.getColName());
189 }
190 // Use a LinkedHashMap to preserve the ordering of the table properties.
191 LinkedHashMap<String, String> properties =
192 Maps.newLinkedHashMap(innerStmt.getTblProperties());
193 removeHiddenTableProperties(properties);
194 String kuduParamsSql = getKuduPartitionByParams(innerStmt);
195 // TODO: Pass the correct compression, if applicable.
196 String createTableSql = getCreateTableSql(innerStmt.getDb(), innerStmt.getTbl(),
197 innerStmt.getComment(), null, partitionColsSql,
198 innerStmt.getTblPrimaryKeyColumnNames(), kuduParamsSql,
199 innerStmt.getSortColumns(), properties, innerStmt.getSerdeProperties(),
200 innerStmt.isExternal(), innerStmt.getIfNotExists(), innerStmt.getRowFormat(),
201 HdfsFileFormat.fromThrift(innerStmt.getFileFormat()), HdfsCompression.NONE, null,
202 innerStmt.getLocation());
203 return createTableSql + " AS " + stmt.getQueryStmt().toSql();
204 }
205
  /**
   * Returns a "CREATE TABLE" or "CREATE VIEW" statement that creates the specified
   * table (or view), reconstructed from the catalog/metastore metadata.
   */
  public static String getCreateTableSql(Table table) throws CatalogException {
    Preconditions.checkNotNull(table);
    if (table instanceof View) return getCreateViewSql((View)table);
    org.apache.hadoop.hive.metastore.api.Table msTable = table.getMetaStoreTable();
    // Use a LinkedHashMap to preserve the ordering of the table properties.
    LinkedHashMap<String, String> properties = Maps.newLinkedHashMap(msTable.getParameters());
    // Internal bookkeeping property maintained by the metastore; not user-visible DDL.
    if (properties.containsKey("transient_lastDdlTime")) {
      properties.remove("transient_lastDdlTime");
    }
    boolean isExternal = msTable.getTableType() != null &&
        msTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString());
    // Extract the clauses that are rendered separately (SORT BY, COMMENT) before
    // stripping them from the TBLPROPERTIES output.
    List<String> sortColsSql = getSortColumns(properties);
    String comment = properties.get("comment");
    removeHiddenTableProperties(properties);
    ArrayList<String> colsSql = Lists.newArrayList();
    ArrayList<String> partitionColsSql = Lists.newArrayList();
    boolean isHbaseTable = table instanceof HBaseTable;
    // The first getNumClusteringCols() columns are partition columns, except for
    // HBase tables, where clustering columns are ordinary (row key) columns.
    for (int i = 0; i < table.getColumns().size(); i++) {
      if (!isHbaseTable && i < table.getNumClusteringCols()) {
        partitionColsSql.add(columnToSql(table.getColumns().get(i)));
      } else {
        colsSql.add(columnToSql(table.getColumns().get(i)));
      }
    }
    RowFormat rowFormat = RowFormat.fromStorageDescriptor(msTable.getSd());
    HdfsFileFormat format = null;
    HdfsCompression compression = null;
    // HBase tables have no meaningful HDFS location; skip it to avoid an NPE when
    // building the LOCATION clause (IMPALA-6571).
    String location = isHbaseTable ? null : msTable.getSd().getLocation();
    Map<String, String> serdeParameters = msTable.getSd().getSerdeInfo().getParameters();

    String storageHandlerClassName = table.getStorageHandlerClassName();
    List<String> primaryKeySql = Lists.newArrayList();
    String kuduPartitionByParams = null;
    if (table instanceof KuduTable) {
      KuduTable kuduTable = (KuduTable) table;
      // Kudu tables don't use LOCATION syntax
      location = null;
      format = HdfsFileFormat.KUDU;
      // Kudu tables cannot use the Hive DDL syntax for the storage handler
      storageHandlerClassName = null;
      properties.remove(KuduTable.KEY_STORAGE_HANDLER);
      String kuduTableName = properties.get(KuduTable.KEY_TABLE_NAME);
      Preconditions.checkNotNull(kuduTableName);
      // Omit the underlying Kudu table name when it matches the default generated
      // name, so the output round-trips cleanly through CREATE TABLE.
      if (kuduTableName.equals(KuduUtil.getDefaultCreateKuduTableName(
          table.getDb().getName(), table.getName()))) {
        properties.remove(KuduTable.KEY_TABLE_NAME);
      }
      // Internal property, should not be exposed to the user.
      properties.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);

      if (!isExternal) {
        primaryKeySql.addAll(kuduTable.getPrimaryKeyColumnNames());

        List<String> paramsSql = Lists.newArrayList();
        for (KuduPartitionParam param: kuduTable.getPartitionBy()) {
          paramsSql.add(param.toSql());
        }
        kuduPartitionByParams = Joiner.on(", ").join(paramsSql);
      } else {
        // We shouldn't output the columns for external tables
        colsSql = null;
      }
    } else if (table instanceof HdfsTable) {
      String inputFormat = msTable.getSd().getInputFormat();
      format = HdfsFileFormat.fromHdfsInputFormatClass(inputFormat);
      compression = HdfsCompression.fromHdfsInputFormatClass(inputFormat);
    }
    HdfsUri tableLocation = location == null ? null : new HdfsUri(location);
    return getCreateTableSql(table.getDb().getName(), table.getName(), comment, colsSql,
        partitionColsSql, primaryKeySql, kuduPartitionByParams, sortColsSql, properties,
        serdeParameters, isExternal, false, rowFormat, format, compression,
        storageHandlerClassName, tableLocation);
  }
283
  /**
   * Returns a "CREATE TABLE" string that creates the table with the specified
   * properties. The tableName must not be null. If columnsSql is null, the schema
   * syntax will not be generated. Most other parameters may be null/empty, in which
   * case the corresponding clause is omitted.
   */
  public static String getCreateTableSql(String dbName, String tableName,
      String tableComment, List<String> columnsSql, List<String> partitionColumnsSql,
      List<String> primaryKeysSql, String kuduPartitionByParams,
      List<String> sortColsSql, Map<String, String> tblProperties,
      Map<String, String> serdeParameters, boolean isExternal, boolean ifNotExists,
      RowFormat rowFormat, HdfsFileFormat fileFormat, HdfsCompression compression,
      String storageHandlerClass, HdfsUri location) {
    Preconditions.checkNotNull(tableName);
    StringBuilder sb = new StringBuilder("CREATE ");
    if (isExternal) sb.append("EXTERNAL ");
    sb.append("TABLE ");
    if (ifNotExists) sb.append("IF NOT EXISTS ");
    if (dbName != null) sb.append(dbName + ".");
    sb.append(tableName);
    // Column list; an inline PRIMARY KEY clause follows the columns when present.
    if (columnsSql != null && !columnsSql.isEmpty()) {
      sb.append(" (\n ");
      sb.append(Joiner.on(",\n ").join(columnsSql));
      if (primaryKeysSql != null && !primaryKeysSql.isEmpty()) {
        sb.append(",\n PRIMARY KEY (");
        Joiner.on(", ").appendTo(sb, primaryKeysSql).append(")");
      }
      sb.append("\n)");
    } else {
      // CTAS for Kudu tables still print the primary key
      if (primaryKeysSql != null && !primaryKeysSql.isEmpty()) {
        sb.append("\n PRIMARY KEY (");
        Joiner.on(", ").appendTo(sb, primaryKeysSql).append(")");
      }
    }
    sb.append("\n");

    // Hive/HDFS-style partitioning.
    if (partitionColumnsSql != null && partitionColumnsSql.size() > 0) {
      sb.append(String.format("PARTITIONED BY (\n %s\n)\n",
          Joiner.on(", \n ").join(partitionColumnsSql)));
    }

    // Kudu-style partitioning.
    if (kuduPartitionByParams != null && !kuduPartitionByParams.equals("")) {
      sb.append("PARTITION BY " + kuduPartitionByParams + "\n");
    }

    if (sortColsSql != null) {
      sb.append(String.format("SORT BY (\n %s\n)\n",
          Joiner.on(", \n ").join(sortColsSql)));
    }

    // NOTE(review): tableComment is embedded without escaping; a comment containing
    // a single quote would yield invalid SQL — confirm upstream sanitization.
    if (tableComment != null) sb.append(" COMMENT '" + tableComment + "'\n");

    // ROW FORMAT DELIMITED clause; each delimiter is Java-escaped for display.
    if (rowFormat != null && !rowFormat.isDefault()) {
      sb.append("ROW FORMAT DELIMITED");
      if (rowFormat.getFieldDelimiter() != null) {
        String fieldDelim = StringEscapeUtils.escapeJava(rowFormat.getFieldDelimiter());
        sb.append(" FIELDS TERMINATED BY '" + fieldDelim + "'");
      }
      if (rowFormat.getEscapeChar() != null) {
        String escapeChar = StringEscapeUtils.escapeJava(rowFormat.getEscapeChar());
        sb.append(" ESCAPED BY '" + escapeChar + "'");
      }
      if (rowFormat.getLineDelimiter() != null) {
        String lineDelim = StringEscapeUtils.escapeJava(rowFormat.getLineDelimiter());
        sb.append(" LINES TERMINATED BY '" + lineDelim + "'");
      }
      sb.append("\n");
    }

    if (storageHandlerClass == null) {
      // TODO: Remove this special case when we have the LZO_TEXT writer
      // We must handle LZO_TEXT specially because Impala does not yet support creating
      // tables with this row format. In this case, we cannot output "WITH
      // SERDEPROPERTIES" because Hive does not support it with "STORED AS". For any
      // other HdfsFileFormat we want to output the serdeproperties because it is
      // supported by Impala.
      if (compression != HdfsCompression.LZO &&
          compression != HdfsCompression.LZO_INDEX &&
          serdeParameters != null && !serdeParameters.isEmpty()) {
        sb.append(
            "WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
      }

      if (fileFormat != null) {
        sb.append("STORED AS " + fileFormat.toSql(compression) + "\n");
      }
    } else {
      // If the storageHandlerClass is set, then we will generate the proper Hive DDL
      // because we do not yet support creating HBase tables via Impala.
      sb.append("STORED BY '" + storageHandlerClass + "'\n");
      if (serdeParameters != null && !serdeParameters.isEmpty()) {
        sb.append(
            "WITH SERDEPROPERTIES " + propertyMapToSql(serdeParameters) + "\n");
      }
    }
    if (location != null) {
      sb.append("LOCATION '" + location.toString() + "'\n");
    }
    if (tblProperties != null && !tblProperties.isEmpty()) {
      sb.append("TBLPROPERTIES " + propertyMapToSql(tblProperties));
    }
    return sb.toString();
  }
387
388 public static String getCreateFunctionSql(List<Function> functions) {
389 Preconditions.checkNotNull(functions);
390 StringBuilder sb = new StringBuilder();
391 for (Function fn: functions) {
392 sb.append(fn.toSql(false));
393 }
394 return sb.toString();
395 }
396
397 public static String getCreateViewSql(View view) {
398 StringBuffer sb = new StringBuffer();
399 sb.append("CREATE VIEW ");
400 // Use toSql() to ensure that the table name and query statement are normalized
401 // and identifiers are quoted.
402 sb.append(view.getTableName().toSql());
403 sb.append(" AS\n");
404 sb.append(view.getQueryStmt().toSql());
405 return sb.toString();
406 }
407
408 private static String columnToSql(Column col) {
409 StringBuilder sb = new StringBuilder(col.getName());
410 if (col.getType() != null) sb.append(" " + col.getType().toSql());
411 if (col instanceof KuduColumn) {
412 KuduColumn kuduCol = (KuduColumn) col;
413 Boolean isNullable = kuduCol.isNullable();
414 if (isNullable != null) sb.append(isNullable ? " NULL" : " NOT NULL");
415 if (kuduCol.getEncoding() != null) sb.append(" ENCODING " + kuduCol.getEncoding());
416 if (kuduCol.getCompression() != null) {
417 sb.append(" COMPRESSION " + kuduCol.getCompression());
418 }
419 if (kuduCol.hasDefaultValue()) {
420 sb.append(" DEFAULT " + kuduCol.getDefaultValueSql());
421 }
422 if (kuduCol.getBlockSize() != 0) {
423 sb.append(String.format(" BLOCK_SIZE %d", kuduCol.getBlockSize()));
424 }
425 }
426 if (!Strings.isNullOrEmpty(col.getComment())) {
427 sb.append(String.format(" COMMENT '%s'", col.getComment()));
428 }
429 return sb.toString();
430 }
431
432 private static String propertyMapToSql(Map<String, String> propertyMap) {
433 // Sort entries on the key to ensure output is deterministic for tests (IMPALA-5757).
434 List<Entry<String, String>> mapEntries = Lists.newArrayList(propertyMap.entrySet());
435 Collections.sort(mapEntries, new Comparator<Entry<String, String>>() {
436 public int compare(Entry<String, String> o1, Entry<String, String> o2) {
437 return ObjectUtils.compare(o1.getKey(), o2.getKey());
438 } });
439
440 List<String> properties = Lists.newArrayList();
441 for (Map.Entry<String, String> entry: mapEntries) {
442 properties.add(String.format("'%s'='%s'", entry.getKey(),
443 // Properties may contain characters that need to be escaped.
444 // e.g. If the row format escape delimiter is '\', the map of serde properties
445 // from the metastore table will contain 'escape.delim' => '\', which is not
446 // properly escaped.
447 StringEscapeUtils.escapeJava(entry.getValue())));
448 }
449 return "(" + Joiner.on(", ").join(properties) + ")";
450 }
451
452 /**
453 * Returns a SQL representation of the given list of hints. Uses the end-of-line
454 * commented plan hint style such that hinted views created by Impala are readable by
455 * Hive (parsed as a comment by Hive).
456 */
457 public static String getPlanHintsSql(List<PlanHint> hints) {
458 Preconditions.checkNotNull(hints);
459 if (hints.isEmpty()) return "";
460 StringBuilder sb = new StringBuilder();
461 sb.append("\n-- +");
462 sb.append(Joiner.on(",").join(hints));
463 sb.append("\n");
464 return sb.toString();
465 }
466 }