最近在项目中遇到了在两个 CDH 集群之间同步数据文件的需求。调研了一圈,发现 DataX 不支持同时对两个 CDH 集群进行 Kerberos 认证,因此切换到了 ChunJun。不过 ChunJun 官网对 Kerberos 认证的配置也没有详细的示例介绍,我在查看源代码后,才确定了所需的配置项及各配置项的取值。完整的作业配置如下,供大家参考:

  1 {
  2   "job": {
  3     "content": [
  4       {
  5         "reader" : {
  6           "parameter" : {
  7             "path" : "hdfs://cdh-test-namenode05:8020/user/hive/warehouse/testuser_db.db/testuser_user/t4.txt",
  8             "hadoopConfig" : {
  9               "properties.hadoop.user.name": "testuser",
 10               "properties.dfs.ha.namenodes.nameservice1": "namenode27,namenode29",
 11               "properties.fs.defaultFS": "hdfs://nameservice1",
 12               "properties.dfs.namenode.rpc-address.nameservice1.namenode27": "cdh-test-namenode04:8020",
 13               "properties.dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
 14               "properties.dfs.namenode.rpc-address.nameservice1.namenode29": "cdh-test-namenode05:8020",
 15               "properties.dfs.nameservices": "nameservice1",
 16               "properties.fs.hdfs.impl.disable.cache": "true",
 17               "properties.dfs.client.use.datanode.hostname":"true",
 18               "dfs.client.use.datanode.hostname":"true",
 19               "properties.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
 20               "hadoop.security.authentication": "Kerberos",
 21               "dfs.namenode.kerberos.principal": "hdfs/_HOST@CDHDEV.COM",
 22               "dfs.datanode.kerberos.principal": "hdfs/_HOST@CDHDEV.COM",
 23               "yarn.resourcemanager.principal": "rm/_HOST@CDHDEV.COM",
 24               "dfs.namenode.kerberos.internal.spnego.principal": "HTTP/_HOST@CDHDEV.COM",
 25               "hadoop.security.authorization": "true",
 26               "dfs.namenode.keytab.file": "/app/data/CDH6.3.2/kerberos/testuser.keytab",
 27               "java.security.krb5.conf": "/app/data/CDH6.3.2/krb5/krb5.conf",
 28               "useLocalFile": "true",
 29               "principalFile": "/app/data/CDH6.3.2/kerberos/testuser.keytab",
 30               "principal": "testuser/cdh-test-namenode05@CDHDEV.COM"
 31             },
 32 
 33             "column": [
 34               {
 35                 "name": "id",
 36                 "type": "int"
 37               },
 38               {
 39                 "name": "name",
 40                 "type": "string"
 41               }
 42             ],
 43             "fieldDelimiter" : ",",
 44             "encoding" : "utf-8",
 45             "defaultFS": "hdfs://cdh-test-namenode05:8020",
 46             "fileType" : "text"
 47           },
 48           "name" : "hdfsreader"
 49         },
 50         "writer": {
 51           "name": "hdfswriter",
 52           "parameter": {
 53             "path": "/user/hive/warehouse/test_system_user.db/testuser",
 54             "defaultFS": "hdfs://test01:8020",
 55             "column": [
 56               {
 57                 "name": "id",
 58                 "type": "int"
 59               },
 60               {
 61                 "name": "name",
 62                 "type": "string"
 63               }
 64             ],
 65             "fileType": "text",
 66             "fieldDelimiter": ",",
 67             "compress":"gzip",
 68             "encoding": "utf-8",
 69             "fileName": "20220705",
 70             "writeMode": "overwrite",
 71             "hadoopConfig": {
 72               "properties.hadoop.user.name": "test_system_user",
 73               "properties.dfs.ha.namenodes.nameservice1": "namenode37,namenode51",
 74               "properties.fs.defaultFS": "hdfs://nameservice1",
 75               "properties.dfs.namenode.rpc-address.nameservice1.namenode51": "test02:8020",
 76               "properties.dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
 77               "properties.dfs.namenode.rpc-address.nameservice1.namenode37": "test01:8020",
 78               "properties.dfs.nameservices": "nameservice1",
 79               "properties.dfs.client.use.datanode.hostname":"true",
 80               "dfs.client.use.datanode.hostname":"true",
 81               "properties.fs.hdfs.impl.disable.cache": "true",
 82               "properties.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
 83               "hadoop.security.authentication": "Kerberos",
 84               "dfs.namenode.kerberos.principal": "hdfs/_HOST@SINOSIG.COM",
 85               "dfs.datanode.kerberos.principal": "hdfs/_HOST@SINOSIG.COM",
 86               "yarn.resourcemanager.principal": "rm/_HOST@SINOSIG.COM",
 87               "dfs.namenode.kerberos.internal.spnego.principal": "HTTP/_HOST@SINOSIG.COM",
 88               "hadoop.security.authorization": "true",
 89               "dfs.namenode.keytab.file": "/app/data/cloudera/CDH-6.3.2/kerberos/test_system_user.keytab",
 90               "java.security.krb5.conf": "/app/data/cloudera/CDH-6.3.2/krb5/krb5.conf",
 91               "useLocalFile": "true",
 92               "principalFile": "/app/data/cloudera/CDH-6.3.2/kerberos/test_system_user.keytab",
 93               "principal": "test_system_user/test02@SINOSIG.COM"
 94             }
 95           }
 96         }
 97       }
 98     ],
 99     "setting": {
100       "speed": {
101         "channel": 4,
102         "bytes": 0
103       }
104     }
105   }
106 }