java - java.lang.UnsatisfiedLinkError: NativeCodeLoader.buildSupportsSnappy()Z

I am trying to write a snappy block-compressed sequence file from a map-reduce job. I am using hadoop 2.0.0-cdh4.5.0 and snappy-java 1.0.4.1.

Here is my code:


package jinvestor.jhouse.mr;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;

import jinvestor.jhouse.core.House;
import jinvestor.jhouse.core.util.HouseAvroUtil;
import jinvestor.jhouse.download.HBaseHouseDAO;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.VectorWritable;

/**
 * Produces mahout vectors from House entries in HBase.
 *
 * @author Michael Scott Knapp
 */
public class HouseVectorizer {

    private final Configuration configuration;
    private final House minimumHouse;
    private final House maximumHouse;

    public HouseVectorizer(final Configuration configuration,
            final House minimumHouse, final House maximumHouse) {
        this.configuration = configuration;
        this.minimumHouse = minimumHouse;
        this.maximumHouse = maximumHouse;
    }

    public void vectorize() throws IOException, ClassNotFoundException, InterruptedException {
        JobConf jobConf = new JobConf();
        jobConf.setMapOutputKeyClass(LongWritable.class);
        jobConf.setMapOutputValueClass(VectorWritable.class);

        // we want the vectors written straight to HDFS,
        // the order does not matter.
        jobConf.setNumReduceTasks(0);

        Path outputDir = new Path("/home/cloudera/house_vectors");
        FileSystem fs = FileSystem.get(configuration);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }

        FileOutputFormat.setOutputPath(jobConf, outputDir);

        // I want the mappers to know the max and min value
        // so they can normalize the data.
        // I will add them as properties in the configuration,
        // by serializing them with avro.
        String minmax = HouseAvroUtil.toBase64String(Arrays.asList(minimumHouse,
                maximumHouse));
        jobConf.set("minmax", minmax);

        Job job = Job.getInstance(jobConf);
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("data"));
        TableMapReduceUtil.initTableMapperJob("homes", scan,
                HouseVectorizingMapper.class, LongWritable.class,
                VectorWritable.class, job);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(VectorWritable.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(VectorWritable.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, outputDir);
        job.getConfiguration().setClass("mapreduce.map.output.compress.codec",
                SnappyCodec.class,
                CompressionCodec.class);

        job.waitForCompletion(true);
    }



When I run it, I get this:


java.lang.Exception: java.lang.UnsatisfiedLinkError: org.apache.hadoop.util.NativeCodeLoader.buildSupportsSnappy()Z
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:401)
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.util.NativeCodeLoader.buildSupportsSnappy()Z
    at org.apache.hadoop.util.NativeCodeLoader.buildSupportsSnappy(Native Method)
    at org.apache.hadoop.io.compress.SnappyCodec.checkNativeCodeLoaded(SnappyCodec.java:62)
    at org.apache.hadoop.io.compress.SnappyCodec.getCompressorType(SnappyCodec.java:127)
    at org.apache.hadoop.io.compress.CodecPool.getCompressor(CodecPool.java:104)
    at org.apache.hadoop.io.compress.CodecPool.getCompressor(CodecPool.java:118)
    at org.apache.hadoop.io.SequenceFile$Writer.init(SequenceFile.java:1169)
    at org.apache.hadoop.io.SequenceFile$Writer.<init>(SequenceFile.java:1080)
    at org.apache.hadoop.io.SequenceFile$BlockCompressWriter.<init>(SequenceFile.java:1400)
    at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:274)
    at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:527)
    at org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.getSequenceWriter(SequenceFileOutputFormat.java:64)
    at org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.getRecordWriter(SequenceFileOutputFormat.java:75)
    at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.<init>(MapTask.java:617)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:737)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:338)
    at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:233)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
    at java.util.concurrent.FutureTask.run(FutureTask.java:262)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:744)



If I comment out these lines, then my test passes:


SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
job.getConfiguration().setClass("mapreduce.map.output.compress.codec",
        SnappyCodec.class,
        CompressionCodec.class);



But I really do want to use snappy compression in my sequence files. Can somebody please explain this error to me?
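For context, buildSupportsSnappy()Z is a native method inside libhadoop.so, so the UnsatisfiedLinkError means the JVM did not load that native library (or loaded a build without Snappy support) at the moment SnappyCodec asked for a compressor. The probe below is only an illustrative sketch (the class name SnappyNativeCheck is made up), using the public NativeCodeLoader API that appears in the stack trace, and assuming the same Hadoop jars are on the classpath:

import org.apache.hadoop.util.NativeCodeLoader;

// Diagnostic sketch: checks whether libhadoop.so was loaded and whether
// that build of libhadoop was compiled with Snappy support.
public class SnappyNativeCheck {
    public static void main(String[] args) {
        boolean nativeLoaded = NativeCodeLoader.isNativeCodeLoaded();
        System.out.println("libhadoop loaded: " + nativeLoaded);
        if (nativeLoaded) {
            // Same native method shown in the stack trace above; only safe to
            // call once libhadoop.so has actually been loaded.
            System.out.println("build supports snappy: "
                    + NativeCodeLoader.buildSupportsSnappy());
        }
    }
}

If "libhadoop loaded" prints false, the answers below about java.library.path / LD_LIBRARY_PATH are the relevant ones.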


Found the following information on the Cloudera community forums:

  • Make sure LD_LIBRARY_PATH and JAVA_LIBRARY_PATH contain the native directory path that holds the libsnappy.so* files.
  • Make sure LD_LIBRARY_PATH and JAVA_LIBRARY_PATH are exported in the Spark environment (spark-env.sh).

For example, I use Hortonworks HDP and have the following configuration in spark-env.sh:


export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/usr/hdp/2.2.0.0-2041/hadoop/lib/native
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/hdp/2.2.0.0-2041/hadoop/lib/native
export SPARK_YARN_USER_ENV="JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH,LD_LIBRARY_PATH=$LD_LIBRARY_PATH"



The problem is that the JRE does not include the appropriate native libraries. The snappy .so files are in your hadoop/lib/native directory and the JRE needs them; adding them to the classpath did not seem to solve my problem. I solved it like this:


$ cd /usr/lib/hadoop/lib/native
$ sudo cp *.so /usr/java/latest/jre/lib/amd64/



After that I was able to use the SnappyCodec class.
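The distinction here is between the Java classpath (where .class and .jar files are found) and java.library.path (where the JVM looks for .so files), which is why adding the native directory to the classpath has no effect. As an alternative to copying the libraries into the JRE, the JVM can be started with -Djava.library.path pointing at the native directory. The following is just an illustrative sketch (the class name NativeLibPathCheck is made up) for seeing what the running JVM actually searches:

// Illustrative check: prints the locations the JVM will search for native .so files.
public class NativeLibPathCheck {
    public static void main(String[] args) {
        System.out.println("java.library.path = " + System.getProperty("java.library.path"));
        System.out.println("LD_LIBRARY_PATH   = " + System.getenv("LD_LIBRARY_PATH"));
    }
}

For example, launching with -Djava.library.path=/usr/lib/hadoop/lib/native puts the same directory used in the cp commands above on the JVM's native search path.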

Check core-site.xml and mapred-site.xml; they should contain the correct properties and the folder path to the libraries.

core-site.xml


<property>
  <name>io.compression.codecs</name>
  <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
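As a quick way to confirm that io.compression.codecs is being picked up: CompressionCodecFactory.getCodecClasses() parses exactly that property. A small sketch (the class name ConfiguredCodecsCheck is illustrative; it assumes core-site.xml is on the classpath of the JVM you run it in):

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Sketch: lists the codec classes configured via io.compression.codecs;
// org.apache.hadoop.io.compress.SnappyCodec should appear in the output.
public class ConfiguredCodecsCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        List<Class<? extends CompressionCodec>> codecs =
                CompressionCodecFactory.getCodecClasses(conf);
        for (Class<? extends CompressionCodec> codec : codecs) {
            System.out.println(codec.getName());
        }
    }
}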



mapred-site.xml


<property>
  <name>mapreduce.map.output.compress</name>
  <value>true</value>
</property>

<property>
  <name>mapreduce.map.output.compress.codec</name>
  <value>org.apache.hadoop.io.compress.SnappyCodec</value>
</property>

<property>
  <name>mapreduce.admin.user.env</name>
  <value>LD_LIBRARY_PATH=/usr/hdp/2.2.0.0-1084/hadoop/lib/native</value>
</property>
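The same map-output settings can also be applied programmatically in the driver instead of (or in addition to) mapred-site.xml. A sketch using the property names above (the helper name enableSnappyMapOutput is made up); note this only selects the codec, the native library still has to be loadable as described earlier:

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapreduce.Job;

// Sketch: programmatic equivalent of the mapred-site.xml map-output properties,
// applied to the Job built in the question's vectorize() method.
static void enableSnappyMapOutput(Job job) {
    job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
    job.getConfiguration().setClass("mapreduce.map.output.compress.codec",
            SnappyCodec.class, CompressionCodec.class);
}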



On Windows, copy hadoop.dll into Windows\System32 and set HADOOP_HOME=HADOOP-2.6.4, and it works!

...