在http://blog.csdn.net/sunflower_cao/article/details/28266939 写过可以通过继承 Writable, DBWritable实现在reduce过程中讲结果写入到mysql数据库里边,但是一直有一个问题就是只能实现insert 没法去更新已经存在的数据,这就导致不同的mapreduce程序获得的数据只能插入到不同的数据库中 在使用的时候需要建立view或者使用复杂的sql语句去查询,今天调查了下,发现可以通过重写DBOutputFormat
上代码:
TblsWritable.java
import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.SQLException;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapreduce.lib.db.DBWritable;/** * 重写DBWritable * * @author caozw TblsWritable需要向mysql中写入数据 */public class TblsWritable implements Writable, DBWritable { String initTime; String new_android_user; String new_ios_user; String new_total_user; String iosUserTotal; String androidUserTotal; String userTotal; public TblsWritable() { } public TblsWritable(String initTime, String new_android_user, String new_ios_user, String new_total_user, String iosUserTotal, String androidUserTotal, String userTotal) { this.initTime = initTime; this.new_android_user = new_android_user; this.new_ios_user = new_ios_user; this.new_total_user = new_total_user; this.iosUserTotal = iosUserTotal; this.androidUserTotal = androidUserTotal; this.userTotal = userTotal; } @Override public void write(PreparedStatement statement) throws SQLException { statement.setString(1, this.new_android_user); statement.setString(2, this.new_ios_user); statement.setString(3, this.new_total_user); statement.setString(4, this.androidUserTotal); statement.setString(5, this.iosUserTotal); statement.setString(6, this.userTotal); statement.setString(7, this.initTime); } @Override public void readFields(ResultSet resultSet) throws SQLException { this.new_android_user = resultSet.getString(1); this.new_ios_user = resultSet.getString(2); this.new_total_user = resultSet.getString(3); this.androidUserTotal = resultSet.getString(4); this.iosUserTotal = resultSet.getString(5); this.userTotal = resultSet.getString(6); this.initTime = resultSet.getString(7); } @Override public void write(DataOutput out) throws IOException { out.writeUTF(this.new_android_user); out.writeUTF(this.new_ios_user); out.writeUTF(this.new_total_user); out.writeUTF(this.androidUserTotal); out.writeUTF(this.iosUserTotal); out.writeUTF(this.userTotal); out.writeUTF(this.initTime); } @Override public void readFields(DataInput in) throws IOException { this.new_android_user = in.readUTF(); this.new_ios_user = in.readUTF(); this.new_total_user = in.readUTF(); this.androidUserTotal = in.readUTF(); this.iosUserTotal = in.readUTF(); this.userTotal = in.readUTF(); this.initTime = in.readUTF(); } public String toString() { return new String(this.initTime + " " + this.new_android_user + " " + this.new_ios_user + " " + this.new_total_user + " " + this.androidUserTotal + " " + this.iosUserTotal + " " + this.userTotal); }}
WriteDataToMysql.java
import java.io.IOException;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;/** * 将mapreduce的结果数据写入mysql中 * * @author caozw */public class WriteDataToMysql { public static String[] fieldNames = { "INITTIME", "NEW_ANDROID_USER", "NEW_IOS_USER", "NEW_USER_TOTAL", "TOTAL_ANDROID_USER", "TOTAL_IOS_USER", "TOTAL_USER" }; public static String table = "USER_INFO_STATIC"; public static class ConnMysqlMapper extends Mapper<longwritable text intwritable> { enum Counter { LINESKIP, } private final static IntWritable one = new IntWritable(1); private final static IntWritable zero = new IntWritable(0); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { try { String line = value.toString(); String[] strings = line.split("/t"); String initTime = strings[1]; String devType = strings[4]; if (initTime.length() == 19) { SimpleDateFormat sdf = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss"); Date date = sdf.parse(initTime); if ("1".equals(devType)) { context.write(new Text(initTime.substring(0, 10)), one); context.write(new Text(initTime.substring(0, 10)), zero); } } else { // System.err.println(initTime); context.getCounter(Counter.LINESKIP).increment(1); } // } catch (ArrayIndexOutOfBoundsException e) { } catch (ArrayIndexOutOfBoundsException e) { context.getCounter(Counter.LINESKIP).increment(1); return; } catch (ParseException e) { context.getCounter(Counter.LINESKIP).increment(1); return; } } } public static class ConnMysqlReducer extends Reducer<text intwritable tblswritable> { static int iosUserTotal = 0; static int androidUserTotal = 0; public void reduce(Text key, Iterable<intwritable> values, Context context) throws IOException, InterruptedException { int android = 0; int ios = 0; int total = 0; for (Iterator<intwritable> itr = values.iterator(); itr.hasNext();) { total++; if (0 == itr.next().get()) { android++; } else { ios++; } } iosUserTotal += ios; androidUserTotal += android; /* * System.err.println(key.toString() + ":" + String.valueOf(android) * + ":" + String.valueOf(ios) + ":" + String.valueOf(total)); */ context.write( new TblsWritable(key.toString(), String.valueOf(android), String.valueOf(ios), String.valueOf(total), String .valueOf(androidUserTotal), String .valueOf(iosUserTotal), String .valueOf(androidUserTotal + iosUserTotal)), null); } } public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", "jdbc:mysql://127.0.0.1:3306/XINGXUNTONG", "hadoop", "123456"); Job job = new Job(conf, "test mysql connection"); job.setJarByClass(WriteDataToMysql.class); job.setMapperClass(ConnMysqlMapper.class); job.setReducerClass(ConnMysqlReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setSpeculativeExecution(false); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(MysqlDBOutputFormat.class); //job.setOutputFormatClass(DBOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); // DBOutputFormat.setOutput(job, "test", "initTime", "new_user_total"); //DBOutputFormat.setOutput(job, table, fieldNames); MysqlDBOutputFormat.setOutput(job, table, fieldNames); System.exit(job.waitForCompletion(true) ? 0 : 1); }}</intwritable></intwritable></text></longwritable>
MysqlDBOutputFormat.java
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.SQLException;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.hadoop.classification.InterfaceAudience;import org.apache.hadoop.classification.InterfaceStability;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.OutputCommitter;import org.apache.hadoop.mapreduce.OutputFormat;import org.apache.hadoop.mapreduce.RecordWriter;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.db.DBWritable;import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.StringUtils;/** * A OutputFormat that sends the reduce output to a SQL table. * <p> * {@link MysqlDBOutputFormat} accepts <key> pairs, where * key has a type extending DBWritable. Returned {@link RecordWriter} * writes <b>only the key</b> to the database with a batch SQL query. * */@InterfaceAudience.Public@InterfaceStability.Stablepublic class MysqlDBOutputFormat<k extends dbwritable v> extends OutputFormat<k> { private static final Log LOG = LogFactory.getLog(MysqlDBOutputFormat.class); public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {} public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context); } /** * A RecordWriter that writes the reduce output to a SQL table */ @InterfaceStability.Evolving public class DBRecordWriter extends RecordWriter<k v> { private Connection connection; private PreparedStatement statement; public DBRecordWriter() throws SQLException { } public DBRecordWriter(Connection connection , PreparedStatement statement) throws SQLException { this.connection = connection; this.statement = statement; this.connection.setAutoCommit(false); } public Connection getConnection() { return connection; } public PreparedStatement getStatement() { return statement; } /** {@inheritDoc} */ public void close(TaskAttemptContext context) throws IOException { try { statement.executeBatch(); connection.commit(); } catch (SQLException e) { try { connection.rollback(); } catch (SQLException ex) { LOG.warn(StringUtils.stringifyException(ex)); } throw new IOException(e.getMessage()); } finally { try { statement.close(); connection.close(); } catch (SQLException ex) { throw new IOException(ex.getMessage()); } } } /** {@inheritDoc} */ public void write(K key, V value) throws IOException { try { key.write(statement); statement.addBatch(); } catch (SQLException e) { e.printStackTrace(); } } } /** * Constructs the query used as the prepared statement to insert data. * * @param table * the table to insert into * @param fieldNames * the fields to insert into. If field names are unknown, supply an * array of nulls. */ public String constructQuery(String table, String[] fieldNames) { if (fieldNames == null) { throw new IllegalArgumentException( "Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("UPDATE ").append(table); System.err.println("fieldNames.length:" + fieldNames.length); if (fieldNames.length > 0) { query.append(" SET "); query.append(fieldNames[1] + " = ?"); query.append("," + fieldNames[2] + " = ?"); query.append("," + fieldNames[3] + " = ?"); query.append("," + fieldNames[4] + " = ?"); query.append("," + fieldNames[5] + " = ?"); query.append("," + fieldNames[6] + " = ?"); query.append(" WHERE "); query.append(fieldNames[0] + " = ?"); System.err.println(query.toString()); return query.toString(); } else { return null; } } /** {@inheritDoc} */ public RecordWriter<k v> getRecordWriter(TaskAttemptContext context) throws IOException { DBConfiguration dbConf = new DBConfiguration(context.getConfiguration()); String tableName = dbConf.getOutputTableName(); String[] fieldNames = dbConf.getOutputFieldNames(); if(fieldNames == null) { fieldNames = new String[dbConf.getOutputFieldCount()]; } try { Connection connection = dbConf.getConnection(); PreparedStatement statement = null; statement = connection.prepareStatement( constructQuery(tableName, fieldNames)); return new DBRecordWriter(connection, statement); } catch (Exception ex) { throw new IOException(ex.getMessage()); } } /** * Initializes the reduce-part of the job with * the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldNames The field names in the table. */ public static void setOutput(Job job, String tableName, String... fieldNames) throws IOException { if(fieldNames.length > 0 && fieldNames[0] != null) { DBConfiguration dbConf = setOutput(job, tableName); dbConf.setOutputFieldNames(fieldNames); } else { if (fieldNames.length > 0) { setOutput(job, tableName, fieldNames.length); } else { throw new IllegalArgumentException( "Field names must be greater than 0"); } } } /** * Initializes the reduce-part of the job * with the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldCount the number of fields in the table. */ public static void setOutput(Job job, String tableName, int fieldCount) throws IOException { DBConfiguration dbConf = setOutput(job, tableName); dbConf.setOutputFieldCount(fieldCount); } private static DBConfiguration setOutput(Job job, String tableName) throws IOException { job.setOutputFormatClass(MysqlDBOutputFormat.class); job.setReduceSpeculativeExecution(false); DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); dbConf.setOutputTableName(tableName); return dbConf; }}</k></k></k></k></key></p>
MysqlDBOutputFormat.java是将hadoop源码里边的DBOutputFormat.java拿过来重写了里边的constructQuery的方法,在生成sql语句的时候产生update的sql语句来实现的
DBOutputFormat.java里边的constructQuery
public String constructQuery(String table, String[] fieldNames) { if(fieldNames == null) { throw new IllegalArgumentException("Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("INSERT INTO ").append(table); if (fieldNames.length > 0 && fieldNames[0] != null) { query.append(" ("); for (int i = 0; i <p><br>重写后:</p><pre class="brush:php;toolbar:false">public String constructQuery(String table, String[] fieldNames) { if (fieldNames == null) { throw new IllegalArgumentException( "Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("UPDATE ").append(table); System.err.println("fieldNames.length:" + fieldNames.length); if (fieldNames.length > 0) { query.append(" SET "); query.append(fieldNames[1] + " = ?"); query.append("," + fieldNames[2] + " = ?"); query.append("," + fieldNames[3] + " = ?"); query.append("," + fieldNames[4] + " = ?"); query.append("," + fieldNames[5] + " = ?"); query.append("," + fieldNames[6] + " = ?"); query.append(" WHERE "); query.append(fieldNames[0] + " = ?"); System.err.println(query.toString()); return query.toString(); } else { return null; } }
按照java的思想我这里其实可以通过extends DBOutputFomat来实现update mysql的功能 ,但是我试了死活不行,暂且记下 有时间再来仔细揣摩
重载方式代码(测试未通过)有知道为什么的还请不吝赐教
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;import org.apache.hadoop.mapreduce.lib.db.DBWritable;public class MysqlOutputFormat1<k extends dbwritable> extends DBOutputFormat<k v> { public String constructQuery(String table, String[] fieldNames) { if (fieldNames == null) { throw new IllegalArgumentException( "Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("UPDATE ").append(table); System.err.println("fieldNames.length:"+fieldNames.length); if (fieldNames.length > 0) { query.append(" SET "); query.append(fieldNames[1] + " = ?"); query.append("," + fieldNames[2] + " = ?"); query.append("," + fieldNames[3] + " = ?"); query.append("," + fieldNames[4] + " = ?"); query.append("," + fieldNames[5] + " = ?"); query.append("," + fieldNames[6] + " = ?"); query.append(" WHERE "); query.append(fieldNames[0] + " = ?"); System.err.println(query.toString()); return query.toString(); } else { return null; } }}</k></k>
另外可以通过我感觉可以通过sql语句的merge方式生成merge语句从而实现有则更新 没有则插入 参考网页http://en.wikipedia.org/wiki/Merge_%28SQL%29
--http://en.wikipedia.org/wiki/Merge_%28SQL%29 MERGE INTO tablename USING table_reference ON (condition) WHEN MATCHED THEN UPDATE SET column1 = value1 [, column2 = value2 ...] WHEN NOT MATCHED THEN INSERT (column1 [, column2 ...]) VALUES (value1 [, value2 ...

MySQL是一种开源的关系型数据库管理系统,主要用于快速、可靠地存储和检索数据。其工作原理包括客户端请求、查询解析、执行查询和返回结果。使用示例包括创建表、插入和查询数据,以及高级功能如JOIN操作。常见错误涉及SQL语法、数据类型和权限问题,优化建议包括使用索引、优化查询和分表分区。

MySQL是一个开源的关系型数据库管理系统,适用于数据存储、管理、查询和安全。1.它支持多种操作系统,广泛应用于Web应用等领域。2.通过客户端-服务器架构和不同存储引擎,MySQL高效处理数据。3.基本用法包括创建数据库和表,插入、查询和更新数据。4.高级用法涉及复杂查询和存储过程。5.常见错误可通过EXPLAIN语句调试。6.性能优化包括合理使用索引和优化查询语句。

选择MySQL的原因是其性能、可靠性、易用性和社区支持。1.MySQL提供高效的数据存储和检索功能,支持多种数据类型和高级查询操作。2.采用客户端-服务器架构和多种存储引擎,支持事务和查询优化。3.易于使用,支持多种操作系统和编程语言。4.拥有强大的社区支持,提供丰富的资源和解决方案。

InnoDB的锁机制包括共享锁、排他锁、意向锁、记录锁、间隙锁和下一个键锁。1.共享锁允许事务读取数据而不阻止其他事务读取。2.排他锁阻止其他事务读取和修改数据。3.意向锁优化锁效率。4.记录锁锁定索引记录。5.间隙锁锁定索引记录间隙。6.下一个键锁是记录锁和间隙锁的组合,确保数据一致性。

MySQL查询性能不佳的原因主要包括没有使用索引、查询优化器选择错误的执行计划、表设计不合理、数据量过大和锁竞争。 1.没有索引导致查询缓慢,添加索引后可显着提升性能。 2.使用EXPLAIN命令可以分析查询计划,找出优化器错误。 3.重构表结构和优化JOIN条件可改善表设计问题。 4.数据量大时,采用分区和分表策略。 5.高并发环境下,优化事务和锁策略可减少锁竞争。

在数据库优化中,应根据查询需求选择索引策略:1.当查询涉及多个列且条件顺序固定时,使用复合索引;2.当查询涉及多个列但条件顺序不固定时,使用多个单列索引。复合索引适用于优化多列查询,单列索引则适合单列查询。

要优化MySQL慢查询,需使用slowquerylog和performance_schema:1.启用slowquerylog并设置阈值,记录慢查询;2.利用performance_schema分析查询执行细节,找出性能瓶颈并优化。

MySQL和SQL是开发者必备技能。1.MySQL是开源的关系型数据库管理系统,SQL是用于管理和操作数据库的标准语言。2.MySQL通过高效的数据存储和检索功能支持多种存储引擎,SQL通过简单语句完成复杂数据操作。3.使用示例包括基本查询和高级查询,如按条件过滤和排序。4.常见错误包括语法错误和性能问题,可通过检查SQL语句和使用EXPLAIN命令优化。5.性能优化技巧包括使用索引、避免全表扫描、优化JOIN操作和提升代码可读性。


热AI工具

Undresser.AI Undress
人工智能驱动的应用程序,用于创建逼真的裸体照片

AI Clothes Remover
用于从照片中去除衣服的在线人工智能工具。

Undress AI Tool
免费脱衣服图片

Clothoff.io
AI脱衣机

AI Hentai Generator
免费生成ai无尽的。

热门文章

热工具

EditPlus 中文破解版
体积小,语法高亮,不支持代码提示功能

记事本++7.3.1
好用且免费的代码编辑器

SecLists
SecLists是最终安全测试人员的伙伴。它是一个包含各种类型列表的集合,这些列表在安全评估过程中经常使用,都在一个地方。SecLists通过方便地提供安全测试人员可能需要的所有列表,帮助提高安全测试的效率和生产力。列表类型包括用户名、密码、URL、模糊测试有效载荷、敏感数据模式、Web shell等等。测试人员只需将此存储库拉到新的测试机上,他就可以访问到所需的每种类型的列表。

MinGW - 适用于 Windows 的极简 GNU
这个项目正在迁移到osdn.net/projects/mingw的过程中,你可以继续在那里关注我们。MinGW:GNU编译器集合(GCC)的本地Windows移植版本,可自由分发的导入库和用于构建本地Windows应用程序的头文件;包括对MSVC运行时的扩展,以支持C99功能。MinGW的所有软件都可以在64位Windows平台上运行。

ZendStudio 13.5.1 Mac
功能强大的PHP集成开发环境