import pymongo import mysql.connector
Connect to MySQL database
mysql_conn = mysql.connector.connect( host="localhost", user="yourusername", password="yourpassword", database="yourdatabase" )
Connect to MongoDB database
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/") mongo_db = mongo_client["mydatabase"] mongo_collection = mongo_db["mycollection"]
Create an index for the specified collection in MongoDB
mongo_collection.create_index([("myfield", pymongo.ASCENDING)])
Buffering lists and counters to batch after every 10000 rows
bulk_data = [] bulk_count = 0
Create cursor object and retrieve data from MySQL database
mysql_cursor = mysql_conn.cursor() mysql_cursor.execute("SELECT * FROM mytable")
Loop through the result set and process each row.
for row in mysql_cursor: # 将一条记录转换成你的MongoDB文档,然后将其添加到缓冲列表。 doc = { "myfield": row[0], "anotherfield": row[1], "yetanotherfield": row[2] } bulk_data.append(doc) bulk_count += 1
# 如果我们达到了10000,请在集合中批量插入缓冲数据。 if bulk_count == 10000: mongo_collection.insert_many(bulk_data) # 重置计数器并清除缓冲数据列表 bulk_count = 0 bulk_data.clear()
Process the remaining rows, if anything needs to be processed.
if bulk_count > 0: mongo_collection.insert_many(bulk_data)
Close the MySQL connection.
mysql_conn.close()
import pymongo import mysql.connector # 连接MySQL数据库 mysql_conn = mysql.connector.connect( host="localhost", user="yourusername", password="yourpassword", database="yourdatabase" ) # 连接MongoDB数据库 mongo_client = pymongo.MongoClient("mongodb://localhost:27017/") mongo_db = mongo_client["mydatabase"] mongo_collection = mongo_db["mycollection"] # 在MongoDB中为指定集合创建索引 mongo_collection.create_index([("myfield", pymongo.ASCENDING)]) # 缓冲列表和计数器以在每10000行处理后进行批处理 bulk_data = [] bulk_count = 0 # 创建游标对象并从MySQL数据库检索数据 mysql_cursor = mysql_conn.cursor() mysql_cursor.execute("SELECT * FROM mytable") # 遍历结果集并处理每个行。 for row in mysql_cursor: # 将一条记录转换成你的MongoDB文档,然后将其添加到缓冲列表。 doc = { "myfield": row[0], "anotherfield": row[1], "yetanotherfield": row[2] } bulk_data.append(doc) bulk_count += 1 # 如果我们达到了10000,请在集合中批量插入缓冲数据。 if bulk_count == 10000: mongo_collection.insert_many(bulk_data) # 重置计数器并清除缓冲数据列表 bulk_count = 0 bulk_data.clear() # 处理剩余的行,如果有任何事情需要处理。 if bulk_count > 0: mongo_collection.insert_many(bulk_data) # 关闭MySQL连接。 mysql_conn.close()
The above is the detailed content of How to push data to Mongo in batches using MySQL. For more information, please follow other related articles on the PHP Chinese website!