Maison > Article > développement back-end > csv2mysql
<span style="color: #0000ff">import</span><span style="color: #000000"> os </span><span style="color: #0000ff">import</span><span style="color: #000000"> re </span><span style="color: #0000ff">import</span><span style="color: #000000"> sys </span><span style="color: #0000ff">import</span><span style="color: #000000"> csv </span><span style="color: #0000ff">import</span><span style="color: #000000"> time </span><span style="color: #0000ff">import</span><span style="color: #000000"> argparse </span><span style="color: #0000ff">import</span><span style="color: #000000"> collections </span><span style="color: #0000ff">import</span><span style="color: #000000"> MySQLdb </span><span style="color: #0000ff">import</span><span style="color: #000000"> warnings </span><span style="color: #008000">#</span><span style="color: #008000"> suppress annoying mysql warnings</span> warnings.filterwarnings(action=<span style="color: #800000">'</span><span style="color: #800000">ignore</span><span style="color: #800000">'</span>, category=<span style="color: #000000">MySQLdb.Warning) </span><span style="color: #0000ff">def</span><span style="color: #000000"> get_type(s): </span><span style="color: #800000">"""</span><span style="color: #800000">Find type for this string </span><span style="color: #800000">"""</span> <span style="color: #008000">#</span><span style="color: #008000"> try integer type</span> <span style="color: #0000ff">try</span><span style="color: #000000">: v </span>=<span style="color: #000000"> int(s) </span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError: </span><span style="color: #0000ff">pass</span> <span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">if</span> abs(v) > 2147483647<span style="color: #000000">: </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">bigint</span><span style="color: #800000">'</span> <span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">int</span><span style="color: #800000">'</span> <span style="color: #008000">#</span><span style="color: #008000"> try float type</span> <span style="color: #0000ff">try</span><span style="color: #000000">: float(s) </span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError: </span><span style="color: #0000ff">pass</span> <span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">double</span><span style="color: #800000">'</span> <span style="color: #008000">#</span><span style="color: #008000"> check for timestamp</span> dt_formats =<span style="color: #000000"> ( (</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d %H:%M:%S</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">datetime</span><span style="color: #800000">'</span><span style="color: #000000">), (</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d %H:%M:%S.%f</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">datetime</span><span style="color: #800000">'</span><span style="color: #000000">), (</span><span style="color: #800000">'</span><span style="color: #800000">%Y-%m-%d</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">date</span><span style="color: #800000">'</span><span style="color: #000000">), (</span><span style="color: #800000">'</span><span style="color: #800000">%H:%M:%S</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">time</span><span style="color: #800000">'</span><span style="color: #000000">), ) </span><span style="color: #0000ff">for</span> dt_format, dt_type <span style="color: #0000ff">in</span><span style="color: #000000"> dt_formats: </span><span style="color: #0000ff">try</span><span style="color: #000000">: time.strptime(s, dt_format) </span><span style="color: #0000ff">except</span><span style="color: #000000"> ValueError: </span><span style="color: #0000ff">pass</span> <span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">return</span><span style="color: #000000"> dt_type </span><span style="color: #008000">#</span><span style="color: #008000"> doesn't match any other types so assume text</span> <span style="color: #0000ff">if</span> len(s) > 255<span style="color: #000000">: </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">text</span><span style="color: #800000">'</span> <span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">varchar(255)</span><span style="color: #800000">'</span> <span style="color: #0000ff">def</span> most_common(l, default=<span style="color: #800000">'</span><span style="color: #800000">varchar(255)</span><span style="color: #800000">'</span><span style="color: #000000">): </span><span style="color: #800000">"""</span><span style="color: #800000">Return most common value from list </span><span style="color: #800000">"""</span> <span style="color: #008000">#</span><span style="color: #008000"> some formats trump others</span> <span style="color: #0000ff">if</span><span style="color: #000000"> l: </span><span style="color: #0000ff">for</span> dt_type <span style="color: #0000ff">in</span> (<span style="color: #800000">'</span><span style="color: #800000">text</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">bigint</span><span style="color: #800000">'</span><span style="color: #000000">): </span><span style="color: #0000ff">if</span> dt_type <span style="color: #0000ff">in</span><span style="color: #000000"> l: </span><span style="color: #0000ff">return</span><span style="color: #000000"> dt_type </span><span style="color: #0000ff">return</span> max(l, key=<span style="color: #000000">l.count) </span><span style="color: #0000ff">return</span><span style="color: #000000"> default </span><span style="color: #0000ff">def</span> get_col_types(input_file, max_rows=1000<span style="color: #000000">): </span><span style="color: #800000">"""</span><span style="color: #800000">Find the type for each CSV column </span><span style="color: #800000">"""</span><span style="color: #000000"> csv_types </span>=<span style="color: #000000"> collections.defaultdict(list) reader </span>=<span style="color: #000000"> csv.reader(open(input_file)) </span><span style="color: #008000">#</span><span style="color: #008000"> test the first few rows for their data types</span> <span style="color: #0000ff">for</span> row_i, row <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(reader): </span><span style="color: #0000ff">if</span> row_i ==<span style="color: #000000"> 0: header </span>=<span style="color: #000000"> row </span><span style="color: #0000ff">else</span><span style="color: #000000">: </span><span style="color: #0000ff">for</span> col_i, s <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(row): data_type </span>=<span style="color: #000000"> get_type(s) csv_types[header[col_i]].append(data_type) </span><span style="color: #0000ff">if</span> row_i ==<span style="color: #000000"> max_rows: </span><span style="color: #0000ff">break</span> <span style="color: #008000">#</span><span style="color: #008000"> take the most common data type for each row</span> <span style="color: #0000ff">return</span> [most_common(csv_types[col]) <span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> header] </span><span style="color: #0000ff">def</span><span style="color: #000000"> get_schema(table, header, col_types): </span><span style="color: #800000">"""</span><span style="color: #800000">Generate the schema for this table from given types and columns </span><span style="color: #800000">"""</span><span style="color: #000000"> schema_sql </span>= <span style="color: #800000">"""</span><span style="color: #800000">CREATE TABLE IF NOT EXISTS %s ( id int NOT NULL AUTO_INCREMENT,</span><span style="color: #800000">"""</span> %<span style="color: #000000"> table </span><span style="color: #0000ff">for</span> col_name, col_type <span style="color: #0000ff">in</span><span style="color: #000000"> zip(header, col_types): schema_sql </span>+= <span style="color: #800000">'</span><span style="color: #800000">\n%s %s,</span><span style="color: #800000">'</span> %<span style="color: #000000"> (col_name, col_type) schema_sql </span>+= <span style="color: #800000">"""</span><span style="color: #800000">\nPRIMARY KEY (id) ) DEFAULT CHARSET=utf8;</span><span style="color: #800000">"""</span> <span style="color: #0000ff">return</span><span style="color: #000000"> schema_sql </span><span style="color: #0000ff">def</span><span style="color: #000000"> get_insert(table, header): </span><span style="color: #800000">"""</span><span style="color: #800000">Generate the SQL for inserting rows </span><span style="color: #800000">"""</span><span style="color: #000000"> field_names </span>= <span style="color: #800000">'</span><span style="color: #800000">, </span><span style="color: #800000">'</span><span style="color: #000000">.join(header) field_markers </span>= <span style="color: #800000">'</span><span style="color: #800000">, </span><span style="color: #800000">'</span>.join(<span style="color: #800000">'</span><span style="color: #800000">%s</span><span style="color: #800000">'</span> <span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> header) </span><span style="color: #0000ff">return</span> <span style="color: #800000">'</span><span style="color: #800000">INSERT INTO %s (%s) VALUES (%s);</span><span style="color: #800000">'</span> %<span style="color: #000000"> \ (table, field_names, field_markers) </span><span style="color: #0000ff">def</span><span style="color: #000000"> format_header(row): </span><span style="color: #800000">"""</span><span style="color: #800000">Format column names to remove illegal characters and duplicates </span><span style="color: #800000">"""</span><span style="color: #000000"> safe_col </span>= <span style="color: #0000ff">lambda</span> s: re.sub(<span style="color: #800000">'</span><span style="color: #800000">\W+</span><span style="color: #800000">'</span>, <span style="color: #800000">'</span><span style="color: #800000">_</span><span style="color: #800000">'</span>, s.lower()).strip(<span style="color: #800000">'</span><span style="color: #800000">_</span><span style="color: #800000">'</span><span style="color: #000000">) header </span>=<span style="color: #000000"> [] counts </span>=<span style="color: #000000"> collections.defaultdict(int) </span><span style="color: #0000ff">for</span> col <span style="color: #0000ff">in</span><span style="color: #000000"> row: col </span>=<span style="color: #000000"> safe_col(col) counts[col] </span>+= 1 <span style="color: #0000ff">if</span> counts[col] > 1<span style="color: #000000">: col </span>= <span style="color: #800000">'</span><span style="color: #800000">{}{}</span><span style="color: #800000">'</span><span style="color: #000000">.format(col, counts[col]) header.append(col) </span><span style="color: #0000ff">return</span><span style="color: #000000"> header </span><span style="color: #0000ff">def</span> main(input_file, user, password, host, table, database, max_inserts=10000<span style="color: #000000">): </span><span style="color: #0000ff">print</span> <span style="color: #800000">"</span><span style="color: #800000">Importing `%s' into MySQL database `%s.%s'</span><span style="color: #800000">"</span> %<span style="color: #000000"> (input_file, database, table) db </span>= MySQLdb.connect(host=host, user=user, passwd=password, charset=<span style="color: #800000">'</span><span style="color: #800000">utf8</span><span style="color: #800000">'</span><span style="color: #000000">) cursor </span>=<span style="color: #000000"> db.cursor() </span><span style="color: #008000">#</span><span style="color: #008000"> create database and if doesn't exist</span> cursor.execute(<span style="color: #800000">'</span><span style="color: #800000">CREATE DATABASE IF NOT EXISTS %s;</span><span style="color: #800000">'</span> %<span style="color: #000000"> database) db.select_db(database) </span><span style="color: #008000">#</span><span style="color: #008000"> define table</span> <span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Analyzing column types ...</span><span style="color: #800000">'</span><span style="color: #000000"> col_types </span>=<span style="color: #000000"> get_col_types(input_file) </span><span style="color: #0000ff">print</span><span style="color: #000000"> col_types header </span>=<span style="color: #000000"> None </span><span style="color: #0000ff">for</span> i, row <span style="color: #0000ff">in</span><span style="color: #000000"> enumerate(csv.reader(open(input_file))): </span><span style="color: #0000ff">if</span><span style="color: #000000"> header: </span><span style="color: #0000ff">while</span> len(row) <<span style="color: #000000"> len(header): row.append(</span><span style="color: #800000">''</span>) <span style="color: #008000">#</span><span style="color: #008000"> this row is missing columns so pad blank values</span> <span style="color: #000000"> cursor.execute(insert_sql, row) </span><span style="color: #0000ff">if</span> i % max_inserts ==<span style="color: #000000"> 0: db.commit() </span><span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">commit</span><span style="color: #800000">'</span> <span style="color: #0000ff">else</span><span style="color: #000000">: header </span>=<span style="color: #000000"> format_header(row) schema_sql </span>=<span style="color: #000000"> get_schema(table, header, col_types) </span><span style="color: #0000ff">print</span><span style="color: #000000"> schema_sql </span><span style="color: #008000">#</span><span style="color: #008000"> create table</span> cursor.execute(<span style="color: #800000">'</span><span style="color: #800000">DROP TABLE IF EXISTS %s;</span><span style="color: #800000">'</span> %<span style="color: #000000"> table) cursor.execute(schema_sql) </span><span style="color: #008000">#</span><span style="color: #008000"> create index for more efficient access</span> <span style="color: #0000ff">try</span><span style="color: #000000">: cursor.execute(</span><span style="color: #800000">'</span><span style="color: #800000">CREATE INDEX ids ON %s (id);</span><span style="color: #800000">'</span> %<span style="color: #000000"> table) </span><span style="color: #0000ff">except</span><span style="color: #000000"> MySQLdb.OperationalError: </span><span style="color: #0000ff">pass</span> <span style="color: #008000">#</span><span style="color: #008000"> index already exists</span> <span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Inserting rows ...</span><span style="color: #800000">'</span> <span style="color: #008000">#</span><span style="color: #008000"> SQL string for inserting data</span> insert_sql =<span style="color: #000000"> get_insert(table, header) </span><span style="color: #008000">#</span><span style="color: #008000"> commit rows to database</span> <span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Committing rows to database ...</span><span style="color: #800000">'</span><span style="color: #000000"> db.commit() </span><span style="color: #0000ff">print</span> <span style="color: #800000">'</span><span style="color: #800000">Done!</span><span style="color: #800000">'</span> <span style="color: #0000ff">if</span> <span style="color: #800080">__name__</span> == <span style="color: #800000">'</span><span style="color: #800000">__main__</span><span style="color: #800000">'</span><span style="color: #000000">: parser </span>= argparse.ArgumentParser(description=<span style="color: #800000">'</span><span style="color: #800000">Automatically insert CSV contents into MySQL</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--table</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">table</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">Set the name of the table. If not set the CSV filename will be used</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--database</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">database</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">test</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">Set the name of the database. If not set the test database will be used</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--user</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">user</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">root</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL login username</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--password</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">password</span><span style="color: #800000">'</span>, default=<span style="color: #800000">''</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL login password</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">--host</span><span style="color: #800000">'</span>, dest=<span style="color: #800000">'</span><span style="color: #800000">host</span><span style="color: #800000">'</span>, default=<span style="color: #800000">'</span><span style="color: #800000">localhost</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The MySQL host</span><span style="color: #800000">'</span><span style="color: #000000">) parser.add_argument(</span><span style="color: #800000">'</span><span style="color: #800000">input_file</span><span style="color: #800000">'</span>, help=<span style="color: #800000">'</span><span style="color: #800000">The input CSV file</span><span style="color: #800000">'</span><span style="color: #000000">) args </span>= parser.parse_args(sys.argv[1<span style="color: #000000">:]) </span><span style="color: #0000ff">if</span> <span style="color: #0000ff">not</span><span style="color: #000000"> args.table: </span><span style="color: #008000">#</span><span style="color: #008000"> use input file name for table</span> args.table =<span style="color: #000000"> os.path.splitext(os.path.basename(args.input_file))[0] main(args.input_file, args.user, args.password, args.host, args.table, args.database)</span>
具体使用例子如下
[root@server1]# python csv2mysql.py --host=172.20.197.61 --user=PdYRxGWNpVRCQfHj --password=RX5a5YsViQcDdywr --database=cf_dc61100a_92a7_43ca_81dd_2a7e3fa0808a --table=performance_history _2 performance_history_2.csv
Importation de `performance_history_2. csv' dans la base de données MySQL `cf_dc61100a_92a7_43ca_81dd_2a7e3fa0808a.performance_history_2'
Analyse des types de colonnes ...
['datetime', 'varchar(255)', 'varchar(255)', 'varchar(255)', 'varchar (255)', 'varchar(255)', 'varchar(255)', 'varchar(255)']
CRÉER UNE TABLE SI NON EXISTE performance_history_2 (
id int NOT NULL AUTO_INCREMENT,
date_time datetime ,
write_bw_mb_s varchar(255),
read_bw_mb_s varchar(255),
write_iops varchar(255),
read_iops varchar(255),
write_latency_usec varchar(255),
read_latency_usec varchar(255),
avg_latency_usec varchar(255),
PRIMARY KEY (id)
) DEFAULT CHARSET=utf8;
Insertion de lignes ...
commit
commit
commit
commit
commit
commit
commit
commit
commit
commit
commit
Commit des lignes dans la base de données...
Terminé !
Ce qui précède est le contenu détaillé de. pour plus d'informations, suivez d'autres articles connexes sur le site Web de PHP en chinois!