Home > Article > Backend Development > PHP 处理excel
require_once 'Excel/reader.php'; // load the Spreadsheet_Excel_Reader class

$data = new Spreadsheet_Excel_Reader(); // create the reader
$data->setOutputEncoding('gbk');        // encoding used for the returned cell text
$data->read('text.xls');                // read() loads the workbook; Chinese text is supported

// numRows is the number of rows and numCols the number of columns of sheet 0.
// The reader's default row/column offset is 1, so both bounds are inclusive.
// NOTE(review): rows start at 2, presumably to skip a header row -- confirm.
for ($i = 2; $i <= $data->sheets[0]['numRows']; $i++) {
    for ($j = 1; $j <= $data->sheets[0]['numCols']; $j++) {
        // Empty cells have no entry in 'cells', so guard the lookup.
        $cell = isset($data->sheets[0]['cells'][$i][$j])
            ? $data->sheets[0]['cells'][$i][$j]
            : '';
        // NOTE(review): the original echo operand was lost in extraction; this
        // uses the quoted, comma-terminated form of the commented-out line.
        echo "\"" . $cell . "\",";
    }
    echo "\n";
}
下面的保存为reader.php
---------------------------------------------------开始---------------------------------
/**
 * A class for reading Microsoft Excel Spreadsheets.
 *
 * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
 * (http://sourceforge.net/projects/phpexcelreader)
 * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
 * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
 *
 * PHP versions 4 and 5
 *
 * LICENSE: This source file is subject to version 3.0 of the PHP license
 * that is available through the world-wide-web at the following URI:
 * http://www.php.net/license/3_0.txt. If you did not receive a copy of
 * the PHP License and are unable to obtain it through the web, please
 * send a note to license@php.net so we can mail you a copy immediately.
 *
 * @category Spreadsheet
 * @package Spreadsheet_Excel_Reader
 * @author Vadim Tkachenko
 */
//require_once 'PEAR.php';
//require_once 'Spreadsheet/Excel/Reader/OLERead.php';
// Presumably provides the OLERead class that the reader's constructor instantiates.
require_once 'OLERead.inc';
//require_once 'OLE.php';
// BIFF versions and substream types found in a BOF record.
define('SPREADSHEET_EXCEL_READER_BIFF8', 0x600);
define('SPREADSHEET_EXCEL_READER_BIFF7', 0x500);
define('SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS', 0x5);
define('SPREADSHEET_EXCEL_READER_WORKSHEET', 0x10);

// Record type codes.
define('SPREADSHEET_EXCEL_READER_TYPE_BOF', 0x809);
define('SPREADSHEET_EXCEL_READER_TYPE_EOF', 0x0a);
define('SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET', 0x85);
define('SPREADSHEET_EXCEL_READER_TYPE_DIMENSION', 0x200);
define('SPREADSHEET_EXCEL_READER_TYPE_ROW', 0x208);
define('SPREADSHEET_EXCEL_READER_TYPE_DBCELL', 0xd7);
define('SPREADSHEET_EXCEL_READER_TYPE_FILEPASS', 0x2f);
define('SPREADSHEET_EXCEL_READER_TYPE_NOTE', 0x1c);
define('SPREADSHEET_EXCEL_READER_TYPE_TXO', 0x1b6);
define('SPREADSHEET_EXCEL_READER_TYPE_RK', 0x7e);
define('SPREADSHEET_EXCEL_READER_TYPE_RK2', 0x27e);
define('SPREADSHEET_EXCEL_READER_TYPE_MULRK', 0xbd);
define('SPREADSHEET_EXCEL_READER_TYPE_MULBLANK', 0xbe);
define('SPREADSHEET_EXCEL_READER_TYPE_INDEX', 0x20b);
define('SPREADSHEET_EXCEL_READER_TYPE_SST', 0xfc);
define('SPREADSHEET_EXCEL_READER_TYPE_EXTSST', 0xff);
define('SPREADSHEET_EXCEL_READER_TYPE_CONTINUE', 0x3c);
define('SPREADSHEET_EXCEL_READER_TYPE_LABEL', 0x204);
define('SPREADSHEET_EXCEL_READER_TYPE_LABELSST', 0xfd);
define('SPREADSHEET_EXCEL_READER_TYPE_NUMBER', 0x203);
define('SPREADSHEET_EXCEL_READER_TYPE_NAME', 0x18);
define('SPREADSHEET_EXCEL_READER_TYPE_ARRAY', 0x221);
define('SPREADSHEET_EXCEL_READER_TYPE_STRING', 0x207);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA', 0x406);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMULA2', 0x6);
define('SPREADSHEET_EXCEL_READER_TYPE_FORMAT', 0x41e);
define('SPREADSHEET_EXCEL_READER_TYPE_XF', 0xe0);
define('SPREADSHEET_EXCEL_READER_TYPE_BOOLERR', 0x205);
define('SPREADSHEET_EXCEL_READER_TYPE_UNKNOWN', 0xffff);
define('SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR', 0x22);
define('SPREADSHEET_EXCEL_READER_TYPE_MERGEDCELLS', 0xE5);

// Date conversion constants.
define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS' , 25569);
define('SPREADSHEET_EXCEL_READER_UTCOFFSETDAYS1904', 24107);
define('SPREADSHEET_EXCEL_READER_MSINADAY', 86400);
//define('SPREADSHEET_EXCEL_READER_MSINADAY', 24 * 60 * 60);

// Default number format. Each define sits on its own line so that the
// commented-out alternative cannot swallow the live definition.
//define('SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT', "%.2f");
define('SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT', "%s");
/** Place includes, constant defines and $_GLOBAL settings here.* Make sure they have appropriate docblocks to avoid phpDocumentor* construing they are documented by the page-level docblock.*/
/**
 * A class for reading Microsoft Excel Spreadsheets.
 *
 * Originally developed by Vadim Tkachenko under the name PHPExcelReader.
 * (http://sourceforge.net/projects/phpexcelreader)
 * Based on the Java version by Andy Khan (http://www.andykhan.com). Now
 * maintained by David Sanders. Reads only Biff 7 and Biff 8 formats.
 *
 * @category Spreadsheet
 * @package Spreadsheet_Excel_Reader
 * @author Vadim Tkachenko
 */
/**
 * Format records collected while parsing the workbook globals.
 *
 * @var array
 * @access public
 */
var $formatRecords = array();

/**
 * Strings gathered from the SST record, in the order they were read.
 *
 * @var array
 * @access public
 */
var $sst = array();

/**
 * Array of worksheets.
 *
 * Cell values live under 'cells'; per-cell meta-data lives under 'cellsInfo'.
 *
 * Layout:
 *
 * $sheets --> 'cells'     --> row --> column --> interpreted value
 *         --> 'cellsInfo' --> row --> column --> 'type' - 'date', 'number' or 'unknown'
 *                                            --> 'raw'  - the raw data Excel stores for the cell
 *
 * @var array
 * @access public
 */
var $sheets = array();

/**
 * Workbook stream handed back by the OLE reader.
 *
 * @var string
 * @access public
 */
var $data;

/**
 * OLE object used to read the file.
 *
 * @var OLE object
 * @access private
 */
var $_ole;

/**
 * Output encoding chosen through setOutputEncoding().
 *
 * @var string
 * @access private
 */
var $_defaultEncoding;

/**
 * Default number format.
 *
 * @var integer
 * @access private
 */
var $_defaultFormat = SPREADSHEET_EXCEL_READER_DEF_NUM_FORMAT;

/**
 * Formats forced per column through setColumnFormat(), keyed by column number.
 *
 * @var array
 * @access private
 */
var $_columnsFormat = array();

/**
 * Row offset; changed through setRowColOffset().
 *
 * @var integer
 * @access private
 */
var $_rowoffset = 1;

/**
 * Column offset; changed through setRowColOffset().
 *
 * @var integer
 * @access private
 */
var $_coloffset = 1;

/**
 * Default date formats used by Excel, keyed by format index.
 *
 * @var array
 * @access public
 */
var $dateFormats = array (
    0xe  => "d/m/Y",
    0xf  => "d-M-Y",
    0x10 => "d-M",
    0x11 => "M-Y",
    0x12 => "h:i a",
    0x13 => "h:i:s a",
    0x14 => "H:i",
    0x15 => "H:i:s",
    0x16 => "d/m/Y H:i",
    0x2d => "i:s",
    0x2e => "H:i:s",
    0x2f => "i:s.S");
/**
 * Default number formats used by Excel, keyed by format index.
 *
 * Each value is a printf-style format; the trailing comment on each entry
 * shows the Excel format string it stands in for. Every entry is on its own
 * line so that those trailing comments cannot swallow the entries after them.
 *
 * @var array
 * @access public
 */
var $numberFormats = array(
    0x1 => "%1.0f", // "0"
    0x2 => "%1.2f", // "0.00",
    0x3 => "%1.0f", //"#,##0",
    0x4 => "%1.2f", //"#,##0.00",
    0x5 => "%1.0f", /*"$#,##0;($#,##0)",*/
    0x6 => '$%1.0f', /*"$#,##0;($#,##0)",*/
    0x7 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
    0x8 => '$%1.2f', //"$#,##0.00;($#,##0.00)",
    0x9 => '%1.0f%%', // "0%"
    0xa => '%1.2f%%', // "0.00%"
    0xb => '%1.2f', // 0.00E00",
    0x25 => '%1.0f', // "#,##0;(#,##0)",
    0x26 => '%1.0f', //"#,##0;(#,##0)",
    0x27 => '%1.2f', //"#,##0.00;(#,##0.00)",
    0x28 => '%1.2f', //"#,##0.00;(#,##0.00)",
    0x29 => '%1.0f', //"#,##0;(#,##0)",
    0x2a => '$%1.0f', //"$#,##0;($#,##0)",
    0x2b => '%1.2f', //"#,##0.00;(#,##0.00)",
    0x2c => '$%1.2f', //"$#,##0.00;($#,##0.00)",
    0x30 => '%1.0f'); //"##0.0E0";
// }}} // {{{ Spreadsheet_Excel_Reader()
/**
 * Constructor
 *
 * Creates the OLERead object used to read the file and selects 'iconv'
 * as the UTF encoder.
 *
 * Declared as __construct() because PHP 8 no longer treats a method named
 * after the class as a constructor; without this, $_ole would stay unset.
 */
function __construct()
{
    $this->_ole = new OLERead();
    $this->setUTFEncoder('iconv');
}

/**
 * PHP 4 style constructor
 *
 * Kept so that PHP 4 (which only knows class-named constructors) and any
 * code calling this method by name keep working; it delegates to
 * __construct().
 */
function Spreadsheet_Excel_Reader()
{
    $this->__construct();
}
// }}} // {{{ setOutputEncoding()
/**
 * Choose the output encoding.
 *
 * The value is stored as the reader's default encoding.
 *
 * @access public
 * @param string Encoding to use
 */
function setOutputEncoding($encoding)
{
    $this->_defaultEncoding = $encoding;
}
// }}} // {{{ setUTFEncoder()
/**
 * Select the function used to convert UTF-16LE text to the output encoding.
 *
 * Pass 'iconv' to use iconv() or 'mb' to use mb_convert_encoding(). The
 * chosen function name is stored only when that function exists; in every
 * other case (unknown $encoder, or function unavailable) an empty string
 * is stored.
 *
 * @access public
 * @param string Encoding type to use. Either 'iconv' or 'mb'
 */
function setUTFEncoder($encoder = 'iconv')
{
    $chosen = '';

    // switch compares loosely, exactly like the == tests it replaces.
    switch ($encoder) {
        case 'iconv':
            if (function_exists('iconv')) {
                $chosen = 'iconv';
            }
            break;

        case 'mb':
            if (function_exists('mb_convert_encoding')) {
                $chosen = 'mb_convert_encoding';
            }
            break;
    }

    $this->_encoderFunction = $chosen;
}
// }}} // {{{ setRowColOffset()
/**
 * Apply one offset to both rows and columns.
 *
 * @access public
 * @param offset Value stored as both the row offset and the column offset
 */
function setRowColOffset($iOffset)
{
    $this->_coloffset = $iOffset;
    $this->_rowoffset = $iOffset;
}
// }}} // {{{ setDefaultFormat()
/**
 * Replace the default number format.
 *
 * @access public
 * @param Default format
 */
function setDefaultFormat($sFormat)
{
    $this->_defaultFormat = $sFormat;
}
// }}} // {{{ setColumnFormat()
/**
 * Pin a format to one column.
 *
 * The format is recorded in the per-column format list under the given
 * column number, replacing any format stored for that column before.
 *
 * @access public
 * @param integer Column number
 * @param string Format
 */
function setColumnFormat($column, $sFormat)
{
    $this->_columnsFormat[$column] = $sFormat;
}
// }}} // {{{ read()
/**
 * Read the spreadsheet file using OLE, then parse
 *
 * Asks the OLE reader to load the file, stores the workbook stream it
 * returns in $this->data and runs _parse() on it. When the OLE reader
 * reports failure with error code 1 the script is terminated with a
 * "not readable" message.
 *
 * @access public
 * @param filename
 * @todo return a valid value
 */
function read($sFileName)
{
    $res = $this->_ole->read($sFileName);

    // oops, something goes wrong (Darko Miljanovic)
    if ($res === false) {
        // check error code
        if ($this->_ole->error == 1) {
            // bad file
            die('The filename ' . $sFileName . ' is not readable');
        }
        // check other error codes here (eg bad fileformat, etc...)
    }

    $this->data = $this->_ole->getWorkBook();

    //echo "data =".$this->data;
    //$this->readRecords();
    // Kept on its own line: when it shared a line with the comments above,
    // both this call and the method's closing brace were commented out.
    $this->_parse();
}
// }}} // {{{ _parse()
/**
 * Parse a workbook
 *
 * Walks the records in $this->data starting at offset 0. The first record
 * supplies $version; parsing stops with false unless it is BIFF8 or BIFF7
 * and the substream type is WORKBOOKGLOBALS. Records are then processed
 * until the EOF code is met:
 *
 *  - SST          appends each decoded string to $this->sst
 *  - FILEPASS     returns false
 *  - NAME         ignored
 *  - FORMAT       stores the format string in $this->formatRecords[$indexCode]
 *  - XF           appends a 'date', 'number' or 'other' entry to
 *                 $this->formatRecords['xfrecords']
 *  - NINETEENFOUR sets $this->nineteenFour
 *  - BOUNDSHEET   appends array('name', 'offset') to $this->boundsheets
 *
 * Afterwards every entry of $this->boundsheets is passed to _parsesheet()
 * with $this->sn set to its key, and true is returned.
 *
 * NOTE(review): this listing is damaged. Text appears to be missing in many
 * places (e.g. "])data[" where a shift and an assignment should separate two
 * reads, and "for ($i = 0; $i data[" where a loop condition should be), and
 * $length and $substreamType are used without a visible assignment. Several
 * statements also share a line with a leading "//" comment. The body below
 * is not valid PHP as shown; restore it from a pristine copy of the library
 * before use.
 *
 * @access private
 * @return bool
 */
function _parse() { $pos = 0;
$code = ord($this->data[$pos]) | ord($this->data[$pos+1])data[$pos+2]) | ord($this->data[$pos+3]) $version = ord($this->data[$pos + 4]) | ord($this->data[$pos + 5])data[$pos + 6]) | ord($this->data[$pos + 7]) if (($version != SPREADSHEET_EXCEL_READER_BIFF8) && ($version != SPREADSHEET_EXCEL_READER_BIFF7)) { return false; }
if ($substreamType != SPREADSHEET_EXCEL_READER_WORKBOOKGLOBALS){ return false; }
//print_r($rec); $pos += $length + 4;
$code = ord($this->data[$pos]) | ord($this->data[$pos+1])data[$pos+2]) | ord($this->data[$pos+3]) while ($code != SPREADSHEET_EXCEL_READER_TYPE_EOF) { switch ($code) { case SPREADSHEET_EXCEL_READER_TYPE_SST: //echo "Type_SST\n"; $spos = $pos + 4; $limitpos = $spos + $length; $uniqueStrings = $this->_GetInt4d($this->data, $spos+4); $spos += 8; for ($i = 0; $i data[$spos]) | ord($this->data[$spos+1])data[$spos+2]) | ord($this->data[$spos+3])data[$spos]) | (ord($this->data[$spos+1]) data[$spos]); $spos++; $asciiEncoding = (($optionFlags & 0x01) == 0) ; $extendedString = ( ($optionFlags & 0x04) != 0);
// See if string contains formatting information $richString = ( ($optionFlags & 0x08) != 0);
if ($richString) { // Read in the crun $formattingRuns = ord($this->data[$spos]) | (ord($this->data[$spos+1]) if ($extendedString) { // Read in cchExtRst $extendedRunLength = $this->_GetInt4d($this->data, $spos); $spos += 4; }
$len = ($asciiEncoding)? $numChars : $numChars*2; if ($spos + $len data, $spos, $len); $spos += $len; }else{ // found countinue $retstr = substr($this->data, $spos, $limitpos - $spos); $bytesRead = $limitpos - $spos; $charsLeft = $numChars - (($asciiEncoding) ? $bytesRead : ($bytesRead / 2)); $spos = $limitpos;
while ($charsLeft > 0){ $opcode = ord($this->data[$spos]) | ord($this->data[$spos+1])data[$spos+2]) | ord($this->data[$spos+3])data[$spos]); $spos += 1; if ($asciiEncoding && ($option == 0)) { $len = min($charsLeft, $limitpos - $spos); // min($charsLeft, $conlength); $retstr .= substr($this->data, $spos, $len); $charsLeft -= $len; $asciiEncoding = true; }elseif (!$asciiEncoding && ($option != 0)){ $len = min($charsLeft * 2, $limitpos - $spos); // min($charsLeft, $conlength); $retstr .= substr($this->data, $spos, $len); $charsLeft -= $len/2; $asciiEncoding = false; }elseif (!$asciiEncoding && ($option == 0)) { // Bummer - the string starts off as Unicode, but after the // continuation it is in straightforward ASCII encoding $len = min($charsLeft, $limitpos - $spos); // min($charsLeft, $conlength); for ($j = 0; $j data[$spos + $j].chr(0); } $charsLeft -= $len; $asciiEncoding = false; }else{ $newstr = ''; for ($j = 0; $j data, $spos, $len); $charsLeft -= $len/2; $asciiEncoding = false; //echo "Izavrat\n"; } $spos += $len;
} } $retstr = ($asciiEncoding) ? $retstr : $this->_encodeUTF16($retstr);// echo "Str $i = $retstr\n"; if ($richString){ $spos += 4 * $formattingRuns; }
// For extended strings, skip over the extended string data if ($extendedString) { $spos += $extendedRunLength; } //if ($retstr == 'Derby'){ // echo "bb\n"; //} $this->sst[]=$retstr; } /*$continueRecords = array(); while ($this->getNextCode() == Type_CONTINUE) { $continueRecords[] = &$this->nextRecord(); } //echo " 1 Type_SST\n"; $this->shareStrings = new SSTRecord($r, $continueRecords); //print_r($this->shareStrings->strings); */ // echo 'SST read: '.($time_end-$time_start)."\n"; break;
case SPREADSHEET_EXCEL_READER_TYPE_FILEPASS: return false; break; case SPREADSHEET_EXCEL_READER_TYPE_NAME: //echo "Type_NAME\n"; break; case SPREADSHEET_EXCEL_READER_TYPE_FORMAT: $indexCode = ord($this->data[$pos+4]) | ord($this->data[$pos+5]) if ($version == SPREADSHEET_EXCEL_READER_BIFF8) { $numchars = ord($this->data[$pos+6]) | ord($this->data[$pos+7]) data[$pos+8]) == 0){ $formatString = substr($this->data, $pos+9, $numchars); } else { $formatString = substr($this->data, $pos+9, $numchars*2); } } else { $numchars = ord($this->data[$pos+6]); $formatString = substr($this->data, $pos+7, $numchars*2); }
$this->formatRecords[$indexCode] = $formatString; // echo "Type.FORMAT\n"; break; case SPREADSHEET_EXCEL_READER_TYPE_XF: //global $dateFormats, $numberFormats; $indexCode = ord($this->data[$pos+6]) | ord($this->data[$pos+7]) formatRecords['xfrecords'])." $indexCode "; if (array_key_exists($indexCode, $this->dateFormats)) { //echo "isdate ".$dateFormats[$indexCode]; $this->formatRecords['xfrecords'][] = array( 'type' => 'date', 'format' => $this->dateFormats[$indexCode] ); }elseif (array_key_exists($indexCode, $this->numberFormats)) { //echo "isnumber ".$this->numberFormats[$indexCode]; $this->formatRecords['xfrecords'][] = array( 'type' => 'number', 'format' => $this->numberFormats[$indexCode] ); }else{ $isdate = FALSE; if ($indexCode > 0){ if (isset($this->formatRecords[$indexCode])) $formatstr = $this->formatRecords[$indexCode]; //echo '.other.'; //echo "\ndate-time=$formatstr=\n"; if ($formatstr) if (preg_match("/[^hmsday\/\-:\s]/i", $formatstr) == 0) { // found day and time format $isdate = TRUE; $formatstr = str_replace('mm', 'i', $formatstr); $formatstr = str_replace('h', 'H', $formatstr); //echo "\ndate-time $formatstr \n"; } }
if ($isdate){ $this->formatRecords['xfrecords'][] = array( 'type' => 'date', 'format' => $formatstr, ); }else{ $this->formatRecords['xfrecords'][] = array( 'type' => 'other', 'format' => '', 'code' => $indexCode ); } } //echo "\n"; break; case SPREADSHEET_EXCEL_READER_TYPE_NINETEENFOUR: //echo "Type.NINETEENFOUR\n"; $this->nineteenFour = (ord($this->data[$pos+4]) == 1); break; case SPREADSHEET_EXCEL_READER_TYPE_BOUNDSHEET: //echo "Type.BOUNDSHEET\n"; $rec_offset = $this->_GetInt4d($this->data, $pos+4); $rec_typeFlag = ord($this->data[$pos+8]); $rec_visibilityFlag = ord($this->data[$pos+9]); $rec_length = ord($this->data[$pos+10]);
if ($version == SPREADSHEET_EXCEL_READER_BIFF8){ $chartype = ord($this->data[$pos+11]); if ($chartype == 0){ $rec_name = substr($this->data, $pos+12, $rec_length); } else { $rec_name = $this->_encodeUTF16(substr($this->data, $pos+12, $rec_length*2)); } }elseif ($version == SPREADSHEET_EXCEL_READER_BIFF7){ $rec_name = substr($this->data, $pos+11, $rec_length); } $this->boundsheets[] = array('name'=>$rec_name, 'offset'=>$rec_offset);
break;
}
//echo "Code = ".base_convert($r['code'],10,16)."\n"; $pos += $length + 4; $code = ord($this->data[$pos]) | ord($this->data[$pos+1])data[$pos+2]) | ord($this->data[$pos+3]) //$r = &$this->nextRecord(); //echo "1 Code = ".base_convert($r['code'],10,16)."\n"; }
foreach ($this->boundsheets as $key=>$val){ $this->sn = $key; $this->_parsesheet($val['offset']); } return true;
}
/** * Parse a worksheet * * @access private * @param todo * @todo fix return codes */ function _parsesheet($spos) { $cont = true; // read BOF $code = ord($this->data[$spos]) | ord($this->data[$spos+1])data[$spos+2]) | ord($this->data[$spos+3]) $version = ord($this->data[$spos + 4]) | ord($this->data[$spos + 5])data[$spos + 6]) | ord($this->data[$spos + 7]) if (($version != SPREADSHEET_EXCEL_READER_BIFF8) && ($version != SPREADSHEET_EXCEL_READER_BIFF7)) { return -1; }
if ($substreamType != SPREADSHEET_EXCEL_READER_WORKSHEET){ return -2; } //echo "Start parse code=".base_convert($code,10,16)." version=".base_convert($version,10,16)." substreamType=".base_convert($substreamType,10,16).""."\n"; $spos += $length + 4; //var_dump($this->formatRecords); //echo "code $code $length"; while($cont) { //echo "mem= ".memory_get_usage()."\n";// $r = &$this->file->nextRecord(); $lowcode = ord($this->data[$spos]); if ($lowcode == SPREADSHEET_EXCEL_READER_TYPE_EOF) break; $code = $lowcode | ord($this->data[$spos+1])data[$spos+2]) | ord($this->data[$spos+3])sheets[$this->sn]['maxrow'] = $this->_rowoffset - 1; $this->sheets[$this->sn]['maxcol'] = $this->_coloffset - 1; //echo "Code=".base_convert($code,10,16)." $code\n"; unset($this->rectype); $this->multiplier = 1; // need for format with % switch ($code) { case SPREADSHEET_EXCEL_READER_TYPE_DIMENSION: //echo 'Type_DIMENSION '; if (!isset($this->numRows)) { if (($length == 10) || ($version == SPREADSHEET_EXCEL_READER_BIFF7)){ $this->sheets[$this->sn]['numRows'] = ord($this->data[$spos+2]) | ord($this->data[$spos+3]) sheets[$this->sn]['numCols'] = ord($this->data[$spos+6]) | ord($this->data[$spos+7]) sheets[$this->sn]['numRows'] = ord($this->data[$spos+4]) | ord($this->data[$spos+5