Rumah  >  Artikel  >  pangkalan data  >  找出多个文本中频率高的单词(2)

找出多个文本中频率高的单词(2)

WBOY
WBOYasal
2016-06-07 15:32:301301semak imbas

接上篇,我打算用 用concurrent包里的CountDownLatch类 去实现。 还是直接上代码吧: Main.java package com.anders.thread;import java.util.HashMap;import java.util.Map;import java.util.concurrent.CountDownLatch;import java.util.concurrent.Execut

接上篇,我打算用concurrent包里的CountDownLatch类去实现。


还是直接上代码吧:

Main.java

package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class Main {

	public static void main(String[] args) {

		int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));

		ExecutorService es = Executors.newFixedThreadPool(threadNumber);
		SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
		try {
			CountDownLatch doneSignals = new CountDownLatch(threadNumber);

			// 这是在 文件数比线程数多的情况下,若文件比线程数少的话,加个判断就可以了
			for (int i = 0; i  map = mergeThreadMap(threads);

			display(map);

		} catch (InterruptedException e) {
			e.printStackTrace();
		} finally {
			es.shutdown();
		}

	}

	private static Map<string integer> mergeThreadMap(SingleThreadStatistics[] threads) {
		Map<string integer> map = new HashMap<string integer>();

		for (SingleThreadStatistics singleThreadStatistics : threads) {
			Map<string integer> threadMap = singleThreadStatistics.getMap();

			for (Map.Entry<string integer> entry : threadMap.entrySet()) {
				String threadWord = entry.getKey();
				Integer threadWordCount = entry.getValue();
				Integer wordCount = map.get(threadWord);

				if (wordCount == null) {
					map.put(threadWord, threadWordCount);
				} else {
					map.put(threadWord, threadWordCount + wordCount);
				}
			}
		}

		return map;
	}

	private static void display(Map<string integer> map) {

		for (Map.Entry<string integer> entry : map.entrySet()) {
			System.out.print(entry.getKey());
			System.out.println("   ," + entry.getValue());
		}

	}

}
</string></string></string></string></string></string></string>

SingleThreadStatistics.java
package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

public class SingleThreadStatistics implements Runnable {

	private Map<string integer> map = new HashMap<string integer>();
	private CountDownLatch doneSignals;

	public SingleThreadStatistics(CountDownLatch doneSignals) {
		this.doneSignals = doneSignals;
	}

	@Override
	public void run() {

		while (true) {
			File file = FileManager.getFile();
			if (file == null) {
				break;
			}
			FileManager.parseFile(file, map);
		}

		doneSignals.countDown();

	}

	// --------getter/setter------------

	public Map<string integer> getMap() {
		return map;
	}

}
</string></string></string>

FileManager.java
package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manage files and offer single for every thread
 * 
 * @author Anders
 * 
 */
public class FileManager {

	private static List<file> fileList;
	private static int index = 0;

	static {
		String dirPath = PropertiesUtil.get("DirName");
		String path = FileManager.class.getClassLoader().getResource(dirPath).getPath();
		fileList = getFiles(path);
	}

	public synchronized static File getFile() {
		if (index == fileList.size()) {
			return null;
		}
		File file = fileList.get(index);
		index++;
		return file;
	}

	private static List<file> getFiles(String dirPath) {

		File dir = new File(dirPath);
		if (!dir.exists() || !dir.isDirectory()) {
			return Collections.emptyList();
		}

		File[] files = dir.listFiles();

		//判断 是不是  以txt结尾的文件
		Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
		List<file> list = new ArrayList<file>();

		for (File file : files) {
			Matcher matcher = pattern.matcher(file.getName());
			if (matcher.matches()) {
				list.add(file);
			}
		}

		return list;
	}

	//读取文件  使用的是java.nio的filechannel 和bytebuffer
	public static void parseFile(File file, Map<string integer> map) {
		FileInputStream ins = null;
		try {
			ins = new FileInputStream(file);
			FileChannel fIns = ins.getChannel();
			ByteBuffer buffer = ByteBuffer.allocate(1024);

			while (true) {
				buffer.clear();
				int r = fIns.read(buffer);
				if (r == -1) {
					break;
				}
				buffer.flip();
				buffer2word(buffer, map);
			}
			fIns.close();

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				if (ins != null) {
					ins.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	//这个是  将读取的内容,提取出  英语字母
	private static void buffer2word(ByteBuffer buffer, Map<string integer> map) {
		StringBuilder str = new StringBuilder();
		for (int i = 0; i  map) {
		Integer count = map.get(word);
		if (null == count) {
			map.put(word, 1);
		} else {
			map.put(word, ++count);
		}
	}

	//看看是否是  英语字符
	private static boolean isEnglishChar(byte b) {
		//通过ASCLL码  判断
		if (b > 65 && b  97 && b <br>

config.properties

ThreadNumber=3
DirName=txt
FileType=.*.txt


其实我觉得最重要的代码是  FileManager里的

public synchronized static File getFile() {
		// Hand out the file at the current position and advance; null once the list is used up.
		if (index != fileList.size()) {
			File next = fileList.get(index);
			index += 1;
			return next;
		}
		return null;
	}
这部分代码之所以重要,是因为只要每个thread分别得到不同的文件,就可以了。

还有很重要的一点:判断index是否已经到达文件列表末尾的检查,必须和index++放在同一个同步块里面,否则会引起线程安全问题。


Kenyataan:
Kandungan artikel ini disumbangkan secara sukarela oleh netizen, dan hak cipta adalah milik pengarang asal. Laman web ini tidak memikul tanggungjawab undang-undang yang sepadan. Jika anda menemui sebarang kandungan yang disyaki plagiarisme atau pelanggaran, sila hubungi admin@php.cn