search
HomeDatabaseMysql Tutorial找出多个文本中频率高的单词(2)

接上篇,我打算用 用concurrent包里的CountDownLatch类 去实现。 还是直接上代码吧: Main.java package com.anders.thread;import java.util.HashMap;import java.util.Map;import java.util.concurrent.CountDownLatch;import java.util.concurrent.Execut

接上篇,我打算用 concurrent 包里的 CountDownLatch 类去实现。


还是直接上代码吧:

Main.java

package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class Main {

	public static void main(String[] args) {

		int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));

		ExecutorService es = Executors.newFixedThreadPool(threadNumber);
		SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
		try {
			CountDownLatch doneSignals = new CountDownLatch(threadNumber);

			// 这是在 文件数比线程数多的情况下,若文件比线程数少的话,加个判断就可以了
			for (int i = 0; i  map = mergeThreadMap(threads);

			display(map);

		} catch (InterruptedException e) {
			e.printStackTrace();
		} finally {
			es.shutdown();
		}

	}

	private static Map<string integer> mergeThreadMap(SingleThreadStatistics[] threads) {
		Map<string integer> map = new HashMap<string integer>();

		for (SingleThreadStatistics singleThreadStatistics : threads) {
			Map<string integer> threadMap = singleThreadStatistics.getMap();

			for (Map.Entry<string integer> entry : threadMap.entrySet()) {
				String threadWord = entry.getKey();
				Integer threadWordCount = entry.getValue();
				Integer wordCount = map.get(threadWord);

				if (wordCount == null) {
					map.put(threadWord, threadWordCount);
				} else {
					map.put(threadWord, threadWordCount + wordCount);
				}
			}
		}

		return map;
	}

	private static void display(Map<string integer> map) {

		for (Map.Entry<string integer> entry : map.entrySet()) {
			System.out.print(entry.getKey());
			System.out.println("   ," + entry.getValue());
		}

	}

}
</string></string></string></string></string></string></string>

SingleThreadStatistics.java
package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

public class SingleThreadStatistics implements Runnable {

	private Map<string integer> map = new HashMap<string integer>();
	private CountDownLatch doneSignals;

	public SingleThreadStatistics(CountDownLatch doneSignals) {
		this.doneSignals = doneSignals;
	}

	@Override
	public void run() {

		while (true) {
			File file = FileManager.getFile();
			if (file == null) {
				break;
			}
			FileManager.parseFile(file, map);
		}

		doneSignals.countDown();

	}

	// --------getter/setter------------

	public Map<string integer> getMap() {
		return map;
	}

}
</string></string></string>

FileManager.java
package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manages the input files and hands out a distinct file to each worker thread.
 * 
 * @author Anders
 * 
 */
public class FileManager {

	/** Files to be processed, collected once when the class is initialised. */
	private static List<File> fileList;
	/** Position in {@code fileList} of the next file to hand out. */
	private static int index = 0;

	// Resolves the directory named by the "DirName" property on the classpath and
	// collects its files. If the property or the resource is missing, the list
	// is left empty instead of failing class initialisation with a
	// NullPointerException.
	static {
		String dirPath = PropertiesUtil.get("DirName");
		java.net.URL dirUrl = (dirPath == null)
				? null
				: FileManager.class.getClassLoader().getResource(dirPath);
		if (dirUrl == null) {
			fileList = Collections.emptyList();
		} else {
			// NOTE(review): URL.getPath() is not URL-decoded, so a directory whose
			// path contains spaces or non-ASCII characters may not be found.
			fileList = getFiles(dirUrl.getPath());
		}
	}

	/**
	 * Hands out the next unprocessed file. The end-of-list check and the index
	 * increment happen under the same class-level lock, so each file is
	 * returned at most once.
	 *
	 * @return the next file, or {@code null} once every file has been handed out
	 */
	public synchronized static File getFile() {
		if (index != fileList.size()) {
			File next = fileList.get(index);
			index += 1;
			return next;
		}
		return null;
	}

	/**
	 * Lists the regular files in a directory whose names match the regular
	 * expression stored in the "FileType" property.
	 *
	 * @param dirPath path of the directory to scan
	 * @return the matching files; empty if the path is not an existing
	 *         directory or its contents cannot be listed
	 */
	private static List<File> getFiles(String dirPath) {

		File dir = new File(dirPath);
		if (!dir.exists() || !dir.isDirectory()) {
			return Collections.emptyList();
		}

		// listFiles() returns null when an I/O error occurs.
		File[] files = dir.listFiles();
		if (files == null) {
			return Collections.emptyList();
		}

		// Keep only files whose name matches the configured pattern (e.g. *.txt).
		Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
		List<File> list = new ArrayList<File>();

		for (File file : files) {
			// Skip sub-directories: only regular files can be read as text.
			if (!file.isFile()) {
				continue;
			}
			Matcher matcher = pattern.matcher(file.getName());
			if (matcher.matches()) {
				list.add(file);
			}
		}

		return list;
	}

	//读取文件  使用的是java.nio的filechannel 和bytebuffer
	public static void parseFile(File file, Map<string integer> map) {
		FileInputStream ins = null;
		try {
			ins = new FileInputStream(file);
			FileChannel fIns = ins.getChannel();
			ByteBuffer buffer = ByteBuffer.allocate(1024);

			while (true) {
				buffer.clear();
				int r = fIns.read(buffer);
				if (r == -1) {
					break;
				}
				buffer.flip();
				buffer2word(buffer, map);
			}
			fIns.close();

		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			try {
				if (ins != null) {
					ins.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			}
		}

	}

	//这个是  将读取的内容,提取出  英语字母
	private static void buffer2word(ByteBuffer buffer, Map<string integer> map) {
		StringBuilder str = new StringBuilder();
		for (int i = 0; i  map) {
		Integer count = map.get(word);
		if (null == count) {
			map.put(word, 1);
		} else {
			map.put(word, ++count);
		}
	}

	//看看是否是  英语字符
	private static boolean isEnglishChar(byte b) {
		//通过ASCLL码  判断
		if (b > 65 && b  97 && b <br>

config.properties


ThreadNumber=3
DirName=txt
FileType=.*.txt


其实我觉得最重要的代码是  FileManager里的

public synchronized static File getFile() {
		// The end-of-list check and the increment share one lock, so every
		// caller receives a different file.
		if (index != fileList.size()) {
			File next = fileList.get(index);
			index += 1;
			return next;
		}
		return null;
	}
这部分代码,因为只要  每个thread 分别得到不同的文件,就可以了。

另外还有很重要的一点:判断 index 是否已经到达文件列表末尾的检查,必须和 index++ 放在同一个同步块里,否则会引起线程安全问题(例如两个线程拿到同一个文件,或者下标越界)。


Statement
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
MySQL BLOB : are there any limits?MySQL BLOB : are there any limits?May 08, 2025 am 12:22 AM

MySQLBLOBshavelimits:TINYBLOB(255bytes),BLOB(65,535bytes),MEDIUMBLOB(16,777,215bytes),andLONGBLOB(4,294,967,295bytes).TouseBLOBseffectively:1)ConsiderperformanceimpactsandstorelargeBLOBsexternally;2)Managebackupsandreplicationcarefully;3)Usepathsinst

MySQL : What are the best tools to automate users creation?MySQL : What are the best tools to automate users creation?May 08, 2025 am 12:22 AM

The best tools and technologies for automating the creation of users in MySQL include: 1. MySQLWorkbench, suitable for small to medium-sized environments, easy to use but high resource consumption; 2. Ansible, suitable for multi-server environments, simple but steep learning curve; 3. Custom Python scripts, flexible but need to ensure script security; 4. Puppet and Chef, suitable for large-scale environments, complex but scalable. Scale, learning curve and integration needs should be considered when choosing.

MySQL: Can I search inside a blob?MySQL: Can I search inside a blob?May 08, 2025 am 12:20 AM

Yes,youcansearchinsideaBLOBinMySQLusingspecifictechniques.1)ConverttheBLOBtoaUTF-8stringwithCONVERTfunctionandsearchusingLIKE.2)ForcompressedBLOBs,useUNCOMPRESSbeforeconversion.3)Considerperformanceimpactsanddataencoding.4)Forcomplexdata,externalproc

MySQL String Data Types: A Comprehensive GuideMySQL String Data Types: A Comprehensive GuideMay 08, 2025 am 12:14 AM

MySQLoffersvariousstringdatatypes:1)CHARforfixed-lengthstrings,idealforconsistentlengthdatalikecountrycodes;2)VARCHARforvariable-lengthstrings,suitableforfieldslikenames;3)TEXTtypesforlargertext,goodforblogpostsbutcanimpactperformance;4)BINARYandVARB

Mastering MySQL BLOBs: A Step-by-Step TutorialMastering MySQL BLOBs: A Step-by-Step TutorialMay 08, 2025 am 12:01 AM

TomasterMySQLBLOBs,followthesesteps:1)ChoosetheappropriateBLOBtype(TINYBLOB,BLOB,MEDIUMBLOB,LONGBLOB)basedondatasize.2)InsertdatausingLOAD_FILEforefficiency.3)Storefilereferencesinsteadoffilestoimproveperformance.4)UseDUMPFILEtoretrieveandsaveBLOBsco

BLOB Data Type in MySQL: A Detailed Overview for DevelopersBLOB Data Type in MySQL: A Detailed Overview for DevelopersMay 07, 2025 pm 05:41 PM

BlobdatatypesinmysqlareusedforvoringLargebinarydatalikeImagesoraudio.1) Useblobtypes (tinyblobtolongblob) Basedondatasizeneeds. 2) Storeblobsin Perplate Petooptimize Performance.3) ConsidersxterNal Storage Forel Blob Romana DatabasesizerIndimprovebackupupe

How to Add Users to MySQL from the Command LineHow to Add Users to MySQL from the Command LineMay 07, 2025 pm 05:01 PM

ToadduserstoMySQLfromthecommandline,loginasroot,thenuseCREATEUSER'username'@'host'IDENTIFIEDBY'password';tocreateanewuser.GrantpermissionswithGRANTALLPRIVILEGESONdatabase.*TO'username'@'host';anduseFLUSHPRIVILEGES;toapplychanges.Alwaysusestrongpasswo

What Are the Different String Data Types in MySQL? A Detailed OverviewWhat Are the Different String Data Types in MySQL? A Detailed OverviewMay 07, 2025 pm 03:33 PM

MySQLofferseightstringdatatypes:CHAR,VARCHAR,BINARY,VARBINARY,BLOB,TEXT,ENUM,andSET.1)CHARisfixed-length,idealforconsistentdatalikecountrycodes.2)VARCHARisvariable-length,efficientforvaryingdatalikenames.3)BINARYandVARBINARYstorebinarydata,similartoC

See all articles

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Tools

SAP NetWeaver Server Adapter for Eclipse

SAP NetWeaver Server Adapter for Eclipse

Integrate Eclipse with SAP NetWeaver application server.

DVWA

DVWA

Damn Vulnerable Web App (DVWA) is a PHP/MySQL web application that is very vulnerable. Its main goals are to be an aid for security professionals to test their skills and tools in a legal environment, to help web developers better understand the process of securing web applications, and to help teachers/students teach/learn in a classroom environment Web application security. The goal of DVWA is to practice some of the most common web vulnerabilities through a simple and straightforward interface, with varying degrees of difficulty. Please note that this software

MinGW - Minimalist GNU for Windows

MinGW - Minimalist GNU for Windows

This project is in the process of being migrated to osdn.net/projects/mingw, you can continue to follow us there. MinGW: A native Windows port of the GNU Compiler Collection (GCC), freely distributable import libraries and header files for building native Windows applications; includes extensions to the MSVC runtime to support C99 functionality. All MinGW software can run on 64-bit Windows platforms.

SublimeText3 Linux new version

SublimeText3 Linux new version

SublimeText3 Linux latest version

WebStorm Mac version

WebStorm Mac version

Useful JavaScript development tools