tess4j3.2.1识别中英文

2022-09-06 11:18:48 浏览数 (1)

大家好,又见面了,我是你们的朋友全栈君。

1. 测试代码

代码语言:java
public class TesseractExample {
	 public static void main(String[] args) throws Exception {
	    //1.本地
	    //File imageFile = new File("C:\wangl\eurotext.tif");
		//InputStream is = new FileInputStream(imageFile);; 
		//2.url http
	    //InputStream is = downLoadFromUrl("http://s1.sinaimg.cn/mw690/003bsgbmgy6R6ekxkl2e0"); 
		//3.url https
		InputStream is = downLoadFromUrl("https://login.10333.com/dotoyo//captchacode");
		    
		BufferedImage bi = ImageIO.read(is);
	    ITesseract instance = new Tesseract();  // JNA Interface Mapping
        //设置datapath
	    instance.setDatapath("C:\wangl\soft\tess4j\tessdata");  
        //设置语言包
        instance.setLanguage("eng chi_sim");
	    try {
	        String result = instance.doOCR(bi);
	        System.out.println(result);
	    } catch (TesseractException e) {
	        System.err.println(e.getMessage());
	    }
	    if(is != null){
	       is.close();
	    }
	 }
	 
	 public static InputStream downLoadFromUrl(String urlStr) throws Exception{
             //指定证书文件
		System.setProperty("javax.net.ssl.trustStore","C:\wangl\soft\Java\jdk1.7.0_79\jre\lib\security\jssecacerts");
	        URL url = new URL(urlStr);  
	        HttpURLConnection conn = (HttpURLConnection)url.openConnection();  
	        //信任所有的https证书
	        trustAllHttpsCertificates();
	        HttpsURLConnection.setDefaultHostnameVerifier(hv);
	        //设置超时间为3秒
	        conn.setConnectTimeout(3*1000);
	        //防止屏蔽程序抓取而返回403错误
	        conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
	        //得到输入流
	        InputStream inputStream = conn.getInputStream();  
	        return inputStream;
	}
	 
	public static HostnameVerifier hv = new HostnameVerifier() {
	        public boolean verify(String urlHostName, SSLSession session) {
	            System.out.println("Warning: URL Host: "   urlHostName   " vs. "
	                                 session.getPeerHost());
	            return true;
	        }
	};
		
	private static void trustAllHttpsCertificates() throws Exception {
		javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1];
		javax.net.ssl.TrustManager tm = new miTM();
		trustAllCerts[0] = tm;
		javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext
				.getInstance("SSL");
		sc.init(null, trustAllCerts, null);
		javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc
				.getSocketFactory());
	}

	static class miTM implements javax.net.ssl.TrustManager,
			javax.net.ssl.X509TrustManager {
		public java.security.cert.X509Certificate[] getAcceptedIssuers() {
			return null;
		}

		public boolean isServerTrusted(
				java.security.cert.X509Certificate[] certs) {
			return true;
		}

		public boolean isClientTrusted(
				java.security.cert.X509Certificate[] certs) {
			return true;
		}

		public void checkServerTrusted(
				java.security.cert.X509Certificate[] certs, String authType)
				throws java.security.cert.CertificateException {
			return;
		}

		public void checkClientTrusted(
				java.security.cert.X509Certificate[] certs, String authType)
				throws java.security.cert.CertificateException {
			return;
		}
	}
}

2. 设置datapath,设置语言包中英文

3. 注意:如果 https 证书不被信任,需要先导出证书(在下面的代码中填写 host、port 和证书输出路径,然后运行 InstallCert 导出证书)。

代码语言:java
/**
 * Connects to a TLS server, prints the certificate chain it presents and
 * stores the first certificate of that chain in a copy of the JVM trust store.
 *
 * <p>Host, port, key store passphrase and output path are hard-coded in the
 * constants below; edit them before running.
 */
public class InstallCert {

	/** Server whose certificate should be exported. */
	private static final String HOST = "14.17.69.168";

	/** TLS port of {@link #HOST}. */
	private static final int PORT = 443;

	/** Passphrase used both to load the source key store and to write the new one. */
	private static final String PASSPHRASE = "changeit";

	/** File the updated key store is written to. */
	private static final String OUTPUT_KEYSTORE = "c://wangl//https//jssecacerts";

	/**
	 * Runs the export: loads the trust store, performs a handshake to capture
	 * the server chain, prints it and writes the updated key store.
	 *
	 * @param args unused
	 * @throws Exception on any I/O, key store or SSL setup failure
	 */
	public static void main(String[] args) throws Exception {
		String host = HOST;
		int port = PORT;
		char[] passphrase = PASSPHRASE.toCharArray();

		File file = locateTrustStore();
		System.out.println("Loading KeyStore " + file + "...");
		KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
		try (InputStream in = new FileInputStream(file)) {
			ks.load(in, passphrase);
		}

		SSLContext context = SSLContext.getInstance("TLS");
		TrustManagerFactory tmf = TrustManagerFactory
				.getInstance(TrustManagerFactory.getDefaultAlgorithm());
		tmf.init(ks);
		X509TrustManager defaultTrustManager = (X509TrustManager) tmf
				.getTrustManagers()[0];
		// Wrap the default manager so the server chain is captured even when
		// validation fails.
		SavingTrustManager tm = new SavingTrustManager(defaultTrustManager);
		context.init(null, new TrustManager[] { tm }, null);
		SSLSocketFactory factory = context.getSocketFactory();

		System.out
				.println("Opening connection to " + host + ":" + port + "...");
		SSLSocket socket = (SSLSocket) factory.createSocket(host, port);
		try {
			socket.setSoTimeout(10000);
			System.out.println("Starting SSL handshake...");
			socket.startHandshake();
			System.out.println();
			System.out.println("No errors, certificate is already trusted");
		} catch (SSLException e) {
			// Expected when the certificate is not trusted yet; the chain has
			// already been recorded by the SavingTrustManager.
			System.out.println();
			e.printStackTrace(System.out);
		} finally {
			// Close the socket on the failure path too.
			socket.close();
		}

		X509Certificate[] chain = tm.chain;
		if (chain == null) {
			System.out.println("Could not obtain server certificate chain");
			return;
		}

		printChain(chain);

		System.out
				.println("Enter certificate to add to trusted keystore or 'q' to quit: [1]");
		// The interactive prompt is not read; the first certificate is always chosen.
		String line = "1";
		int k;
		try {
			k = (line.length() == 0) ? 0 : Integer.parseInt(line) - 1;
		} catch (NumberFormatException e) {
			System.out.println("KeyStore not changed");
			return;
		}
		if (k < 0 || k >= chain.length) {
			// Guards against an empty chain or an out-of-range selection.
			System.out.println("KeyStore not changed");
			return;
		}

		X509Certificate cert = chain[k];
		String alias = host + "-" + (k + 1);
		ks.setCertificateEntry(alias, cert);

		try (OutputStream out = new FileOutputStream(OUTPUT_KEYSTORE)) {
			ks.store(out, passphrase);
		}

		System.out.println();
		System.out.println(cert);
		System.out.println();
		System.out
				.println("Added certificate to keystore 'jssecacerts' using alias '"
						+ alias + "'");
	}

	/**
	 * Picks the key store to load: {@code jssecacerts} in the working directory,
	 * else {@code jssecacerts} in {@code <java.home>/lib/security}, else
	 * {@code cacerts} in that same directory.
	 */
	private static File locateTrustStore() {
		File file = new File("jssecacerts");
		if (!file.isFile()) {
			char sep = File.separatorChar;
			File dir = new File(System.getProperty("java.home") + sep + "lib"
					+ sep + "security");
			file = new File(dir, "jssecacerts");
			if (!file.isFile()) {
				file = new File(dir, "cacerts");
			}
		}
		return file;
	}

	/** Prints subject, issuer and SHA-1 / MD5 fingerprints of every certificate in the chain. */
	private static void printChain(X509Certificate[] chain) throws Exception {
		System.out.println();
		System.out.println("Server sent " + chain.length + " certificate(s):");
		System.out.println();
		// SHA-1 and MD5 are used only to display fingerprints, not for security decisions.
		MessageDigest sha1 = MessageDigest.getInstance("SHA1");
		MessageDigest md5 = MessageDigest.getInstance("MD5");
		for (int i = 0; i < chain.length; i++) {
			X509Certificate cert = chain[i];
			System.out.println(" " + (i + 1) + " Subject "
					+ cert.getSubjectDN());
			System.out.println("   Issuer  " + cert.getIssuerDN());
			sha1.update(cert.getEncoded());
			System.out.println("   sha1    " + toHexString(sha1.digest()));
			md5.update(cert.getEncoded());
			System.out.println("   md5     " + toHexString(md5.digest()));
			System.out.println();
		}
	}

	private static final char[] HEXDIGITS = "0123456789abcdef".toCharArray();

	/**
	 * Formats bytes as lower-case two-digit hex values, each followed by a space.
	 *
	 * @param bytes bytes to format
	 * @return for example {@code "0f ff "}; the empty string for an empty array
	 */
	private static String toHexString(byte[] bytes) {
		StringBuilder sb = new StringBuilder(bytes.length * 3);
		for (int b : bytes) {
			b &= 0xff; // drop the sign extension so the value indexes 0..255
			sb.append(HEXDIGITS[b >> 4]);
			sb.append(HEXDIGITS[b & 15]);
			sb.append(' ');
		}
		return sb.toString();
	}

	/**
	 * Trust manager that records the server chain it is asked to validate and
	 * then delegates the validation to the wrapped manager.
	 */
	private static class SavingTrustManager implements X509TrustManager {

		private final X509TrustManager tm;
		private X509Certificate[] chain;

		SavingTrustManager(X509TrustManager tm) {
			this.tm = tm;
		}

		/**
		 * Returns an empty array. The interface requires a non-null result, and
		 * throwing here aborts the handshake on Java 7 and later.
		 */
		@Override
		public X509Certificate[] getAcceptedIssuers() {
			return new X509Certificate[0];
		}

		/** Not supported: this manager is only used for client-side connections. */
		@Override
		public void checkClientTrusted(X509Certificate[] chain, String authType)
				throws CertificateException {
			throw new UnsupportedOperationException();
		}

		/** Saves the chain first, so it is available even if validation throws. */
		@Override
		public void checkServerTrusted(X509Certificate[] chain, String authType)
				throws CertificateException {
			this.chain = chain;
			tm.checkServerTrusted(chain, authType);
		}
	}

}

4. 使用 https 时,需要在第 1 节的代码中通过 javax.net.ssl.trustStore 指定导出的证书文件。

5.图片信息为: 汉字验证码–demo分享

最终结果显示: 汉字验证码汴demo分享

OK,识别率还可以,但存在提升的空间!

6.如果追求高识别率,可以考虑付费的国外泰比和国内的汉王.

发布者:全栈程序员栈长,转载请注明出处:https://javaforall.cn/134715.html

原文链接:https://javaforall.cn

0 人点赞