/* A program to find non-ASCII characters in a file
 *
 * Copyright (C) 2015 Sidney Marshall (swm@cs.rit.edu)
 *
 * This program is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see
 * <http://www.gnu.org/licenses/>.
 */

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * This class looks for "funny" characters in a file. A byte is funny
 * when it lies outside the range 0x20..0x7e. The new-line byte 0x0a is
 * never reported because it only serves as the line terminator. Every
 * other control byte, including tab (0x09) and carriage return (0x0d),
 * counts as funny.
 */
class Clean {
  /** Smallest byte value that is not considered funny (space). */
  private static final int LOWEST_PLAIN = 0x20;

  /** Largest byte value that is not considered funny (tilde). */
  private static final int HIGHEST_PLAIN = 0x7e;

  /** How many byte values are printed, starting at the funny one. */
  private static final int VALUES_SHOWN = 3;

  /**
   * Search files for "funny" characters.
   *
   * For every file a three-line banner with the file name is printed,
   * followed by one report per line that contains a funny byte. Only
   * the first funny byte of each line is reported.
   *
   * @param args file names of files to search
   * @throws Exception if a file cannot be opened or read
   */
  public static void main(String... args) throws Exception {
    for (String filename : args) {
      System.out.println("=====================================");
      System.out.println(filename);
      System.out.println("=====================================");
      // Buffered so that the byte-at-a-time loop in scan() does not hit
      // the operating system once per byte.
      InputStream strm = new BufferedInputStream(new FileInputStream(filename));
      try {
        scan(strm);
      } finally {
        // Release the file handle even when reading fails part-way.
        strm.close();
      }
    }
  }

  /**
   * Reads the stream byte by byte, splits it at new-line bytes and
   * reports every line that contains a funny byte. Lines are numbered
   * from 1. A final line that is not terminated by a new-line is
   * checked as well.
   *
   * @param strm stream to read until end of file; not closed here
   * @throws IOException if reading from the stream fails
   */
  private static void scan(InputStream strm) throws IOException {
    StringBuilder current = new StringBuilder();
    int line = 1;
    int c;
    while ((c = strm.read()) != -1) {
      if (c != '\n') {
        // read() yields 0..255, so the cast keeps the byte value.
        current.append((char) c);
      } else {
        report(current, line);
        current.setLength(0);
        line++;
      }
    }
    // The file may end without a new-line; the bytes collected since
    // the last new-line still form a line that has to be checked.
    if (current.length() > 0) {
      report(current, line);
    }
  }

  /**
   * Prints a report for the first funny byte of a line, or nothing when
   * the line has none. The report consists of the line number, the
   * line with tabs shown as single spaces, a caret under the offending
   * position, and the numeric values of the offending byte and of up to
   * two bytes that follow it on the same line.
   *
   * @param text bytes of one line, without the terminating new-line
   * @param line 1-based number of the line within its file
   */
  private static void report(CharSequence text, int line) {
    int index = 0;
    while (index < text.length() && !isFunny(text.charAt(index))) {
      index++;
    }
    if (index == text.length()) {
      return;
    }

    // Tabs become one space each so that the caret printed below lines
    // up with the offending column.
    StringBuilder shown = new StringBuilder(text.length());
    for (int i = 0; i < text.length(); i++) {
      char ch = text.charAt(i);
      shown.append(ch == '\t' ? ' ' : ch);
    }

    StringBuilder pointer = new StringBuilder(index + 1);
    for (int i = 0; i < index; i++) {
      pointer.append(' ');
    }
    pointer.append('^');

    System.out.println("line: " + line);
    System.out.println(shown);
    System.out.println(pointer);
    for (int i = index; i < index + VALUES_SHOWN && i < text.length(); i++) {
      System.out.println((int) text.charAt(i));
    }
  }

  /**
   * Tells whether a byte value is funny.
   *
   * @param ch byte value held in a char
   * @return true when ch is below 0x20 or above 0x7e
   */
  private static boolean isFunny(char ch) {
    return ch < LOWEST_PLAIN || ch > HIGHEST_PLAIN;
  }
} // class Clean