001/* A program to find non-ASCII characters in a file
002 *
003 * Copyright (C) 2015 Sidney Marshall (swm@cs.rit.edu)
004 *
005 * This program is free software: you can redistribute it and/or
006 * modify it under the terms of the GNU General Public License as
007 * published by the Free Software Foundation, either version 3 of the
008 * License, or (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful, but
011 * WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013 * General Public License for more details.
014 *
015 * You should have received a copy of the GNU General Public License
016 * along with this program.  If not, see
017 * <http://www.gnu.org/licenses/>.
018 */
019
020import java.io.FileInputStream;
021import java.io.InputStream;
022import java.util.ArrayList;
023
/**
 * This class looks for "funny" characters in a file. Funny
 * characters are bytes that are not between 0x20 and 0x7e or 0x0a (a
 * new-line). Note that this makes a tab (0x09) and a carriage return
 * (0x0d) funny as well.
 *
 * For every line that contains at least one funny byte, only the
 * first such byte is reported: the line number, the line itself (with
 * tabs shown as spaces so the marker lines up), a caret under the
 * offending byte, and the decimal values of that byte and of up to
 * two bytes that follow it on the same line.
 */
class Clean {
  /** Lowest byte value that is considered ordinary (a space). */
  private static final int FIRST_PLAIN = 0x20;

  /** Highest byte value that is considered ordinary (a tilde). */
  private static final int LAST_PLAIN = 0x7e;

  /** How many byte values are printed, starting at the funny byte. */
  private static final int BYTES_SHOWN = 3;

  /**
   * Search files for "funny" characters.
   *
   * @param args file names of files to search
   * @throws Exception This is just a catch-all; in practice it is an
   *         I/O error while opening or reading one of the files
   */
  public static void main(String...args) throws Exception {
    for(String filename : args) {
      System.out.println("=====================================");
      System.out.println(filename);
      System.out.println("=====================================");
      // try-with-resources closes the file even when reading fails.
      // The stream is buffered because it is consumed one byte at a
      // time. (Fully qualified so no extra import is required.)
      try(InputStream strm =
              new java.io.BufferedInputStream(new FileInputStream(filename))) {
        scan(strm);
      }
    }
  }

  /**
   * Reads the stream byte by byte, splitting it into lines at each
   * new-line (0x0a) and reporting every line that has a funny byte.
   *
   * @param strm the stream to read; it is not closed by this method
   * @throws java.io.IOException if reading from the stream fails
   */
  private static void scan(InputStream strm) throws java.io.IOException {
    // Each byte is stored as one char (0..255); no decoding is done.
    StringBuilder current = new StringBuilder();
    int line = 1;
    int c;
    while((c = strm.read()) != -1) {
      if(c != '\n') {
        current.append((char)c);
      } else {
        report(current, line);
        current.setLength(0);
        line++;
      }
    }
    // A last line that is not terminated by a new-line still has to
    // be checked, otherwise funny bytes at the end of the file are
    // silently missed.
    if(current.length() > 0) {
      report(current, line);
    }
  }

  /**
   * Prints a report for the first funny byte on a line, or nothing
   * when the line has none.
   *
   * @param text the bytes of the line, without the new-line
   * @param line the 1-based line number used in the report
   */
  private static void report(CharSequence text, int line) {
    for(int index = 0; index < text.length(); index++) {
      char ch = text.charAt(index);
      if(ch >= FIRST_PLAIN && ch <= LAST_PLAIN) {
        continue;
      }
      System.out.println("line: " + line);
      // Tabs are shown as single spaces so that the caret below
      // lands under the offending byte.
      System.out.println(text.toString().replace('\t', ' '));
      for(int i = 0; i < index; i++) {
        System.out.print(' ');
      }
      System.out.println('^');
      int end = Math.min(index + BYTES_SHOWN, text.length());
      for(int i = index; i < end; i++) {
        System.out.println((int)text.charAt(i));
      }
      return;  // only the first funny byte of a line is reported
    }
  }
}  // class Clean