View Javadoc

1   /*
2   jMimeMagic(TM) is a Java library for determining the content type of files or
3   streams.
4   
5   Copyright (C) 2004 David Castro
6   
7   This library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10  version 2.1 of the License, or (at your option) any later version.
11  
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  Lesser General Public License for more details.
16  
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  
21  For more information, please email arimus@users.sourceforge.net
22  */
23  package net.sf.jmimemagic;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.oro.text.perl.Perl5Util;
28  
29  import java.io.File;
30  import java.io.IOException;
31  import java.io.RandomAccessFile;
32  
33  import java.nio.ByteBuffer;
34  import java.nio.ByteOrder;
35  
36  import java.util.ArrayList;
37  import java.util.Collection;
38  import java.util.Iterator;
39  
40  
41  /***
42   * This class represents a single match test
43   *
44   * @author $Author: arimus $
45   * @version $Revision: 1.1 $
46   */
47  public class MagicMatcher implements Cloneable
48  {
49      private static Log log = LogFactory.getLog(MagicMatcher.class);
50      private ArrayList subMatchers = new ArrayList(0);
51      private MagicMatch match = null;
52  
53      /*** 
54       * constructor 
55       */
56      public MagicMatcher()
57      {
58          log.debug("instantiated");
59      }
60  
61      /***
62       * DOCUMENT ME!
63       *
64       * @param match DOCUMENT ME!
65       */
66      public void setMatch(MagicMatch match)
67      {
68          log.debug("setMatch()");
69          this.match = match;
70      }
71  
72      /***
73       * DOCUMENT ME!
74       *
75       * @return DOCUMENT ME!
76       */
77      public MagicMatch getMatch()
78      {
79          log.debug("getMatch()");
80  
81          return this.match;
82      }
83  
84      /***
85       * test to see if everything is in order for this match
86       *
87       * @return whether or not this match has enough data to be valid
88       */
89      public boolean isValid()
90      {
91          log.debug("isValid()");
92  
93          if ((match == null) || (match.getTest() == null)) {
94              return false;
95          }
96  
97          String type = new String(match.getTest().array());
98          char comparator = match.getComparator();
99          String description = match.getDescription();
100         String test = new String(match.getTest().array());
101 
102         if ((type != null) && !type.equals("") && (comparator != '\0') &&
103                 ((comparator == '=') || (comparator == '!') || (comparator == '>') ||
104                 (comparator == '<')) && (description != null) && !description.equals("") &&
105                 (test != null) && !test.equals("")) {
106             return true;
107         }
108 
109         return false;
110     }
111 
112     /***
113      * add a submatch to this magic match
114      *
115      * @param m a magic match
116      */
117     public void addSubMatcher(MagicMatcher m)
118     {
119         log.debug("addSubMatcher()");
120         subMatchers.add(m);
121     }
122 
123     /***
124      * set all submatches
125      *
126      * @param a a collection of submatches
127      */
128     public void setSubMatchers(Collection a)
129     {
130         log.debug("setSubMatchers(): for match '" + match.getDescription() + "'");
131         subMatchers.clear();
132         subMatchers.addAll(a);
133     }
134 
135     /***
136      * get all submatches for this magic match
137      *
138      * @return a collection of submatches
139      */
140     public Collection getSubMatchers()
141     {
142         log.debug("getSubMatchers()");
143 
144         return subMatchers;
145     }
146 
147     /***
148      * test to see if this match or any submatches match
149      *
150      * @param f the file that should be used to test the match
151      * @param onlyMimeMatch DOCUMENT ME!
152      *
153      * @return the deepest magic match object that matched
154      *
155      * @throws IOException DOCUMENT ME!
156      * @throws UnsupportedTypeException DOCUMENT ME!
157      */
158     public MagicMatch test(File f, boolean onlyMimeMatch)
159         throws IOException, UnsupportedTypeException
160     {
161         log.debug("test(File)");
162 
163         int offset = match.getOffset();
164         String description = match.getDescription();
165         String type = match.getType();
166         String mimeType = match.getMimeType();
167 
168         log.debug("test(File): testing '" + f.getName() + "' for '" + description + "'");
169 
170         log.debug("test(File): \n=== BEGIN MATCH INFO ==");
171         log.debug(match.print());
172         log.debug("test(File): \n=== END MATCH INFO ====\n");
173 
174         RandomAccessFile file = null;
175         file = new RandomAccessFile(f, "r");
176 
177         try {
178             int length = 0;
179 
180             if (type.equals("byte")) {
181                 length = 1;
182             } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
183                 length = 4;
184             } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
185                 length = 8;
186             } else if (type.equals("string")) {
187                 length = match.getTest().capacity();
188             } else if (type.equals("regex")) {
189                 length = (int) file.length() - offset;
190 
191                 if (length < 0) {
192                     length = 0;
193                 }
194             } else if (type.equals("detector")) {
195                 length = (int) file.length() - offset;
196 
197                 if (length < 0) {
198                     length = 0;
199                 }
200             } else {
201                 throw new UnsupportedTypeException("unsupported test type '" + type + "'");
202             }
203 
204             // we know this match won't work since there isn't enough data for the test
205             if (length > (file.length() - offset)) {
206                 return null;
207             }
208 
209             byte[] buf = new byte[length];
210             file.seek(offset);
211 
212             int bytesRead = 0;
213             int size = 0;
214             boolean gotAllBytes = false;
215             boolean done = false;
216 
217             while (!done) {
218                 size = file.read(buf, 0, length - bytesRead);
219 
220                 if (size == -1) {
221                     throw new IOException("reached end of file before all bytes were read");
222                 }
223 
224                 bytesRead += size;
225 
226                 if (bytesRead == length) {
227                     gotAllBytes = true;
228                     done = true;
229                 }
230             }
231 
232             log.debug("test(File): stream size is '" + buf.length + "'");
233 
234             MagicMatch match = null;
235             MagicMatch submatch = null;
236 
237             if (testInternal(buf)) {
238                 // set the top level match to this one
239                 match = getMatch();
240 
241                 log.debug("test(File): testing matched '" + description + "'");
242 
243                 // set the data on this match
244                 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
245                     log.debug("test(File): testing " + subMatchers.size() + " submatches for '" +
246                         description + "'");
247 
248                     for (int i = 0; i < subMatchers.size(); i++) {
249                         log.debug("test(File): testing submatch " + i);
250 
251                         MagicMatcher m = (MagicMatcher) subMatchers.get(i);
252 
253                         if ((submatch = m.test(f, false)) != null) {
254                             log.debug("test(File): submatch " + i + " matched with '" +
255                                 submatch.getDescription() + "'");
256                             match.addSubMatch(submatch);
257                         } else {
258                             log.debug("test(File): submatch " + i + " doesn't match");
259                         }
260                     }
261                 }
262             }
263 
264             return match;
265         } finally {
266             try {
267                 file.close();
268             } catch (Exception fce) {
269             }
270         }
271     }
272 
273     /***
274      * test to see if this match or any submatches match
275      *
276      * @param data the data that should be used to test the match
277      * @param onlyMimeMatch DOCUMENT ME!
278      *
279      * @return the deepest magic match object that matched
280      *
281      * @throws IOException DOCUMENT ME!
282      * @throws UnsupportedTypeException DOCUMENT ME!
283      */
284     public MagicMatch test(byte[] data, boolean onlyMimeMatch)
285         throws IOException, UnsupportedTypeException
286     {
287         log.debug("test(byte[])");
288 
289         int offset = match.getOffset();
290         String description = match.getDescription();
291         String type = match.getType();
292         String test = new String(match.getTest().array());
293         String mimeType = match.getMimeType();
294 
295         log.debug("test(byte[]): testing byte[] data for '" + description + "'");
296 
297         log.debug("test(byte[]): \n=== BEGIN MATCH INFO ==");
298         log.debug(match.print());
299         log.debug("test(byte[]): \n=== END MATCH INFO ====\n");
300 
301         int length = 0;
302 
303         if (type.equals("byte")) {
304             length = 1;
305         } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
306             length = 4;
307         } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
308             length = 8;
309         } else if (type.equals("string")) {
310             length = match.getTest().capacity();
311         } else if (type.equals("regex")) {
312             // FIXME - something wrong here, shouldn't have to subtract 1???
313             length = data.length - offset - 1;
314 
315             if (length < 0) {
316                 length = 0;
317             }
318         } else if (type.equals("detector")) {
319             // FIXME - something wrong here, shouldn't have to subtract 1???
320             length = data.length - offset - 1;
321 
322             if (length < 0) {
323                 length = 0;
324             }
325         } else {
326             throw new UnsupportedTypeException("unsupported test type " + type);
327         }
328 
329         byte[] buf = new byte[length];
330         log.debug("test(byte[]): offset=" + offset + ",length=" + length + ",data length=" +
331             data.length);
332 
333         if ((offset + length) < data.length) {
334             System.arraycopy(data, offset, buf, 0, length);
335 
336             log.debug("test(byte[]): stream size is '" + buf.length + "'");
337 
338             MagicMatch match = null;
339             MagicMatch submatch = null;
340 
341             if (testInternal(buf)) {
342                 // set the top level match to this one
343                 match = getMatch();
344 
345                 log.debug("test(byte[]): testing matched '" + description + "'");
346 
347                 // set the data on this match
348                 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
349                     log.debug("test(byte[]): testing " + subMatchers.size() + " submatches for '" +
350                         description + "'");
351 
352                     for (int i = 0; i < subMatchers.size(); i++) {
353                         log.debug("test(byte[]): testing submatch " + i);
354 
355                         MagicMatcher m = (MagicMatcher) subMatchers.get(i);
356 
357                         if ((submatch = m.test(data, false)) != null) {
358                             log.debug("test(byte[]): submatch " + i + " matched with '" +
359                                 submatch.getDescription() + "'");
360                             match.addSubMatch(submatch);
361                         } else {
362                             log.debug("test(byte[]): submatch " + i + " doesn't match");
363                         }
364                     }
365                 }
366             }
367 
368             return match;
369         } else {
370             return null;
371         }
372     }
373 
374     /***
375      * internal test switch
376      * 
377      * @param data DOCUMENT ME!
378      * @return DOCUMENT ME!
379      */
380     private boolean testInternal(byte[] data)
381     {
382         log.debug("testInternal(byte[])");
383 
384         if (data.length == 0) {
385             return false;
386         }
387 
388         String type = match.getType();
389         String test = new String(match.getTest().array());
390         String mimeType = match.getMimeType();
391         String description = match.getDescription();
392 
393         ByteBuffer buffer = ByteBuffer.allocate(data.length);
394 
395         if ((type != null) && (test != null) && (test.length() > 0)) {
396             if (type.equals("string")) {
397                 buffer = buffer.put(data);
398 
399                 return testString(buffer);
400             } else if (type.equals("byte")) {
401                 buffer = buffer.put(data);
402 
403                 return testByte(buffer);
404             } else if (type.equals("short")) {
405                 buffer = buffer.put(data);
406 
407                 return testShort(buffer);
408             } else if (type.equals("leshort")) {
409                 buffer = buffer.put(data);
410                 buffer.order(ByteOrder.LITTLE_ENDIAN);
411 
412                 return testShort(buffer);
413             } else if (type.equals("beshort")) {
414                 buffer = buffer.put(data);
415                 buffer.order(ByteOrder.BIG_ENDIAN);
416 
417                 return testShort(buffer);
418             } else if (type.equals("long")) {
419                 buffer = buffer.put(data);
420 
421                 return testLong(buffer);
422             } else if (type.equals("lelong")) {
423                 buffer = buffer.put(data);
424                 buffer.order(ByteOrder.LITTLE_ENDIAN);
425 
426                 return testLong(buffer);
427             } else if (type.equals("belong")) {
428                 buffer = buffer.put(data);
429                 buffer.order(ByteOrder.BIG_ENDIAN);
430 
431                 return testLong(buffer);
432             } else if (type.equals("regex")) {
433                 return testRegex(new String(data));
434             } else if (type.equals("detector")) {
435                 buffer = buffer.put(data);
436 
437                 return testDetector(buffer);
438 
439                 //			} else if (type.equals("date")) {
440                 //				return testDate(data, BIG_ENDIAN);
441                 //			} else if (type.equals("ledate")) {
442                 //				return testDate(data, LITTLE_ENDIAN);
443                 //			} else if (type.equals("bedate")) {
444                 //				return testDate(data, BIG_ENDIAN);
445             } else {
446                 log.error("testInternal(byte[]): invalid test type '" + type + "'");
447             }
448         } else {
449             log.error("testInternal(byte[]): type or test is empty for '" + mimeType + " - " +
450                 description + "'");
451         }
452 
453         return false;
454     }
455 
456     /***
457      * test the data against the test byte
458      *
459      * @param data the data we are testing
460      *
461      * @return if we have a match
462      */
463     private boolean testByte(ByteBuffer data)
464     {
465         log.debug("testByte()");
466 
467         String test = new String(match.getTest().array());
468         char comparator = match.getComparator();
469         long bitmask = match.getBitmask();
470 
471         String s = test;
472         byte b = data.get(0);
473         b = (byte) (b & bitmask);
474         log.debug("testByte(): decoding '" + test + "' to byte");
475 
476         int tst = Integer.decode(test).byteValue();
477         byte t = (byte) (tst & 0xff);
478         log.debug("testByte(): applying bitmask '" + bitmask + "' to '" + tst + "', result is '" +
479             t + "'");
480         log.debug("testByte(): comparing byte '" + b + "' to '" + t + "'");
481 
482         switch (comparator) {
483         case '=':
484             return t == b;
485 
486         case '!':
487             return t != b;
488 
489         case '>':
490             return t > b;
491 
492         case '<':
493             return t < b;
494         }
495 
496         return false;
497     }
498 
499     /***
500      * test the data against the byte array
501      *
502      * @param data the data we are testing
503      *
504      * @return if we have a match
505      */
506     private boolean testString(ByteBuffer data)
507     {
508         log.debug("testString()");
509 
510         ByteBuffer test = match.getTest();
511         char comparator = match.getComparator();
512 
513         byte[] b = data.array();
514         byte[] t = test.array();
515 
516         boolean diff = false;
517         int i = 0;
518 
519         for (i = 0; i < t.length; i++) {
520             log.debug("testing byte '" + b[i] + "' from '" + new String(data.array()) +
521                 "' against byte '" + t[i] + "' from '" + new String(test.array()) + "'");
522 
523             if (t[i] != b[i]) {
524                 diff = true;
525 
526                 break;
527             }
528         }
529 
530         switch (comparator) {
531         case '=':
532             return !diff;
533 
534         case '!':
535             return diff;
536 
537         case '>':
538             return t[i] > b[i];
539 
540         case '<':
541             return t[i] < b[i];
542         }
543 
544         return false;
545     }
546 
547     /***
548      * test the data against a short
549      *
550      * @param data the data we are testing
551      *
552      * @return if we have a match
553      */
554     private boolean testShort(ByteBuffer data)
555     {
556         log.debug("testShort()");
557 
558         short val = 0;
559         String test = new String(match.getTest().array());
560         char comparator = match.getComparator();
561         long bitmask = match.getBitmask();
562 
563         val = byteArrayToShort(data);
564 
565         // apply bitmask before the comparison
566         val = (short) (val & (short) bitmask);
567 
568         short tst = 0;
569 
570         try {
571             tst = Integer.decode(test).shortValue();
572         } catch (NumberFormatException e) {
573             log.error("testShort(): " + e);
574 
575             return false;
576 
577             //if (test.length() == 1) {	
578             //	tst = new Integer(Character.getNumericValue(test.charAt(0))).shortValue();
579             //}
580         }
581 
582         log.debug("testShort(): testing '" + Long.toHexString(val) + "' against '" +
583             Long.toHexString(tst) + "'");
584 
585         switch (comparator) {
586         case '=':
587             return val == tst;
588 
589         case '!':
590             return val != tst;
591 
592         case '>':
593             return val > tst;
594 
595         case '<':
596             return val < tst;
597         }
598 
599         return false;
600     }
601 
602     /***
603      * test the data against a long
604      *
605      * @param data the data we are testing
606      *
607      * @return if we have a match
608      */
609     private boolean testLong(ByteBuffer data)
610     {
611         log.debug("testLong()");
612 
613         long val = 0;
614         String test = new String(match.getTest().array());
615         char comparator = match.getComparator();
616         long bitmask = match.getBitmask();
617 
618         val = byteArrayToLong(data);
619 
620         // apply bitmask before the comparison
621         val = val & bitmask;
622 
623         long tst = Long.decode(test).longValue();
624 
625         log.debug("testLong(): testing '" + Long.toHexString(val) + "' against '" + test +
626             "' => '" + Long.toHexString(tst) + "'");
627 
628         switch (comparator) {
629         case '=':
630             return val == tst;
631 
632         case '!':
633             return val != tst;
634 
635         case '>':
636             return val > tst;
637 
638         case '<':
639             return val < tst;
640         }
641 
642         return false;
643     }
644 
645     /***
646      * test the data against a regex
647      *
648      * @param text the data we are testing
649      *
650      * @return if we have a match
651      */
652     private boolean testRegex(String text)
653     {
654         log.debug("testRegex()");
655 
656         String test = new String(match.getTest().array());
657         char comparator = match.getComparator();
658 
659         Perl5Util utility = new Perl5Util();
660         log.debug("testRegex(): searching for '" + test + "'");
661 
662         if (comparator == '=') {
663             if (utility.match(test, text)) {
664                 return true;
665             } else {
666                 return false;
667             }
668         } else if (comparator == '!') {
669             if (utility.match(test, text)) {
670                 return false;
671             } else {
672                 return true;
673             }
674         }
675 
676         return false;
677     }
678 
679     /***
680      * test the data using a detector
681      *
682      * @param data the data we are testing
683      *
684      * @return if we have a match
685      */
686     private boolean testDetector(ByteBuffer data)
687     {
688         log.debug("testDetector()");
689 
690         String detectorClass = new String(match.getTest().array());
691 
692         try {
693             log.debug("loading class: " + detectorClass);
694 
695             Class c = Class.forName(detectorClass);
696             MagicDetector detector = (MagicDetector) c.newInstance();
697             String[] types = detector.process(data.array(), match.getOffset(), match.getLength(),
698                     match.getBitmask(), match.getComparator(), match.getMimeType(),
699                     match.getProperties());
700 
701             if ((types != null) && (types.length > 0)) {
702                 // the match object has no mime type set, so set from the detector class processing
703                 match.setMimeType(types[0]);
704 
705                 return true;
706             }
707         } catch (ClassNotFoundException e) {
708             log.error("failed to load detector: " + detectorClass, e);
709         } catch (InstantiationException e) {
710             log.error("specified class is not a valid detector class: " + detectorClass, e);
711         } catch (IllegalAccessException e) {
712             log.error("specified class cannot be accessed: " + detectorClass, e);
713         }
714 
715         return false;
716     }
717 
718     /***
719      * Get the extensions for the underlying detectory
720      *
721      * @return DOCUMENT ME!
722      */
723     public String[] getDetectorExtensions()
724     {
725         log.debug("testDetector()");
726 
727         String detectorClass = new String(match.getTest().array());
728 
729         try {
730             log.debug("loading class: " + detectorClass);
731 
732             Class c = Class.forName(detectorClass);
733             MagicDetector detector = (MagicDetector) c.newInstance();
734 
735             return detector.getHandledTypes();
736         } catch (ClassNotFoundException e) {
737             log.error("failed to load detector: " + detectorClass, e);
738         } catch (InstantiationException e) {
739             log.error("specified class is not a valid detector class: " + detectorClass, e);
740         } catch (IllegalAccessException e) {
741             log.error("specified class cannot be accessed: " + detectorClass, e);
742         }
743 
744         return new String[0];
745     }
746 
747     /***
748      * encode a byte as an octal string
749      *
750      * @param b a byte of data
751      *
752      * @return an octal representation of the byte data
753      */
754     private String byteToOctalString(byte b)
755     {
756         int n1;
757         int n2;
758         int n3;
759         n1 = (b / 32) & 7;
760         n2 = (b / 8) & 7;
761         n3 = b & 7;
762 
763         return String.valueOf(n1) + String.valueOf(n2) + String.valueOf(n3);
764     }
765 
766     /***
767      * convert a byte array to a short
768      *
769      * @param data buffer of byte data
770      *
771      * @return byte array converted to a short
772      */
773     private short byteArrayToShort(ByteBuffer data)
774     {
775         return data.getShort(0);
776     }
777 
778     /***
779      * convert a byte array to a long
780      *
781      * @param data buffer of byte data
782      *
783      * @return byte arrays (high and low bytes) converted to a long value
784      */
785     private long byteArrayToLong(ByteBuffer data)
786     {
787         return (long) data.getInt(0);
788     }
789 
790     /***
791      * DOCUMENT ME!
792      *
793      * @return DOCUMENT ME!
794      *
795      * @throws CloneNotSupportedException DOCUMENT ME!
796      */
797     protected Object clone()
798         throws CloneNotSupportedException
799     {
800         MagicMatcher clone = new MagicMatcher();
801 
802         clone.setMatch((MagicMatch) match.clone());
803 
804         Iterator i = subMatchers.iterator();
805         ArrayList sub = new ArrayList();
806 
807         while (i.hasNext()) {
808             MagicMatcher m = (MagicMatcher) i.next();
809             sub.add(m.clone());
810         }
811 
812         clone.setSubMatchers(sub);
813 
814         return clone;
815     }
816 }