1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  
20  
21  
22  
23  package net.sf.jmimemagic;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.oro.text.perl.Perl5Util;
28  
29  import java.io.File;
30  import java.io.IOException;
31  import java.io.RandomAccessFile;
32  
33  import java.nio.ByteBuffer;
34  import java.nio.ByteOrder;
35  
36  import java.util.ArrayList;
37  import java.util.Collection;
38  import java.util.Iterator;
39  
40  
41  /***
42   * This class represents a single match test
43   *
44   * @author $Author: arimus $
45   * @version $Revision: 1.1 $
46   */
47  public class MagicMatcher implements Cloneable
48  {
49      private static Log log = LogFactory.getLog(MagicMatcher.class);
50      private ArrayList subMatchers = new ArrayList(0);
51      private MagicMatch match = null;
52  
53      /*** 
54       * constructor 
55       */
56      public MagicMatcher()
57      {
58          log.debug("instantiated");
59      }
60  
61      /***
62       * DOCUMENT ME!
63       *
64       * @param match DOCUMENT ME!
65       */
66      public void setMatch(MagicMatch match)
67      {
68          log.debug("setMatch()");
69          this.match = match;
70      }
71  
72      /***
73       * DOCUMENT ME!
74       *
75       * @return DOCUMENT ME!
76       */
77      public MagicMatch getMatch()
78      {
79          log.debug("getMatch()");
80  
81          return this.match;
82      }
83  
84      /***
85       * test to see if everything is in order for this match
86       *
87       * @return whether or not this match has enough data to be valid
88       */
89      public boolean isValid()
90      {
91          log.debug("isValid()");
92  
93          if ((match == null) || (match.getTest() == null)) {
94              return false;
95          }
96  
97          String type = new String(match.getTest().array());
98          char comparator = match.getComparator();
99          String description = match.getDescription();
100         String test = new String(match.getTest().array());
101 
102         if ((type != null) && !type.equals("") && (comparator != '\0') &&
103                 ((comparator == '=') || (comparator == '!') || (comparator == '>') ||
104                 (comparator == '<')) && (description != null) && !description.equals("") &&
105                 (test != null) && !test.equals("")) {
106             return true;
107         }
108 
109         return false;
110     }
111 
112     /***
113      * add a submatch to this magic match
114      *
115      * @param m a magic match
116      */
117     public void addSubMatcher(MagicMatcher m)
118     {
119         log.debug("addSubMatcher()");
120         subMatchers.add(m);
121     }
122 
123     /***
124      * set all submatches
125      *
126      * @param a a collection of submatches
127      */
128     public void setSubMatchers(Collection a)
129     {
130         log.debug("setSubMatchers(): for match '" + match.getDescription() + "'");
131         subMatchers.clear();
132         subMatchers.addAll(a);
133     }
134 
135     /***
136      * get all submatches for this magic match
137      *
138      * @return a collection of submatches
139      */
140     public Collection getSubMatchers()
141     {
142         log.debug("getSubMatchers()");
143 
144         return subMatchers;
145     }
146 
147     /***
148      * test to see if this match or any submatches match
149      *
150      * @param f the file that should be used to test the match
151      * @param onlyMimeMatch DOCUMENT ME!
152      *
153      * @return the deepest magic match object that matched
154      *
155      * @throws IOException DOCUMENT ME!
156      * @throws UnsupportedTypeException DOCUMENT ME!
157      */
158     public MagicMatch test(File f, boolean onlyMimeMatch)
159         throws IOException, UnsupportedTypeException
160     {
161         log.debug("test(File)");
162 
163         int offset = match.getOffset();
164         String description = match.getDescription();
165         String type = match.getType();
166         String mimeType = match.getMimeType();
167 
168         log.debug("test(File): testing '" + f.getName() + "' for '" + description + "'");
169 
170         log.debug("test(File): \n=== BEGIN MATCH INFO ==");
171         log.debug(match.print());
172         log.debug("test(File): \n=== END MATCH INFO ====\n");
173 
174         RandomAccessFile file = null;
175         file = new RandomAccessFile(f, "r");
176 
177         try {
178             int length = 0;
179 
180             if (type.equals("byte")) {
181                 length = 1;
182             } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
183                 length = 4;
184             } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
185                 length = 8;
186             } else if (type.equals("string")) {
187                 length = match.getTest().capacity();
188             } else if (type.equals("regex")) {
189                 length = (int) file.length() - offset;
190 
191                 if (length < 0) {
192                     length = 0;
193                 }
194             } else if (type.equals("detector")) {
195                 length = (int) file.length() - offset;
196 
197                 if (length < 0) {
198                     length = 0;
199                 }
200             } else {
201                 throw new UnsupportedTypeException("unsupported test type '" + type + "'");
202             }
203 
204             
205             if (length > (file.length() - offset)) {
206                 return null;
207             }
208 
209             byte[] buf = new byte[length];
210             file.seek(offset);
211 
212             int bytesRead = 0;
213             int size = 0;
214             boolean gotAllBytes = false;
215             boolean done = false;
216 
217             while (!done) {
218                 size = file.read(buf, 0, length - bytesRead);
219 
220                 if (size == -1) {
221                     throw new IOException("reached end of file before all bytes were read");
222                 }
223 
224                 bytesRead += size;
225 
226                 if (bytesRead == length) {
227                     gotAllBytes = true;
228                     done = true;
229                 }
230             }
231 
232             log.debug("test(File): stream size is '" + buf.length + "'");
233 
234             MagicMatch match = null;
235             MagicMatch submatch = null;
236 
237             if (testInternal(buf)) {
238                 
239                 match = getMatch();
240 
241                 log.debug("test(File): testing matched '" + description + "'");
242 
243                 
244                 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
245                     log.debug("test(File): testing " + subMatchers.size() + " submatches for '" +
246                         description + "'");
247 
248                     for (int i = 0; i < subMatchers.size(); i++) {
249                         log.debug("test(File): testing submatch " + i);
250 
251                         MagicMatcher m = (MagicMatcher) subMatchers.get(i);
252 
253                         if ((submatch = m.test(f, false)) != null) {
254                             log.debug("test(File): submatch " + i + " matched with '" +
255                                 submatch.getDescription() + "'");
256                             match.addSubMatch(submatch);
257                         } else {
258                             log.debug("test(File): submatch " + i + " doesn't match");
259                         }
260                     }
261                 }
262             }
263 
264             return match;
265         } finally {
266             try {
267                 file.close();
268             } catch (Exception fce) {
269             }
270         }
271     }
272 
273     /***
274      * test to see if this match or any submatches match
275      *
276      * @param data the data that should be used to test the match
277      * @param onlyMimeMatch DOCUMENT ME!
278      *
279      * @return the deepest magic match object that matched
280      *
281      * @throws IOException DOCUMENT ME!
282      * @throws UnsupportedTypeException DOCUMENT ME!
283      */
284     public MagicMatch test(byte[] data, boolean onlyMimeMatch)
285         throws IOException, UnsupportedTypeException
286     {
287         log.debug("test(byte[])");
288 
289         int offset = match.getOffset();
290         String description = match.getDescription();
291         String type = match.getType();
292         String test = new String(match.getTest().array());
293         String mimeType = match.getMimeType();
294 
295         log.debug("test(byte[]): testing byte[] data for '" + description + "'");
296 
297         log.debug("test(byte[]): \n=== BEGIN MATCH INFO ==");
298         log.debug(match.print());
299         log.debug("test(byte[]): \n=== END MATCH INFO ====\n");
300 
301         int length = 0;
302 
303         if (type.equals("byte")) {
304             length = 1;
305         } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
306             length = 4;
307         } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
308             length = 8;
309         } else if (type.equals("string")) {
310             length = match.getTest().capacity();
311         } else if (type.equals("regex")) {
312             
313             length = data.length - offset - 1;
314 
315             if (length < 0) {
316                 length = 0;
317             }
318         } else if (type.equals("detector")) {
319             
320             length = data.length - offset - 1;
321 
322             if (length < 0) {
323                 length = 0;
324             }
325         } else {
326             throw new UnsupportedTypeException("unsupported test type " + type);
327         }
328 
329         byte[] buf = new byte[length];
330         log.debug("test(byte[]): offset=" + offset + ",length=" + length + ",data length=" +
331             data.length);
332 
333         if ((offset + length) < data.length) {
334             System.arraycopy(data, offset, buf, 0, length);
335 
336             log.debug("test(byte[]): stream size is '" + buf.length + "'");
337 
338             MagicMatch match = null;
339             MagicMatch submatch = null;
340 
341             if (testInternal(buf)) {
342                 
343                 match = getMatch();
344 
345                 log.debug("test(byte[]): testing matched '" + description + "'");
346 
347                 
348                 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
349                     log.debug("test(byte[]): testing " + subMatchers.size() + " submatches for '" +
350                         description + "'");
351 
352                     for (int i = 0; i < subMatchers.size(); i++) {
353                         log.debug("test(byte[]): testing submatch " + i);
354 
355                         MagicMatcher m = (MagicMatcher) subMatchers.get(i);
356 
357                         if ((submatch = m.test(data, false)) != null) {
358                             log.debug("test(byte[]): submatch " + i + " matched with '" +
359                                 submatch.getDescription() + "'");
360                             match.addSubMatch(submatch);
361                         } else {
362                             log.debug("test(byte[]): submatch " + i + " doesn't match");
363                         }
364                     }
365                 }
366             }
367 
368             return match;
369         } else {
370             return null;
371         }
372     }
373 
374     /***
375      * internal test switch
376      * 
377      * @param data DOCUMENT ME!
378      * @return DOCUMENT ME!
379      */
380     private boolean testInternal(byte[] data)
381     {
382         log.debug("testInternal(byte[])");
383 
384         if (data.length == 0) {
385             return false;
386         }
387 
388         String type = match.getType();
389         String test = new String(match.getTest().array());
390         String mimeType = match.getMimeType();
391         String description = match.getDescription();
392 
393         ByteBuffer buffer = ByteBuffer.allocate(data.length);
394 
395         if ((type != null) && (test != null) && (test.length() > 0)) {
396             if (type.equals("string")) {
397                 buffer = buffer.put(data);
398 
399                 return testString(buffer);
400             } else if (type.equals("byte")) {
401                 buffer = buffer.put(data);
402 
403                 return testByte(buffer);
404             } else if (type.equals("short")) {
405                 buffer = buffer.put(data);
406 
407                 return testShort(buffer);
408             } else if (type.equals("leshort")) {
409                 buffer = buffer.put(data);
410                 buffer.order(ByteOrder.LITTLE_ENDIAN);
411 
412                 return testShort(buffer);
413             } else if (type.equals("beshort")) {
414                 buffer = buffer.put(data);
415                 buffer.order(ByteOrder.BIG_ENDIAN);
416 
417                 return testShort(buffer);
418             } else if (type.equals("long")) {
419                 buffer = buffer.put(data);
420 
421                 return testLong(buffer);
422             } else if (type.equals("lelong")) {
423                 buffer = buffer.put(data);
424                 buffer.order(ByteOrder.LITTLE_ENDIAN);
425 
426                 return testLong(buffer);
427             } else if (type.equals("belong")) {
428                 buffer = buffer.put(data);
429                 buffer.order(ByteOrder.BIG_ENDIAN);
430 
431                 return testLong(buffer);
432             } else if (type.equals("regex")) {
433                 return testRegex(new String(data));
434             } else if (type.equals("detector")) {
435                 buffer = buffer.put(data);
436 
437                 return testDetector(buffer);
438 
439                 
440                 
441                 
442                 
443                 
444                 
445             } else {
446                 log.error("testInternal(byte[]): invalid test type '" + type + "'");
447             }
448         } else {
449             log.error("testInternal(byte[]): type or test is empty for '" + mimeType + " - " +
450                 description + "'");
451         }
452 
453         return false;
454     }
455 
456     /***
457      * test the data against the test byte
458      *
459      * @param data the data we are testing
460      *
461      * @return if we have a match
462      */
463     private boolean testByte(ByteBuffer data)
464     {
465         log.debug("testByte()");
466 
467         String test = new String(match.getTest().array());
468         char comparator = match.getComparator();
469         long bitmask = match.getBitmask();
470 
471         String s = test;
472         byte b = data.get(0);
473         b = (byte) (b & bitmask);
474         log.debug("testByte(): decoding '" + test + "' to byte");
475 
476         int tst = Integer.decode(test).byteValue();
477         byte t = (byte) (tst & 0xff);
478         log.debug("testByte(): applying bitmask '" + bitmask + "' to '" + tst + "', result is '" +
479             t + "'");
480         log.debug("testByte(): comparing byte '" + b + "' to '" + t + "'");
481 
482         switch (comparator) {
483         case '=':
484             return t == b;
485 
486         case '!':
487             return t != b;
488 
489         case '>':
490             return t > b;
491 
492         case '<':
493             return t < b;
494         }
495 
496         return false;
497     }
498 
499     /***
500      * test the data against the byte array
501      *
502      * @param data the data we are testing
503      *
504      * @return if we have a match
505      */
506     private boolean testString(ByteBuffer data)
507     {
508         log.debug("testString()");
509 
510         ByteBuffer test = match.getTest();
511         char comparator = match.getComparator();
512 
513         byte[] b = data.array();
514         byte[] t = test.array();
515 
516         boolean diff = false;
517         int i = 0;
518 
519         for (i = 0; i < t.length; i++) {
520             log.debug("testing byte '" + b[i] + "' from '" + new String(data.array()) +
521                 "' against byte '" + t[i] + "' from '" + new String(test.array()) + "'");
522 
523             if (t[i] != b[i]) {
524                 diff = true;
525 
526                 break;
527             }
528         }
529 
530         switch (comparator) {
531         case '=':
532             return !diff;
533 
534         case '!':
535             return diff;
536 
537         case '>':
538             return t[i] > b[i];
539 
540         case '<':
541             return t[i] < b[i];
542         }
543 
544         return false;
545     }
546 
547     /***
548      * test the data against a short
549      *
550      * @param data the data we are testing
551      *
552      * @return if we have a match
553      */
554     private boolean testShort(ByteBuffer data)
555     {
556         log.debug("testShort()");
557 
558         short val = 0;
559         String test = new String(match.getTest().array());
560         char comparator = match.getComparator();
561         long bitmask = match.getBitmask();
562 
563         val = byteArrayToShort(data);
564 
565         
566         val = (short) (val & (short) bitmask);
567 
568         short tst = 0;
569 
570         try {
571             tst = Integer.decode(test).shortValue();
572         } catch (NumberFormatException e) {
573             log.error("testShort(): " + e);
574 
575             return false;
576 
577             
578             
579             
580         }
581 
582         log.debug("testShort(): testing '" + Long.toHexString(val) + "' against '" +
583             Long.toHexString(tst) + "'");
584 
585         switch (comparator) {
586         case '=':
587             return val == tst;
588 
589         case '!':
590             return val != tst;
591 
592         case '>':
593             return val > tst;
594 
595         case '<':
596             return val < tst;
597         }
598 
599         return false;
600     }
601 
602     /***
603      * test the data against a long
604      *
605      * @param data the data we are testing
606      *
607      * @return if we have a match
608      */
609     private boolean testLong(ByteBuffer data)
610     {
611         log.debug("testLong()");
612 
613         long val = 0;
614         String test = new String(match.getTest().array());
615         char comparator = match.getComparator();
616         long bitmask = match.getBitmask();
617 
618         val = byteArrayToLong(data);
619 
620         
621         val = val & bitmask;
622 
623         long tst = Long.decode(test).longValue();
624 
625         log.debug("testLong(): testing '" + Long.toHexString(val) + "' against '" + test +
626             "' => '" + Long.toHexString(tst) + "'");
627 
628         switch (comparator) {
629         case '=':
630             return val == tst;
631 
632         case '!':
633             return val != tst;
634 
635         case '>':
636             return val > tst;
637 
638         case '<':
639             return val < tst;
640         }
641 
642         return false;
643     }
644 
645     /***
646      * test the data against a regex
647      *
648      * @param text the data we are testing
649      *
650      * @return if we have a match
651      */
652     private boolean testRegex(String text)
653     {
654         log.debug("testRegex()");
655 
656         String test = new String(match.getTest().array());
657         char comparator = match.getComparator();
658 
659         Perl5Util utility = new Perl5Util();
660         log.debug("testRegex(): searching for '" + test + "'");
661 
662         if (comparator == '=') {
663             if (utility.match(test, text)) {
664                 return true;
665             } else {
666                 return false;
667             }
668         } else if (comparator == '!') {
669             if (utility.match(test, text)) {
670                 return false;
671             } else {
672                 return true;
673             }
674         }
675 
676         return false;
677     }
678 
679     /***
680      * test the data using a detector
681      *
682      * @param data the data we are testing
683      *
684      * @return if we have a match
685      */
686     private boolean testDetector(ByteBuffer data)
687     {
688         log.debug("testDetector()");
689 
690         String detectorClass = new String(match.getTest().array());
691 
692         try {
693             log.debug("loading class: " + detectorClass);
694 
695             Class c = Class.forName(detectorClass);
696             MagicDetector detector = (MagicDetector) c.newInstance();
697             String[] types = detector.process(data.array(), match.getOffset(), match.getLength(),
698                     match.getBitmask(), match.getComparator(), match.getMimeType(),
699                     match.getProperties());
700 
701             if ((types != null) && (types.length > 0)) {
702                 
703                 match.setMimeType(types[0]);
704 
705                 return true;
706             }
707         } catch (ClassNotFoundException e) {
708             log.error("failed to load detector: " + detectorClass, e);
709         } catch (InstantiationException e) {
710             log.error("specified class is not a valid detector class: " + detectorClass, e);
711         } catch (IllegalAccessException e) {
712             log.error("specified class cannot be accessed: " + detectorClass, e);
713         }
714 
715         return false;
716     }
717 
718     /***
719      * Get the extensions for the underlying detectory
720      *
721      * @return DOCUMENT ME!
722      */
723     public String[] getDetectorExtensions()
724     {
725         log.debug("testDetector()");
726 
727         String detectorClass = new String(match.getTest().array());
728 
729         try {
730             log.debug("loading class: " + detectorClass);
731 
732             Class c = Class.forName(detectorClass);
733             MagicDetector detector = (MagicDetector) c.newInstance();
734 
735             return detector.getHandledTypes();
736         } catch (ClassNotFoundException e) {
737             log.error("failed to load detector: " + detectorClass, e);
738         } catch (InstantiationException e) {
739             log.error("specified class is not a valid detector class: " + detectorClass, e);
740         } catch (IllegalAccessException e) {
741             log.error("specified class cannot be accessed: " + detectorClass, e);
742         }
743 
744         return new String[0];
745     }
746 
747     /***
748      * encode a byte as an octal string
749      *
750      * @param b a byte of data
751      *
752      * @return an octal representation of the byte data
753      */
754     private String byteToOctalString(byte b)
755     {
756         int n1;
757         int n2;
758         int n3;
759         n1 = (b / 32) & 7;
760         n2 = (b / 8) & 7;
761         n3 = b & 7;
762 
763         return String.valueOf(n1) + String.valueOf(n2) + String.valueOf(n3);
764     }
765 
766     /***
767      * convert a byte array to a short
768      *
769      * @param data buffer of byte data
770      *
771      * @return byte array converted to a short
772      */
773     private short byteArrayToShort(ByteBuffer data)
774     {
775         return data.getShort(0);
776     }
777 
778     /***
779      * convert a byte array to a long
780      *
781      * @param data buffer of byte data
782      *
783      * @return byte arrays (high and low bytes) converted to a long value
784      */
785     private long byteArrayToLong(ByteBuffer data)
786     {
787         return (long) data.getInt(0);
788     }
789 
790     /***
791      * DOCUMENT ME!
792      *
793      * @return DOCUMENT ME!
794      *
795      * @throws CloneNotSupportedException DOCUMENT ME!
796      */
797     protected Object clone()
798         throws CloneNotSupportedException
799     {
800         MagicMatcher clone = new MagicMatcher();
801 
802         clone.setMatch((MagicMatch) match.clone());
803 
804         Iterator i = subMatchers.iterator();
805         ArrayList sub = new ArrayList();
806 
807         while (i.hasNext()) {
808             MagicMatcher m = (MagicMatcher) i.next();
809             sub.add(m.clone());
810         }
811 
812         clone.setSubMatchers(sub);
813 
814         return clone;
815     }
816 }