moss 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. #!/usr/bin/perl
  2. #
  3. # Please read all the comments down to the line that says "STOP".
  4. # These comments are divided into three sections:
  5. #
  6. # 1. usage instructions
  7. # 2. installation instructions
  8. # 3. standard copyright
  9. #
  10. # Feel free to share this script with other instructors of programming
  11. # classes, but please do not place the script in a publicly accessible
  12. # place. Comments, questions, and bug reports should be sent to
  13. # moss-request@moss.stanford.edu.
  14. #
  15. # IMPORTANT: This script is known to work on Unix and on Windows using Cygwin.
  16. # It is not known to work on other ways of using Perl under Windows. If the
  17. # script does not work for you under Windows, you can try the email-based
  18. # version for Windows (available on the Moss home page).
  19. #
  20. #
  21. # Section 1. Usage instructions
  22. #
  23. # moss [-x] [-l language] [-d] [-b basefile1] ... [-b basefilen] [-m #] [-n #] [-c "string"] file1 file2 file3 ...
  24. #
  25. # The -l option specifies the source language of the tested programs.
  26. # Moss supports many different languages; see the variable "languages" below for the
  27. # full list.
  28. #
  29. # Example: Compare the lisp programs foo.lisp and bar.lisp:
  30. #
  31. # moss -l lisp foo.lisp bar.lisp
  32. #
  33. #
  34. # The -d option specifies that submissions are by directory, not by file.
  35. # That is, files in a directory are taken to be part of the same program,
  36. # and reported matches are organized accordingly by directory.
  37. #
  38. # Example: Compare the programs foo and bar, which consist of .c and .h
  39. # files in the directories foo and bar respectively.
  40. #
  41. # moss -d foo/*.c foo/*.h bar/*.c bar/*.h
  42. #
  43. # Example: Each program consists of the *.c and *.h files in a directory under
  44. # the directory "assignment1."
  45. #
  46. # moss -d assignment1/*/*.h assignment1/*/*.c
  47. #
  48. #
  49. # The -b option names a "base file". Moss normally reports all code
  50. # that matches in pairs of files. When a base file is supplied,
  51. # program code that also appears in the base file is not counted in matches.
  52. # A typical base file will include, for example, the instructor-supplied
  53. # code for an assignment. Multiple -b options are allowed. You should
  54. # use a base file if it is convenient; base files improve results, but
  55. # are not usually necessary for obtaining useful information.
  56. #
  57. # IMPORTANT: Unlike previous versions of moss, the -b option *always*
  58. # takes a single filename, even if the -d option is also used.
  59. #
  60. # Examples:
  61. #
  62. # Submit all of the C++ files in the current directory, using skeleton.cc
  63. # as the base file:
  64. #
  65. # moss -l cc -b skeleton.cc *.cc
  66. #
  67. # Submit all of the ML programs in directories asn1.96/* and asn1.97/*, where
  68. # asn1.97/instructor/example.ml and asn1.96/instructor/example.ml contain the base files.
  69. #
  70. # moss -l ml -b asn1.97/instructor/example.ml -b asn1.96/instructor/example.ml -d asn1.97/*/*.ml asn1.96/*/*.ml
  71. #
  72. # The -m option sets the maximum number of times a given passage may appear
  73. # before it is ignored. A passage of code that appears in many programs
  74. # is probably legitimate sharing and not the result of plagiarism. With -m N,
  75. # any passage appearing in more than N programs is treated as if it appeared in
  76. # a base file (i.e., it is never reported). Option -m can be used to control
  77. # moss' sensitivity. With -m 2, moss reports only passages that appear
  78. # in exactly two programs. If one expects many very similar solutions
  79. # (e.g., the short first assignments typical of introductory programming
  80. # courses) then using -m 3 or -m 4 is a good way to eliminate all but
  81. # truly unusual matches between programs while still being able to detect
  82. # 3-way or 4-way plagiarism. With -m 1000000 (or any very
  83. # large number), moss reports all matches, no matter how often they appear.
  84. # The -m setting is most useful for large assignments where one also has a base file
  85. # expected to hold all legitimately shared code. The default for -m is 10.
  86. #
  87. # Examples:
  88. #
  89. # moss -l pascal -m 2 *.pascal
  90. # moss -l cc -m 1000000 -b mycode.cc asn1/*.cc
  91. #
  92. #
  93. # The -c option supplies a comment string that is attached to the generated
  94. # report. This option facilitates matching queries submitted with replies
  95. # received, especially when several queries are submitted at once.
  96. #
  97. # Example:
  98. #
  99. # moss -l scheme -c "Scheme programs" *.sch
  100. #
  101. # The -n option determines the number of matching files to show in the results.
  102. # The default is 250.
  103. #
  104. # Example:
  105. # moss -l java -n 200 *.java
  106. # The -x option sends queries to the current experimental version of the server.
  107. # The experimental server has the most recent Moss features and is also usually
  108. # less stable (read: may have more bugs).
  109. #
  110. # Example:
  111. #
  112. # moss -x -l ml *.ml
  113. #
  114. #
  115. # Section 2. Installation instructions.
  116. #
  117. # You may need to change the very first line of this script
  118. # if perl is not in /usr/bin on your system. Just replace /usr/bin
  119. # with the pathname of the directory where perl resides.
  120. #
  121. #
  122. # Section 3. Standard copyright
  123. #
  124. #Copyright (c) 1997 The Regents of the University of California.
  125. #All rights reserved.
  126. #
  127. #Permission to use, copy, modify, and distribute this software for any
  128. #purpose, without fee, and without written agreement is hereby granted,
  129. #provided that the above copyright notice and the following two
  130. #paragraphs appear in all copies of this software.
  131. #
  132. #IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
  133. #DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
  134. #OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
  135. #CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  136. #
  137. #THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
  138. #INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  139. #AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
  140. #ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
  141. #PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  142. #
  143. #
  144. # STOP. It should not be necessary to change anything below this line
  145. # to use the script.
  146. #
  147. use IO::Socket;
  148. #
  149. # As of the date this script was written, the following languages were supported. This script will work with
  150. # languages added later however. Check the moss website for the full list of supported languages.
  151. #
  152. @languages = ("c", "cc", "java", "ml", "pascal", "ada", "lisp", "scheme", "haskell", "fortran", "ascii", "vhdl", "perl", "matlab", "python", "mips", "prolog", "spice", "vb", "csharp", "modula2", "a8086", "javascript", "plsql", "verilog");
  153. $server = 'moss.stanford.edu';
  154. $port = '7690';
  155. $noreq = "Request not sent.";
  156. $usage = "usage: moss [-x] [-l language] [-d] [-b basefile1] ... [-b basefilen] [-m #] [-c \"string\"] file1 file2 file3 ...";
  157. #
  158. # The userid is used to authenticate your queries to the server; don't change it!
  159. #
  160. $userid=720082582;
  161. #
  162. # Process the command line options. This is done in a non-standard
  163. # way to allow multiple -b's.
  164. #
  165. $opt_l = "c"; # default language is c
  166. $opt_m = 10;
  167. $opt_d = 0;
  168. $opt_x = 0;
  169. $opt_c = "";
  170. $opt_n = 250;
  171. $bindex = 0; # this becomes non-zero if we have any base files
  172. while (@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
  173. ($first,$rest) = ($1,$2);
  174. shift(@ARGV);
  175. if ($first eq "d") {
  176. $opt_d = 1;
  177. next;
  178. }
  179. if ($first eq "b") {
  180. if($rest eq '') {
  181. die "No argument for option -b.\n" unless @ARGV;
  182. $rest = shift(@ARGV);
  183. }
  184. $opt_b[$bindex++] = $rest;
  185. next;
  186. }
  187. if ($first eq "l") {
  188. if ($rest eq '') {
  189. die "No argument for option -l.\n" unless @ARGV;
  190. $rest = shift(@ARGV);
  191. }
  192. $opt_l = $rest;
  193. next;
  194. }
  195. if ($first eq "m") {
  196. if($rest eq '') {
  197. die "No argument for option -m.\n" unless @ARGV;
  198. $rest = shift(@ARGV);
  199. }
  200. $opt_m = $rest;
  201. next;
  202. }
  203. if ($first eq "c") {
  204. if($rest eq '') {
  205. die "No argument for option -c.\n" unless @ARGV;
  206. $rest = shift(@ARGV);
  207. }
  208. $opt_c = $rest;
  209. next;
  210. }
  211. if ($first eq "n") {
  212. if($rest eq '') {
  213. die "No argument for option -n.\n" unless @ARGV;
  214. $rest = shift(@ARGV);
  215. }
  216. $opt_n = $rest;
  217. next;
  218. }
  219. if ($first eq "x") {
  220. $opt_x = 1;
  221. next;
  222. }
  223. #
  224. # Override the name of the server. This is used for testing this script.
  225. #
  226. if ($first eq "s") {
  227. $server = shift(@ARGV);
  228. next;
  229. }
  230. #
  231. # Override the port. This is used for testing this script.
  232. #
  233. if ($first eq "p") {
  234. $port = shift(@ARGV);
  235. next;
  236. }
  237. die "Unrecognized option -$first. $usage\n";
  238. }
  239. #
  240. # Check a bunch of things first to ensure that the
  241. # script will be able to run to completion.
  242. #
  243. #
  244. # Make sure all the argument files exist and are readable.
  245. #
  246. print "Checking files . . . \n";
  247. $i = 0;
  248. while($i < $bindex)
  249. {
  250. die "Base file $opt_b[$i] does not exist. $noreq\n" unless -e "$opt_b[$i]";
  251. die "Base file $opt_b[$i] is not readable. $noreq\n" unless -r "$opt_b[$i]";
  252. die "Base file $opt_b is not a text file. $noreq\n" unless -T "$opt_b[$i]";
  253. $i++;
  254. }
  255. foreach $file (@ARGV)
  256. {
  257. die "File $file does not exist. $noreq\n" unless -e "$file";
  258. die "File $file is not readable. $noreq\n" unless -r "$file";
  259. die "File $file is not a text file. $noreq\n" unless -T "$file";
  260. }
  261. if ("@ARGV" eq '') {
  262. die "No files submitted.\n $usage";
  263. }
  264. print "OK\n";
  265. #
  266. # Now the real processing begins.
  267. #
  268. $sock = new IO::Socket::INET (
  269. PeerAddr => $server,
  270. PeerPort => $port,
  271. Proto => 'tcp',
  272. );
  273. die "Could not connect to server $server: $!\n" unless $sock;
  274. $sock->autoflush(1);
  275. sub read_from_server {
  276. $msg = <$sock>;
  277. print $msg;
  278. }
  279. sub upload_file {
  280. local ($file, $id, $lang) = @_;
  281. #
  282. # The stat function does not seem to give correct filesizes on windows, so
  283. # we compute the size here via brute force.
  284. #
  285. open(F,$file);
  286. $size = 0;
  287. while (<F>) {
  288. $size += length($_);
  289. }
  290. close(F);
  291. print "Uploading $file ...";
  292. open(F,$file);
  293. $file =~s/\s/\_/g; # replace blanks in filename with underscores
  294. print $sock "file $id $lang $size $file\n";
  295. while (<F>) {
  296. print $sock $_;
  297. }
  298. close(F);
  299. print "done.\n";
  300. }
  301. print $sock "moss $userid\n"; # authenticate user
  302. print $sock "directory $opt_d\n";
  303. print $sock "X $opt_x\n";
  304. print $sock "maxmatches $opt_m\n";
  305. print $sock "show $opt_n\n";
  306. #
  307. # confirm that we have a supported languages
  308. #
  309. print $sock "language $opt_l\n";
  310. $msg = <$sock>;
  311. chop($msg);
  312. if ($msg eq "no") {
  313. print $sock "end\n";
  314. die "Unrecognized language $opt_l.";
  315. }
  316. # upload any base files
  317. $i = 0;
  318. while($i < $bindex) {
  319. &upload_file($opt_b[$i++],0,$opt_l);
  320. }
  321. $setid = 1;
  322. foreach $file (@ARGV) {
  323. &upload_file($file,$setid++,$opt_l);
  324. }
  325. print $sock "query 0 $opt_c\n";
  326. print "Query submitted. Waiting for the server's response.\n";
  327. &read_from_server();
  328. print $sock "end\n";
  329. close($sock);