#!/usr/bin/python
"""Split a pickled character collection into a learning set and a test set.

Usage: python load_learning_set.py FILE_SUFFIX

Reads ``characters<FILE_SUFFIX>.dat`` and writes
``learning_set<FILE_SUFFIX>.dat`` and ``test_set<FILE_SUFFIX>.dat`` in the
current directory.  Each loaded item must expose a ``value`` attribute.
"""
try:
    from cPickle import dump, load
except ImportError:  # Python 3 has no cPickle; pickle is already accelerated.
    from pickle import dump, load
from sys import argv, exit

# Maximum number of samples per distinct ``value`` kept for learning; any
# further samples with that value go to the test set.
SAMPLES_PER_VALUE = 70


def split_learning_set(chars, limit=SAMPLES_PER_VALUE):
    """Partition *chars* into ``(learning_set, test_set)``.

    The first *limit* items seen for each distinct ``value`` are put in the
    learning set; every later item with that value is put in the test set.
    Input order is preserved inside both lists.

    ``value`` must be hashable, because occurrences are tallied in a dict
    (this replaces a linear ``list.count`` scan per item).
    """
    learning_set = []
    test_set = []
    seen = {}
    for char in chars:
        count = seen.get(char.value, 0)
        if count >= limit:
            test_set.append(char)
        else:
            learning_set.append(char)
            seen[char.value] = count + 1
    return learning_set, test_set


def main():
    """Load the characters named by ``argv[1]``, split them, save both sets."""
    if len(argv) < 2:
        print('Usage: python %s FILE_SUFFIX' % argv[0])
        exit(1)
    suffix = argv[1]

    print('Loading characters...')
    # NOTE: unpickling executes code embedded in the file; only load
    # character files that come from a trusted source.
    # Binary mode is correct for every pickle protocol and mandatory on
    # Python 3.  A protocol-0 file written in *text* mode on Windows may
    # need to be regenerated.
    with open('characters%s.dat' % suffix, 'rb') as source:
        chars = load(source)

    learning_set, test_set = split_learning_set(chars)

    # Single-argument print() produces the same output on Python 2 and 3.
    print('Learning set: %s' % ([c.value for c in learning_set],))
    print('\nTest set: %s' % ([c.value for c in test_set],))

    print('\nSaving learning set...')
    with open('learning_set%s.dat' % suffix, 'wb') as target:
        dump(learning_set, target)

    print('Saving test set...')
    with open('test_set%s.dat' % suffix, 'wb') as target:
        dump(test_set, target)


if __name__ == '__main__':
    main()