parse.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. package ftp
  2. import (
  3. "errors"
  4. "fmt"
  5. "strconv"
  6. "strings"
  7. "time"
  8. )
  9. var errUnsupportedListLine = errors.New("Unsupported LIST line")
  10. type parseFunc func(string, time.Time, *time.Location) (*Entry, error)
  11. var listLineParsers = []parseFunc{
  12. parseRFC3659ListLine,
  13. parseLsListLine,
  14. parseDirListLine,
  15. parseHostedFTPLine,
  16. }
  17. var dirTimeFormats = []string{
  18. "01-02-06 03:04PM",
  19. "2006-01-02 15:04",
  20. }
  21. // parseRFC3659ListLine parses the style of directory line defined in RFC 3659.
  22. func parseRFC3659ListLine(line string, now time.Time, loc *time.Location) (*Entry, error) {
  23. iSemicolon := strings.Index(line, ";")
  24. iWhitespace := strings.Index(line, " ")
  25. if iSemicolon < 0 || iSemicolon > iWhitespace {
  26. return nil, errUnsupportedListLine
  27. }
  28. e := &Entry{
  29. Name: line[iWhitespace+1:],
  30. }
  31. for _, field := range strings.Split(line[:iWhitespace-1], ";") {
  32. i := strings.Index(field, "=")
  33. if i < 1 {
  34. return nil, errUnsupportedListLine
  35. }
  36. key := strings.ToLower(field[:i])
  37. value := field[i+1:]
  38. switch key {
  39. case "modify":
  40. var err error
  41. e.Time, err = time.ParseInLocation("20060102150405", value, loc)
  42. if err != nil {
  43. return nil, err
  44. }
  45. case "type":
  46. switch value {
  47. case "dir", "cdir", "pdir":
  48. e.Type = EntryTypeFolder
  49. case "file":
  50. e.Type = EntryTypeFile
  51. }
  52. case "size":
  53. e.setSize(value)
  54. }
  55. }
  56. return e, nil
  57. }
  58. // parseLsListLine parses a directory line in a format based on the output of
  59. // the UNIX ls command.
  60. func parseLsListLine(line string, now time.Time, loc *time.Location) (*Entry, error) {
  61. // Has the first field a length of 10 bytes?
  62. if strings.IndexByte(line, ' ') != 10 {
  63. return nil, errUnsupportedListLine
  64. }
  65. scanner := newScanner(line)
  66. fields := scanner.NextFields(6)
  67. if len(fields) < 6 {
  68. return nil, errUnsupportedListLine
  69. }
  70. if fields[1] == "folder" && fields[2] == "0" {
  71. e := &Entry{
  72. Type: EntryTypeFolder,
  73. Name: scanner.Remaining(),
  74. }
  75. if err := e.setTime(fields[3:6], now, loc); err != nil {
  76. return nil, err
  77. }
  78. return e, nil
  79. }
  80. if fields[1] == "0" {
  81. fields = append(fields, scanner.Next())
  82. e := &Entry{
  83. Type: EntryTypeFile,
  84. Name: scanner.Remaining(),
  85. }
  86. if err := e.setSize(fields[2]); err != nil {
  87. return nil, errUnsupportedListLine
  88. }
  89. if err := e.setTime(fields[4:7], now, loc); err != nil {
  90. return nil, err
  91. }
  92. return e, nil
  93. }
  94. // Read two more fields
  95. fields = append(fields, scanner.NextFields(2)...)
  96. if len(fields) < 8 {
  97. return nil, errUnsupportedListLine
  98. }
  99. e := &Entry{
  100. Name: scanner.Remaining(),
  101. }
  102. switch fields[0][0] {
  103. case '-':
  104. e.Type = EntryTypeFile
  105. if err := e.setSize(fields[4]); err != nil {
  106. return nil, err
  107. }
  108. case 'd':
  109. e.Type = EntryTypeFolder
  110. case 'l':
  111. e.Type = EntryTypeLink
  112. default:
  113. return nil, errors.New("Unknown entry type")
  114. }
  115. if err := e.setTime(fields[5:8], now, loc); err != nil {
  116. return nil, err
  117. }
  118. return e, nil
  119. }
  120. // parseDirListLine parses a directory line in a format based on the output of
  121. // the MS-DOS DIR command.
  122. func parseDirListLine(line string, now time.Time, loc *time.Location) (*Entry, error) {
  123. e := &Entry{}
  124. var err error
  125. // Try various time formats that DIR might use, and stop when one works.
  126. for _, format := range dirTimeFormats {
  127. if len(line) > len(format) {
  128. e.Time, err = time.ParseInLocation(format, line[:len(format)], loc)
  129. if err == nil {
  130. line = line[len(format):]
  131. break
  132. }
  133. }
  134. }
  135. if err != nil {
  136. // None of the time formats worked.
  137. return nil, errUnsupportedListLine
  138. }
  139. line = strings.TrimLeft(line, " ")
  140. if strings.HasPrefix(line, "<DIR>") {
  141. e.Type = EntryTypeFolder
  142. line = strings.TrimPrefix(line, "<DIR>")
  143. } else {
  144. space := strings.Index(line, " ")
  145. if space == -1 {
  146. return nil, errUnsupportedListLine
  147. }
  148. e.Size, err = strconv.ParseUint(line[:space], 10, 64)
  149. if err != nil {
  150. return nil, errUnsupportedListLine
  151. }
  152. e.Type = EntryTypeFile
  153. line = line[space:]
  154. }
  155. e.Name = strings.TrimLeft(line, " ")
  156. return e, nil
  157. }
  158. // parseHostedFTPLine parses a directory line in the non-standard format used
  159. // by hostedftp.com
  160. // -r-------- 0 user group 65222236 Feb 24 00:39 UABlacklistingWeek8.csv
  161. // (The link count is inexplicably 0)
  162. func parseHostedFTPLine(line string, now time.Time, loc *time.Location) (*Entry, error) {
  163. // Has the first field a length of 10 bytes?
  164. if strings.IndexByte(line, ' ') != 10 {
  165. return nil, errUnsupportedListLine
  166. }
  167. scanner := newScanner(line)
  168. fields := scanner.NextFields(2)
  169. if len(fields) < 2 || fields[1] != "0" {
  170. return nil, errUnsupportedListLine
  171. }
  172. // Set link count to 1 and attempt to parse as Unix.
  173. return parseLsListLine(fields[0]+" 1 "+scanner.Remaining(), now, loc)
  174. }
  175. // parseListLine parses the various non-standard format returned by the LIST
  176. // FTP command.
  177. func parseListLine(line string, now time.Time, loc *time.Location) (*Entry, error) {
  178. for _, f := range listLineParsers {
  179. e, err := f(line, now, loc)
  180. if err != errUnsupportedListLine {
  181. return e, err
  182. }
  183. }
  184. return nil, errUnsupportedListLine
  185. }
  186. func (e *Entry) setSize(str string) (err error) {
  187. e.Size, err = strconv.ParseUint(str, 0, 64)
  188. return
  189. }
  190. func (e *Entry) setTime(fields []string, now time.Time, loc *time.Location) (err error) {
  191. if strings.Contains(fields[2], ":") { // contains time
  192. thisYear, _, _ := now.Date()
  193. timeStr := fmt.Sprintf("%s %s %d %s", fields[1], fields[0], thisYear, fields[2])
  194. e.Time, err = time.ParseInLocation("_2 Jan 2006 15:04", timeStr, loc)
  195. /*
  196. On unix, `info ls` shows:
  197. 10.1.6 Formatting file timestamps
  198. ---------------------------------
  199. A timestamp is considered to be “recent” if it is less than six
  200. months old, and is not dated in the future. If a timestamp dated today
  201. is not listed in recent form, the timestamp is in the future, which
  202. means you probably have clock skew problems which may break programs
  203. like ‘make’ that rely on file timestamps.
  204. */
  205. if !e.Time.Before(now.AddDate(0, 6, 0)) {
  206. e.Time = e.Time.AddDate(-1, 0, 0)
  207. }
  208. } else { // only the date
  209. if len(fields[2]) != 4 {
  210. return errors.New("Invalid year format in time string")
  211. }
  212. timeStr := fmt.Sprintf("%s %s %s 00:00", fields[1], fields[0], fields[2])
  213. e.Time, err = time.ParseInLocation("_2 Jan 2006 15:04", timeStr, loc)
  214. }
  215. return
  216. }