xpat

2017-10-24 08:51:21 -04:00
parent 3b83f184ea
commit 09504ca363
13 changed files with 401 additions and 14 deletions
@@ -10,6 +10,13 @@ std::string B64Encode(const uint8_t *data, const std::size_t l);

 /** @brief returns true if decode was successful */
 bool B64Decode(const std::string &data, std::vector<uint8_t> &out);
+
+/** returns base32 encoded string */
+std::string B32Encode(const uint8_t *data, const std::size_t l);
+
+/** @brief returns true if decode was successful */
+bool B32Decode(const std::string &data, std::vector<uint8_t> &out);
+
 }

 #endif
@@ -3,6 +3,7 @@

 #include <array>
 #include <sodium/crypto_hash.h>
+#include <sodium/crypto_generichash.h>

 namespace nntpchan
 {
@@ -10,6 +11,12 @@ typedef std::array<uint8_t, crypto_hash_BYTES> SHA512Digest;

 void SHA512(const uint8_t *d, std::size_t l, SHA512Digest &h);

+  /** fixed-size buffer of crypto_generichash_BYTES bytes that receives a Blake2B digest */
+  typedef std::array<uint8_t, crypto_generichash_BYTES> Blake2BDigest;
+  /** hashes the l bytes starting at d and writes the digest into h */
+  void Blake2B(const uint8_t *d, std::size_t l, Blake2BDigest & h);
+
+  /** returns the base32 encoding of the Blake2B digest of str */
+  std::string Blake2B_base32(const std::string & str);
+
+  
 /** global crypto initializer */
 struct Crypto
 {
@@ -40,6 +40,7 @@ private:
  bool init_skiplist(const std::string &subdir) const;

  fs::path skiplist_root(const std::string &name) const;
+  fs::path skiplist_dir(const fs::path & root, const std::string & name) const;

  fs::path basedir;
 };
@@ -5,6 +5,14 @@ namespace i2p
 {
 namespace data
 {
+
+/** base32 alphabet: indices 0-25 are 'a'-'z', indices 26-31 are '2'-'7' */
+static const char T32[32] = {
+    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '2', '3', '4', '5', '6', '7',
+};
+
+/** returns a pointer to the 32-entry base32 alphabet (the array is not NUL terminated) */
+const char *GetBase32SubstitutionTable() { return T32; }
+
 static void iT64Build(void);

 /*
@@ -18,7 +26,9 @@ static void iT64Build(void);
 static const char T64[64] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
                             'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
                             'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
-                             'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
+                             'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '~'};
+
+const char *GetBase64SubstitutionTable() { return T64; }

 /*
 * Reverse Substitution Table (built in run time)
@@ -41,7 +51,8 @@ static char P64 = '=';
 * Converts binary encoded data to BASE64 format.
 *
 */
-static size_t                                   /* Number of bytes in the encoded buffer */
+
+size_t                                          /* Number of bytes in the encoded buffer */
    ByteStreamToBase64(const uint8_t *InBuffer, /* Input buffer, binary data */
                       size_t InCount,          /* Number of bytes in the input buffer */
                       char *OutBuffer,         /* output buffer */
@@ -122,7 +133,8 @@ static size_t                                   /* Number of bytes in the encode
 * not properly padded, buffer of negative length is returned
 *
 */
-static ssize_t                               /* Number of output bytes */
+
+size_t                                       /* Number of output bytes */
    Base64ToByteStream(const char *InBuffer, /* BASE64 encoded buffer */
                       size_t InCount,       /* Number of input bytes */
                       uint8_t *OutBuffer,   /* output buffer length */
@@ -184,7 +196,7 @@ static ssize_t                               /* Number of output bytes */
  return outCount;
 }

-static size_t Base64EncodingBufferSize(const size_t input_size)
+size_t Base64EncodingBufferSize(const size_t input_size)
 {
  auto d = div(input_size, 3);
  if (d.rem)
@@ -192,6 +204,13 @@ static size_t Base64EncodingBufferSize(const size_t input_size)
  return 4 * d.quot;
 }

+/**
+ * @brief size of the buffer needed to base32 encode input_size bytes
+ *
+ * Every started group of 5 input bytes is counted as 8 output characters.
+ * The computation stays in size_t so large sizes are not narrowed to int,
+ * which is what happened when the value was passed through div().
+ *
+ * @param input_size number of bytes that will be encoded
+ * @return number of characters to reserve for the encoded form
+ */
+size_t Base32EncodingBufferSize(const size_t input_size)
+{
+  const size_t groups = input_size / 5 + (input_size % 5 != 0 ? 1 : 0);
+  return 8 * groups;
+}
 /*
 *
 * iT64
@@ -211,6 +230,65 @@ static void iT64Build()
    iT64[(int)T64[i]] = i;
  iT64[(int)P64] = 0;
 }
+
+/**
+ * @brief decodes lowercase, unpadded base32 text into bytes
+ *
+ * Accepts only the characters 'a'-'z' and '2'-'7'. Trailing bits that do not
+ * fill a whole byte are dropped.
+ *
+ * @param inBuf  base32 characters to decode
+ * @param len    number of characters in inBuf
+ * @param outBuf receives the decoded bytes
+ * @param outLen capacity of outBuf in bytes
+ * @return number of bytes written; 0 if an unexpected character is found;
+ *         decoding stops early (returning what was written) once outBuf is full
+ */
+size_t Base32ToByteStream(const char *inBuf, size_t len, uint8_t *outBuf, size_t outLen)
+{
+  // unsigned accumulator that only ever holds the bits not yet emitted, so it
+  // cannot overflow no matter how long the input is
+  unsigned int acc = 0;
+  unsigned int bits = 0;
+  size_t ret = 0;
+  for (size_t i = 0; i < len; i++)
+  {
+    const char ch = inBuf[i];
+    unsigned int val;
+    if (ch >= '2' && ch <= '7') // digit
+      val = static_cast<unsigned int>(ch - '2') + 26u; // digits follow the 26 letters
+    else if (ch >= 'a' && ch <= 'z')
+      val = static_cast<unsigned int>(ch - 'a'); // a = 0
+    else
+      return 0; // unexpected character
+
+    acc = (acc << 5) | val;
+    bits += 5;
+    if (bits >= 8)
+    {
+      if (ret >= outLen)
+        return ret;
+      bits -= 8;
+      outBuf[ret] = static_cast<uint8_t>(acc >> bits);
+      ret++;
+      acc &= (1u << bits) - 1u; // forget the byte that was just emitted
+    }
+  }
+  return ret;
+}
+
+/**
+ * @brief encodes bytes as lowercase base32 without padding
+ *
+ * Uses the alphabet 'a'-'z' followed by '2'-'7'. The final character is
+ * completed with zero bits when the input is not a multiple of 5 bits.
+ *
+ * @param inBuf  bytes to encode
+ * @param len    number of bytes in inBuf
+ * @param outBuf receives the base32 characters (no NUL terminator is written)
+ * @param outLen capacity of outBuf in characters
+ * @return number of characters written; 0 for empty input
+ */
+size_t ByteStreamToBase32(const uint8_t *inBuf, size_t len, char *outBuf, size_t outLen)
+{
+  // nothing to read: do not touch inBuf[0]
+  if (inBuf == nullptr || len == 0)
+    return 0;
+
+  size_t ret = 0, pos = 0;
+  // unsigned accumulator holding only the bits not yet emitted, so it cannot
+  // overflow regardless of input length
+  unsigned int acc = 0, bits = 0;
+  while (ret < outLen && (bits > 0 || pos < len))
+  {
+    if (bits < 5)
+    {
+      if (pos < len)
+      {
+        acc = (acc << 8) | inBuf[pos];
+        pos++;
+        bits += 8;
+      }
+      else // last byte: complete the final group with zero bits
+      {
+        acc <<= (5 - bits);
+        bits = 5;
+      }
+    }
+
+    bits -= 5;
+    const unsigned int ind = (acc >> bits) & 0x1F;
+    outBuf[ret] = static_cast<char>((ind < 26) ? (ind + 'a') : ((ind - 26) + '2'));
+    ret++;
+    acc &= (1u << bits) - 1u; // forget the group that was just emitted
+  }
+  return ret;
+}
 }
 }

@@ -227,9 +305,30 @@ std::string B64Encode(const uint8_t *data, const std::size_t l)
 bool B64Decode(const std::string &data, std::vector<uint8_t> &out)
 {
  out.resize(data.size());
-  if (i2p::data::Base64ToByteStream(data.c_str(), data.size(), &out[0], out.size()) == -1)
-    return false;
+  if (i2p::data::Base64ToByteStream(data.c_str(), data.size(), &out[0], out.size()))
+  {
    out.shrink_to_fit();
    return true;
+  }
+  return false;
+}
+
+/**
+ * @brief base32 encodes l bytes starting at data
+ *
+ * @param data bytes to encode
+ * @param l    number of bytes at data
+ * @return the unpadded base32 text; empty when there is nothing to encode
+ */
+std::string B32Encode(const uint8_t *data, const std::size_t l)
+{
+  std::string out;
+  if (data == nullptr || l == 0)
+    return out;
+  out.resize(i2p::data::Base32EncodingBufferSize(l));
+  // the encoder writes no padding, so it can produce fewer characters than the
+  // rounded-up buffer size; trim to what was written instead of returning NULs
+  const std::size_t written = i2p::data::ByteStreamToBase32(data, l, &out[0], out.size());
+  out.resize(written);
+  return out;
+}
+
+/**
+ * @brief decodes base32 text into out
+ *
+ * @param data base32 text to decode
+ * @param out  receives exactly the decoded bytes; left empty on failure
+ * @return true if at least one byte was decoded
+ */
+bool B32Decode(const std::string &data, std::vector<uint8_t> &out)
+{
+  // the decoded form is never longer than the text, so this is enough room
+  out.resize(data.size());
+  const std::size_t decoded = i2p::data::Base32ToByteStream(data.data(), data.size(), out.data(), out.size());
+  // keep only the bytes that were produced; shrink_to_fit alone leaves the size untouched
+  out.resize(decoded);
+  if (decoded == 0)
+    return false;
+  out.shrink_to_fit();
+  return true;
+}
 }
@@ -1,4 +1,5 @@
 #include <cassert>
+#include <nntpchan/base64.hpp>
 #include <nntpchan/crypto.hpp>
 #include <sodium.h>

@@ -6,6 +7,16 @@ namespace nntpchan
 {
 void SHA512(const uint8_t *d, const std::size_t l, SHA512Digest &h) { crypto_hash(h.data(), d, l); }

+  /** writes the unkeyed libsodium generichash of the l bytes at d into h */
+  void Blake2B(const uint8_t *d, std::size_t l, Blake2BDigest & h)
+  {
+    crypto_generichash(h.data(), h.size(), d, l, nullptr, 0);
+  }
+
+  /** hashes str with Blake2B and returns the digest encoded by B32Encode */
+  std::string Blake2B_base32(const std::string & str)
+  {
+    Blake2BDigest digest;
+    const uint8_t *bytes = reinterpret_cast<const uint8_t*>(str.c_str());
+    Blake2B(bytes, str.size(), digest);
+    return B32Encode(digest.data(), digest.size());
+  }
+
+  
 Crypto::Crypto() { assert(sodium_init() == 0); }

 Crypto::~Crypto() {}
@@ -1,4 +1,5 @@
 #include <cassert>
+#include <nntpchan/crypto.hpp>
 #include <nntpchan/sanitize.hpp>
 #include <nntpchan/storage.hpp>
 #include <sstream>
@@ -6,6 +7,9 @@
 namespace nntpchan
 {

+  /** name of the posts skiplist, passed to init_skiplist and skiplist_dir */
+  const fs::path posts_skiplist_dir = "posts";
+  /** name of the threads skiplist, passed to init_skiplist */
+  const fs::path threads_skiplist_dir = "threads";
+  
 ArticleStorage::ArticleStorage(const fs::path &fpath) { SetPath(fpath); }

 ArticleStorage::~ArticleStorage() {}
@@ -14,16 +18,21 @@ void ArticleStorage::SetPath(const fs::path &fpath)
 {
  basedir = fpath;
  fs::create_directories(basedir);
-  assert(init_skiplist("posts_skiplist"));
+  assert(init_skiplist(posts_skiplist_dir));
+  assert(init_skiplist(threads_skiplist_dir));
 }

+
 bool ArticleStorage::init_skiplist(const std::string &subdir) const
 {
-  fs::path skiplist = basedir / fs::path(subdir);
-  fs::create_directories(skiplist);
-  const auto subdirs = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
+  fs::path skiplist = skiplist_root(subdir);
+  const auto subdirs = {		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
+		'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+		'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+		'y', 'z', '2', '3', '4', '5', '6', '7',
+};
  for (const auto &s : subdirs)
-    fs::create_directories(skiplist / s);
+    fs::create_directories(skiplist / std::string(&s, 1));
  return true;
 }

@@ -67,5 +76,19 @@ bool ArticleStorage::LoadThread(Thread &thread, const std::string &rootmsgid) co
 }

 /** ensure symlinks are formed for this article by message id */
-void ArticleStorage::EnsureSymlinks(const std::string &msgid) const { (void)msgid; }
+void ArticleStorage::EnsureSymlinks(const std::string &msgid) const
+{
+  // the skiplist bucket is chosen from the base32 Blake2B hash of the message id
+  std::string msgidhash = Blake2B_base32(msgid);
+  // NOTE(review): the path returned by skiplist_dir is discarded, so no symlink is created here yet
+  skiplist_dir(posts_skiplist_dir, msgidhash);
+}
+
+
+  /** returns the directory called name directly under basedir */
+  fs::path ArticleStorage::skiplist_root(const std::string & name ) const
+  {
+    fs::path root(basedir);
+    root /= name;
+    return root;
+  }
+  /** returns the bucket under root named after the first character of name */
+  fs::path ArticleStorage::skiplist_dir(const fs::path & root, const std::string & name ) const
+  {
+    const std::string bucket = name.substr(0, 1);
+    return root / bucket;
+  }
 }
@@ -333,6 +333,9 @@ type Database interface {

 	// find cites in text
 	FindCitesInText(msg string) ([]string, error)
+
+	// find values of the named header in group between the lo/hi watermarks; pattern filtering is left to the caller
+	FindHeaders(group, headername string, lo, hi int64) (ArticleHeaders, error)
 }

 func NewDatabase(db_type, schema, host, port, user, password string) Database {
@@ -37,6 +37,13 @@ func (self ArticleHeaders) Add(key, val string) {
 	}
 }

+// Len returns the total number of values stored across all header keys.
+func (self ArticleHeaders) Len() (l int) {
+	for _, vals := range self {
+		l += len(vals)
+	}
+	return l
+}
+
 func (self ArticleHeaders) Get(key, fallback string) string {
 	val, ok := self[key]
 	if ok {
@@ -1108,6 +1108,50 @@ func (self *nntpConnection) handleLine(daemon *NNTPDaemon, code int, line string
 				} else {
 					conn.PrintfLine("500 invalid daemon state, got STAT with group set but we don't have that group now?")
 				}
+			} else if cmd == "XPAT" {
+				var hdr string
+				var msgid string
+				var lo, hi int64
+				var pats []string
+				if len(parts) >= 3 {
+					hdr = parts[1]
+					if ValidMessageID(parts[2]) {
+						msgid = parts[2]
+					} else {
+						lo, hi = parseRange(parts[2])
+						if !ValidNewsgroup(self.group) {
+							conn.PrintfLine("430 no such article")
+							return
+						}
+					}
+					pats = parts[3:]
+					var hdrs ArticleHeaders
+
+					if len(msgid) > 0 {
+						hdrs, err = daemon.database.GetHeadersForMessage(msgid)
+					} else {
+						hdrs, err = daemon.database.FindHeaders(self.group, hdr, lo, hi)
+					}
+					if err == nil {
+						hdrs = headerFindPats(hdr, hdrs, pats)
+						if hdrs.Len() > 0 {
+							conn.PrintfLine("221 Header follows")
+							for _, vals := range hdrs {
+								for idx := range vals {
+									conn.PrintfLine("%s: %s", hdr, vals[idx])
+								}
+							}
+							conn.PrintfLine(".")
+						} else {
+							conn.PrintfLine("430 no such article")
+						}
+					} else {
+						conn.PrintfLine("502 %s", err.Error())
+					}
+					return
+				}
+				conn.PrintfLine("430 no such article")
+				return
 			} else if cmd == "XHDR" {
 				if len(self.group) > 0 {
 					var msgid string
@@ -1606,6 +1650,8 @@ func (self *nntpConnection) runConnection(daemon *NNTPDaemon, inbound, stream, r
 						log.Println(self.name, "TLS initiated", self.authenticated)
 					} else {
 						log.Println("STARTTLS failed:", err)
+						nconn.Close()
+						return
 					}
 				} else if cmd == "CAPABILITIES" {
 					// write capabilities
@@ -1953,3 +1953,21 @@ func (self *PostgresDatabase) FindCitesInText(text string) (msgids []string, err
 	}
 	return
 }
+
+// FindHeaders collects the values of header `headername` for every article
+// posted in newsgroup `group`. The header name is lowercased for the query and
+// the values are stored in the result under the name as given.
+//
+// NOTE(review): lo and hi are accepted but not applied to the query yet, so the
+// whole group is always searched.
+//
+// Returns an empty ArticleHeaders when nothing matches. Query, scan and
+// iteration errors are returned to the caller.
+func (self *PostgresDatabase) FindHeaders(group, headername string, lo, hi int64) (hdr ArticleHeaders, err error) {
+	hdr = make(ArticleHeaders)
+	q := "SELECT header_value FROM nntpheaders WHERE header_name = $1 AND header_article_message_id IN ( SELECT message_id FROM articleposts WHERE newsgroup = $2 )"
+	var rows *sql.Rows
+	rows, err = self.conn.Query(q, strings.ToLower(headername), group)
+	if err != nil {
+		// kept from the original: an empty result is not treated as a failure
+		if err == sql.ErrNoRows {
+			err = nil
+		}
+		return
+	}
+	// release the result set on every exit path, including scan failures
+	defer rows.Close()
+	for rows.Next() {
+		var str string
+		if err = rows.Scan(&str); err != nil {
+			return
+		}
+		hdr.Add(headername, str)
+	}
+	// Next() returning false can also mean the iteration failed
+	err = rows.Err()
+	return
+}
@@ -682,3 +682,52 @@ func msgidFrontendSign(sk []byte, msgid string) string {
 	h := sha512.Sum512([]byte(msgid))
 	return cryptoSignFucky(h[:], sk)
 }
+
+// patMatch reports whether value v satisfies pat, a comma separated list of
+// regular expressions. A segment prefixed with '!' is inverted: it is satisfied
+// when v does NOT match the rest of the segment. The first satisfied segment
+// wins. Empty segments and segments that are not valid regular expressions are
+// skipped.
+func patMatch(v, pat string) (found bool) {
+	for _, part := range strings.Split(pat, ",") {
+		invert := strings.HasPrefix(part, "!")
+		if invert {
+			part = part[1:]
+		}
+		if part == "" {
+			// nothing to match against (also avoids indexing an empty segment)
+			continue
+		}
+		// the pattern is the first argument, the text being tested the second
+		matched, err := regexp.MatchString(part, v)
+		if err != nil {
+			// invalid pattern: never counts as a match, inverted or not
+			continue
+		}
+		if matched != invert {
+			return true
+		}
+	}
+	return false
+}
+
+// headerFindPats returns the values of `header` in hdr that satisfy at least
+// one of `patterns` (see patMatch). Each matching value is added once, even if
+// several patterns match it. The result is empty when the header is absent or
+// no patterns are given.
+func headerFindPats(header string, hdr ArticleHeaders, patterns []string) (found ArticleHeaders) {
+	found = make(ArticleHeaders)
+	if !hdr.Has(header) || len(patterns) == 0 {
+		return
+	}
+	for _, v := range hdr[header] {
+		for _, pat := range patterns {
+			if patMatch(v, pat) {
+				found.Add(header, v)
+				// one hit is enough; continuing would add the value again
+				break
+			}
+		}
+	}
+	return
+}
+
+// parseRange splits an article range of the form "n" or "lo-hi" into its
+// bounds. A single number yields lo == hi. Parse errors are ignored and the
+// value strconv.ParseInt returned alongside the error is used. Input with more
+// than one '-' yields 0, 0.
+func parseRange(str string) (lo, hi int64) {
+	bounds := strings.Split(str, "-")
+	switch len(bounds) {
+	case 1:
+		n, _ := strconv.ParseInt(bounds[0], 10, 64)
+		return n, n
+	case 2:
+		lo, _ = strconv.ParseInt(bounds[0], 10, 64)
+		hi, _ = strconv.ParseInt(bounds[1], 10, 64)
+	}
+	return lo, hi
+}
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Sergey Demyanov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,95 @@
+package wildmat
+
+// taken from https://github.com/demon-xxi/wildmatch/blob/0d1569265aadb1eb16009dd7bad941b4bd2aca8d/wildmatch.go
+
+import (
+	"strings"
+)
+
+// IsSubsetOf verifies if `w` wildcard is a subset of `s`.
+// I.e. checks if `s` is a superset of subset `w`.
+// Wildcard A is subset of B if any possible path that matches A also matches B.
+// Levels are separated by ','. Within a level '?' stands for exactly one
+// character and '*' for any number of characters. A whole level of "**" in
+// `s` stands for any number of levels.
+func IsSubsetOf(w string, s string) bool {
+
+	// shortcut for identical sets
+	if s == w {
+		return true
+	}
+
+	// only empty set is a subset of an empty set
+	if len(s) == 0 {
+		return len(w) == 0
+	}
+
+	// find nesting separators
+	sp := strings.Index(s, ",")
+	wp := strings.Index(w, ",")
+
+	// check if this is a nested path
+	if sp >= 0 {
+
+		// if set is nested then tested wildcard must be nested too
+		if wp < 0 {
+			return false
+		}
+
+		// Special case for a "**" level that matches any number of levels:
+		// either it swallows the first level of w, or it matches no level at all.
+		// The parentheses matter: without them the second alternative would be
+		// tried for every nested set, letting any first level of s be skipped.
+		if s[:sp] == "**" &&
+			(IsSubsetOf(w[wp+1:], s) ||
+				IsSubsetOf(w, s[sp+1:])) {
+			return true
+		}
+
+		// check that current level names are subsets
+		// and compare rest of the path to be subset also
+		return (IsSubsetOf(w[:wp], s[:sp]) &&
+			IsSubsetOf(w[wp+1:], s[sp+1:]))
+	}
+
+	// subset can't have more levels than set
+	if wp >= 0 {
+		return false
+	}
+
+	// we are comparing names on the same nesting level here
+	// so let's do symbol by symbol comparison
+	switch s[0] {
+	case '?':
+		// ? matches non empty character. '*' can't be a subset of '?'
+		if len(w) == 0 || w[0] == '*' {
+			return false
+		}
+		// any other symbol matches '?', so let's skip to next
+		return IsSubsetOf(w[1:], s[1:])
+	case '*':
+		// '*' matches 0 and any other number of symbols
+		// so checking 0 and recursively subset without first letter
+		return IsSubsetOf(w, s[1:]) ||
+			(len(w) > 0 && IsSubsetOf(w[1:], s))
+	default:
+		// making sure next symbol in w exists and it's the same as in set
+		if len(w) == 0 || w[0] != s[0] {
+			return false
+		}
+	}
+
+	// recursively check rest of the set and w
+	return IsSubsetOf(w[1:], s[1:])
+}
+
+// IsSubsetOfAny looks for a set in `sets` that wildcard `w` is a subset of.
+// Wildcard A is subset of B if any possible path that matches A also matches B.
+// When several sets qualify, a later one replaces the current choice only if it
+// is itself a subset of that choice, so the narrowest superset seen wins.
+// Returns the index of the chosen set, or -1 if none qualifies.
+func IsSubsetOfAny(w string, sets ...string) (found int) {
+	found = -1 // not found by default
+	for idx := range sets {
+		candidate := sets[idx]
+		if IsSubsetOf(w, candidate) {
+			if found < 0 || IsSubsetOf(candidate, sets[found]) {
+				found = idx
+			}
+		}
+	}
+	return found
+}