Container Based Filesystem

Prerequisites: 

A strong understanding of python2.7
python2.7
jsonpickle

WARNING: This may be a somewhat advanced topic for some readers. If you are reading this tutorial, I assume you understand Python classes, dictionaries, and lists very well. If you do not feel comfortable with any of these, please practice your Python before attempting this tutorial.

I will attempt to explain what is happening for interested parties as much as possible.

With political shitlers looking at wrecking basic internet encryption products again, Its time that people start rolling their own protected safe place to keep files, have private conversations, and other things that need crypto. I dont talk about anything confidental over any server i can't review the source code.

Today we will cover creating our own basic file system container in pure python. I will cover the implementation of a basic filesystem with no recursion.
You will have the ability to create, list a folder's contents, list all folders, put a file into the container, and retrieve file from container.

import jsonpickle
import os
import sys
import base64

class FS():
    """Container filesystem skeleton: a header dict plus an (empty) structure dict."""
    def __init__(self):
        # Header metadata for the container; only the version tag is stored in it.
        self.filesystem = {}
        # Will hold the folders/files; starts empty.
        self.structure = {}
        # NOTE: `cont` is not a parameter. It is read from the module-level
        # global assigned in the __main__ block before FS() is called.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5" #this header is great if you plan to implement crypto.

if __name__ == "__main__":
    # "-n" starts a new container. sys.argv[1] is read unconditionally, so
    # running the script without an argument raises IndexError.
    if sys.argv[1] == ("-n"):
        # `cont` becomes the module-level global that FS.__init__ reads.
        cont = raw_input("Name for container file: ")
        FS()

You can run this as-is; however, if you try to do anything more with it, you will run into trouble.

For example, if I add the following:

import jsonpickle
import os
import sys
import base64

class FS():
    """Container filesystem: a header dict plus a structure dict, saved to one file."""
    def __init__(self):
        self.filesystem = {}
        self.structure = {}
        # `cont` is the module-level global assigned in the __main__ block.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5" #this header is great if you plan to implement crypto.

    def MkFolder(self, name,  cont):
        """Add an empty folder called `name` and rewrite the container file `cont`."""
        self.cont = cont
        self.name = name
        # NOTE(review): this looks the name up in the header dict, while
        # folders are stored in self.structure below.
        if self.filesystem.has_key(self.name):
            print("Folder Already Exists!")
        else:
            self.structure[self.name] = ("")
            f = jsonpickle.encode(self.filesystem)
            s = jsonpickle.encode(self.structure)
            # On disk: encoded header, the "[99099]" marker, encoded structure.
            open(self.cont,"wb").write(f+"[99099]")
            open(self.cont,"ab").write(s)

if __name__ == "__main__":
    if sys.argv[1] == ("-n"):
        cont = raw_input("Name for container file: ")
        # The new instance is not stored anywhere, so nothing below can reach it.
        FS()
    while True:
        cmd = raw_input("NL>>> ")
        if cmd == ("-mf"):
            folder = raw_input("Folder: ")
            # Intentionally wrong for the tutorial: MkFolder is called on the
            # class, not on an instance, so `folder` is passed where `self` is
            # expected and Python 2 raises TypeError (unbound method).
            FS.MkFolder(folder,cont)

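Don't ask what the MkFolder function does yet; just know that this will error out with an instance error: "-mf" calls FS.MkFolder on the class itself, so Python 2 raises "TypeError: unbound method MkFolder() must be called with FS instance as first argument".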

To fix the FS.MkFolder error we need to capture the instance, not the class itself.

One way to keep hold of a class instance is to have it store itself in a module-level list:

import jsonpickle
import os
import sys
import base64

FSinstance = [] #global instance
class FS():
    """Container filesystem whose instances register themselves in FSinstance."""
    def __init__(self):
        FSinstance.append(self) #we append self to FSinstance making it accessable.
        self.filesystem = {}
        self.structure = {}
        # `cont` is the module-level global assigned in the __main__ block.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5" #this header is great if you plan to implement crypto.
    def MkFolder(self, name,  cont):
        """Add an empty folder called `name` and rewrite the container file `cont`."""
        self.cont = cont
        self.name = name
        # NOTE(review): this looks the name up in the header dict, while
        # folders are stored in self.structure below.
        if self.filesystem.has_key(self.name):
            print("Folder Already Exists!")
        else:
            self.structure[self.name] = ("")
            f = jsonpickle.encode(self.filesystem)
            s = jsonpickle.encode(self.structure)
            # On disk: encoded header, the "[99099]" marker, encoded structure.
            open(self.cont,"wb").write(f+"[99099]")
            open(self.cont,"ab").write(s)
if __name__ == "__main__":
    # "-n" creates a fresh container; FS.__init__ picks up the global `cont`.
    if sys.argv[1] == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    # Command prompt: runs until the process is interrupted.
    while True:
        cmd = raw_input("NL>>> ")
        if cmd != "-mf":
            continue
        folder = raw_input("Folder: ")
        # Each FS instance registered itself in FSinstance when it was built,
        # so looping over the list reaches the instance instead of the class.
        for instance in FSinstance:
            instance.MkFolder(folder, cont)

So now that we're able to run the "-mf" command, what did it do?

def MkFolder(self, name,  cont):

As we can see, the MkFolder takes two args: a folder name, and the container name, which is the current container that we are using.
self.cont = cont
self.name = name
if self.filesystem.has_key(self.name):
    print("Folder Already Exists!")

We store our args as instance attributes and check whether the dict has a key with the name we specified. The intent is to refuse to create two folders with the same name.

else:
      self.structure[self.name] = ("")

We write a folder to self.structure under whatever name you specified. The value will be "" (you could likely use None instead) because we are creating a folder, not a file.
f = jsonpickle.encode(self.filesystem)
s = jsonpickle.encode(self.structure)
open(self.cont,"wb").write(f+"[99099]")
open(self.cont,"ab").write(s)

This part is a bit more complex. We encode the filesystem in two parts: self.filesystem is encoded FIRST, as the f variable, using jsonpickle.
Then we encode our actual filesystem structure into the s variable.
Because we are writing two dicts to our FS container, we need a way of splitting them correctly when we want to load the container.
By writing an additional string (5 digits surrounded by "[]") right after f, we have a way of separating the two dictionaries later. The header is written before the split marker and the actual filesystem after it. So now the question: how do I prove that the folder was created?

We will need to iterate over self.structure somehow but we havent defined a root? ITS NOT REALLY A PROBLEM!! (at least for our POC)

new code:

import jsonpickle
import os
import sys
import base64

FSinstance = []
class FS():
    """Container filesystem kept in memory and mirrored to a single file.

    ``filesystem`` is the header dict (version tag) and ``structure`` maps
    each folder name to its value. Every new instance registers itself in
    the module-level ``FSinstance`` list.
    """

    def __init__(self):
        FSinstance.append(self)
        self.filesystem = {}
        self.structure = {}
        # `cont` is read from the module-level global assigned in __main__.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5"  # this header is great if you plan to implement crypto.

    def MkFolder(self, name, cont):
        """Create folder ``name`` and rewrite the container file ``cont``.

        Prints "Folder Already Exists!" and writes nothing when ``name`` is
        already a key of the header dict.
        """
        self.cont = cont
        self.name = name
        if name in self.filesystem:
            print("Folder Already Exists!")
            return
        self.structure[name] = ""
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # On disk: encoded header, the "[99099]" marker, encoded structure.
        with open(cont, "wb") as out:
            out.write(header + "[99099]")
        with open(cont, "ab") as out:
            out.write(body)

    def List(self, folder):  # new function
        """Print every key of ``structure`` when ``folder`` is "/"; otherwise print nothing."""
        if folder != "/":
            return
        for entry in self.structure:
            print(entry)
if __name__ == "__main__":
    # "-n" creates a fresh container; FS.__init__ picks up the global `cont`.
    if sys.argv[1] == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    while True:
        cmd = raw_input("NL>>> ")
        # Both known commands ask for a folder first; anything else is ignored.
        if cmd not in ("-mf", "LS"):
            continue
        folder = raw_input("Folder: ")
        for instance in FSinstance:
            if cmd == "-mf":
                instance.MkFolder(folder, cont)
            else:  # "LS", the new command
                instance.List(folder)

So we added a new command "LS" that takes Folder as a required argument.

Note that we defined a keyword "/", which plays the same role as the UNIX root.
When we loop over our dict, we print k, the key (because we're after the folder name), not the value, which is v. Remember that v is empty ("") when it comes to folders! Now we are going to implement the ability to store files, one at a time, in a folder. Typically people read the file they want to store as binary and serialize it directly as base64, which is what we are going to do.

The new code is now:

import jsonpickle
import os
import sys
import base64

FSinstance = []
class FS():
    """Container filesystem kept in memory and mirrored to a single file.

    ``filesystem`` is the header dict (version tag). ``structure`` is a flat
    dict: a folder maps to "" or to the name of the file put into it, and a
    file maps to ``{file name: base64 data}``. Every new instance registers
    itself in the module-level ``FSinstance`` list.
    """

    def __init__(self):
        FSinstance.append(self)
        self.filesystem = {}
        self.structure = {}
        # `cont` is read from the module-level global assigned in __main__.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5"  # this header is great if you plan to implement crypto.

    def MkFolder(self, name, cont):
        """Create folder ``name`` and rewrite the container file ``cont``.

        Prints "Folder Already Exists!" and writes nothing when ``name`` is
        already a key of the header dict.
        """
        self.cont = cont
        self.name = name
        if name in self.filesystem:
            print("Folder Already Exists!")
            return
        self.structure[name] = ""
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # On disk: encoded header, the "[99099]" marker, encoded structure.
        with open(cont, "wb") as out:
            out.write(header + "[99099]")
        with open(cont, "ab") as out:
            out.write(body)

    def PutFile(self, folder, name, cont):  # new function
        """Read the file at path ``name`` and store it under ``folder``.

        The file's entry is added to ``structure`` before anything else is
        checked. The folder is only pointed at the file, and the container
        only rewritten, when ``folder`` is a key of ``structure``. Errors
        raised while storing or saving are printed.
        """
        entry = {}
        self.name = name
        self.structure[name] = entry
        self.cont = cont
        self.folder = folder
        with open(name, "rb") as src:
            self.data = src.read()
        if folder not in self.structure:
            return
        try:
            self.structure[folder] = name
            entry[name] = base64.b64encode(self.data)
            header = jsonpickle.encode(self.filesystem)
            body = jsonpickle.encode(self.structure)
            with open(cont, "wb") as out:
                out.write(header + "[99099]")
            with open(cont, "ab") as out:
                out.write(body)
        except Exception as err:
            print(err)

    def List(self, folder):
        """Print every key of ``structure`` when ``folder`` is "/"; otherwise print nothing."""
        if folder != "/":
            return
        for entry in self.structure:
            print(entry)
if __name__ == "__main__":
    # "-n" creates a fresh container; FS.__init__ picks up the global `cont`.
    if sys.argv[1] == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    while True:
        cmd = raw_input("NL>>> ")
        # Every known command asks for a folder first; anything else is ignored.
        if cmd not in ("PUT", "-mf", "LS"):
            continue
        folder = raw_input("Folder: ")
        if cmd == "PUT":  # new command
            name = raw_input("File: ")
            for instance in FSinstance:
                instance.PutFile(folder, name, cont)
        elif cmd == "-mf":
            for instance in FSinstance:
                instance.MkFolder(folder, cont)
        else:  # "LS"
            for instance in FSinstance:
                instance.List(folder)

So, how does PUT work? As you may have noticed, it creates a function-local dict, c. This dict will represent the file.
We then register the new file by storing c in self.structure, using the file name as the key.
The data is retrieved by opening the file specified by name in "rb" (binary) mode and reading it.
We now check the FS structure for the existence of the target folder and, if it is found, we set that folder's value in self.structure to the file name.
We base64-encode the data before storing it.

c[self.name] = e

This is how we set the value of the c dict: the file name maps to the encoded data.
We then save the filesystem back to disk with the same method used for writing the new folders.
HOWEVER, there's a not-so-obvious bug: if you enter a file name that is the same as a folder that exists, you will damage your container.
Change MkFolder and PutFile to look like this:

def MkFolder(self, name,  cont):
        """Create an empty folder called `name` and save the container to `cont`.

        Files and folders share one flat namespace (self.structure), so the
        request is refused with a message when `name` is already taken.
        """
        self.cont = cont
        self.name = name
        if name in self.structure:
            print("all files and folders must have unique names")
            return
        self.structure[name] = ""
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # One write inside a context manager: the handle is closed (and the
        # data flushed) before returning, and the file is never left holding
        # only the header. Layout: header, "[99099]" marker, structure.
        with open(cont, "wb") as out:
            out.write(header + "[99099]" + body)
           
    def PutFile(self, folder, name, cont):
        c = {}
        self.name = name
        if self.structure.has_key(self.name):
            print("all files and folders must have unique names")
        self.structure[self.name] = c
        self.cont = cont
        self.folder = folder
        self.data = open(self.name,"rb").read()
        if self.structure.has_key(self.folder):
            try:
                self.structure[self.folder] = self.name
                e = base64.b64encode(self.data)
                c[self.name] = e
                f = jsonpickle.encode(self.filesystem)
                s = jsonpickle.encode(self.structure)
                open(self.cont,"wb").write(f+"[99099]")
                open(self.cont,"ab").write(s)
            except Exception  as e:
                print e

So, how do we retrieve a file from container?
we will need the folder's name, and the file name right?

Before I show you the full updated code, lets review the function we are going to implement.

def GetFile(self, folder, name):
        """Extract the stored file `name` from the container to disk.

        `folder` must be an existing key of self.structure and `name` must
        refer to a stored file entry; otherwise "no file found!" is printed.
        The decoded data is written to the path `name`, outside the container.
        """
        self.folder = folder
        self.name = name
        entry = self.structure.get(name) if folder in self.structure else None
        # A stored file is a dict {name: base64 data}; folders are plain strings.
        if not isinstance(entry, dict) or name not in entry:
            print("no file found!")
            return
        try:
            payload = base64.b64decode(entry[name])
            with open(name, "wb") as out:
                out.write(payload)
        except (TypeError, ValueError, EnvironmentError) as e:
            # Corrupt base64 data or an unwritable target path.
            print(e)

As you can see we will look for a matching folder key as shown by
 if self.structure.has_key(self.folder):

We then attempt to access the file by looking up self.name in the structure. If the folder does not exist, "no file found!" is printed; if the file lookup itself fails, the error is caught and reported instead of crashing the program.
When file is found data is accessed by using the reverse of the put method which was
c[self.name] = e

Our method to access the stored data will be as the new function shows. We then decode the data, and write back to disk outside the container.
latest code will look like this:

import jsonpickle
import os
import sys
import base64

FSinstance = []
class FS():
    """Single-file container filesystem (tutorial proof of concept).

    State:
      filesystem -- header dict; holds only the "SX-FS" version tag.
      structure  -- flat dict. A folder maps to "" or to the name of the
                    file last put into it; a file maps to
                    {file name: base64 data}.

    On disk the container is the jsonpickle-encoded header, the marker
    "[99099]", then the jsonpickle-encoded structure. Every new instance
    registers itself in the module-level FSinstance list.
    """

    _MARKER = "[99099]"

    def __init__(self):
        FSinstance.append(self)
        self.filesystem = {}
        self.structure = {}
        # `cont` is read from the module-level global assigned in __main__.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5"  # this header is great if you plan to implement crypto.

    def _save(self):
        """Write header, marker and structure to self.cont in a single pass."""
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # The context manager closes (and flushes) the handle before returning.
        with open(self.cont, "wb") as out:
            out.write(header + self._MARKER + body)

    def MkFolder(self, name, cont):
        """Create an empty folder called `name` and save the container to `cont`.

        Refused with a message when `name` is already used by a file or folder.
        """
        self.cont = cont
        self.name = name
        if name in self.structure:
            print("all files and folders must have unique names")
            return
        self.structure[name] = ""
        self._save()

    def PutFile(self, folder, name, cont):
        """Store the file at path `name` inside `folder` and save the container.

        Nothing is changed, and a message is printed, when `name` is already
        taken, `folder` is not an existing folder, or the source file cannot
        be read. A folder records only the file most recently put into it.
        """
        self.name = name
        self.cont = cont
        self.folder = folder
        if name in self.structure:
            print("all files and folders must have unique names")
            return  # without this the existing entry would still be overwritten
        # A dict value marks a stored file, so it cannot serve as a folder.
        if folder not in self.structure or isinstance(self.structure[folder], dict):
            print("no folder found!")
            return
        try:
            with open(name, "rb") as src:
                self.data = src.read()
        except EnvironmentError as e:
            print(e)
            return
        # Only touch the structure once every check has passed.
        self.structure[name] = {name: base64.b64encode(self.data)}
        self.structure[folder] = name
        try:
            self._save()
        except Exception as e:
            # Best effort, as before: report a failed save instead of
            # crashing the prompt.
            print(e)

    def GetFile(self, folder, name):
        """Extract the stored file `name` from the container to the path `name`.

        Prints "no file found!" unless `folder` exists and `name` refers to a
        stored file entry.
        """
        self.folder = folder
        self.name = name
        entry = self.structure.get(name) if folder in self.structure else None
        if not isinstance(entry, dict) or name not in entry:
            print("no file found!")
            return
        try:
            payload = base64.b64decode(entry[name])
            with open(name, "wb") as out:
                out.write(payload)
        except (TypeError, ValueError, EnvironmentError) as e:
            # Corrupt base64 data or an unwritable target path.
            print(e)

    def List(self, folder):
        """Print every key of the structure when `folder` is "/"; otherwise print nothing."""
        if folder != "/":
            return
        for entry in self.structure:
            print(entry)
if __name__ == "__main__":
    # "-n" creates a fresh container; FS.__init__ picks up the global `cont`.
    if sys.argv[1] == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    while True:
        cmd = raw_input("NL>>> ")
        # Every known command asks for a folder first; anything else is ignored.
        if cmd not in ("GET", "PUT", "-mf", "LS"):
            continue
        folder = raw_input("Folder: ")
        if cmd in ("GET", "PUT"):
            # File commands need the file name as a second answer.
            name = raw_input("File: ")
            for instance in FSinstance:
                if cmd == "GET":
                    instance.GetFile(folder, name)
                else:
                    instance.PutFile(folder, name, cont)
        elif cmd == "-mf":
            for instance in FSinstance:
                instance.MkFolder(folder, cont)
        else:  # "LS"
            for instance in FSinstance:
                instance.List(folder)

JUST ONE PROBLEM!!

how do I know where the file is?? the LS function is broken!

Lets do a quick patch now.
change your List function to look like this

def List(self,folder):
        """Print the root listing for "/", or the value stored for `folder`."""
        # Root: prints every key of self.structure, whatever its value is.
        if folder == ("/"):           #leave to neophyte to fix the error here as a challenge
            for k, v in self.structure.iteritems():
                print k
        else:
            try:
                # Scan for the key equal to `folder` and print its value;
                # nothing is printed when no key matches.
                for k,v in self.structure.iteritems():
                    if k == folder:
                        print v
            except Exception as e:
                print e

The else statement here is what we will focus on. since k has the folder name we can print the values from the folder using v if we specify a valid folder.
NOTE: searching from "/" root is bugged still. I leave it to the reader to fix that bug.

So we have pretty much everything at this point but if we exit the software, with ctrl-c OR by adding this function to our commands list and class:

def Exit(self,cont):
        """Save the container to `cont`, then end the program via sys.exit()."""
        self.cont = cont
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # Close the handle explicitly before exiting so the data is flushed;
        # one write also avoids ever leaving a header-only file on disk.
        # Layout: header, "[99099]" marker, structure.
        with open(cont, "wb") as out:
            out.write(header + "[99099]" + body)
        sys.exit()

You will notice that we have no way to load our container back up. So how do we reload our container?

THE FINAL PART!

you will need to add entry code ( the cmd loop left out for reasons):

if __name__ == "__main__":
    # Without a recognised option there would be no `cont` and no FS
    # instance, so stop with a usage hint instead of failing later with
    # IndexError/NameError.
    option = sys.argv[1] if len(sys.argv) > 1 else None
    if option not in ("-n", "-l"):
        print("usage: %s -n (new container) | -l (load container)" % sys.argv[0])
        sys.exit(1)
    if option == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    else:
        cont = raw_input("Container to load: ")
        FS()  # an FS instance must exist prior to LOAD command
        for f in FSinstance:
            f.LoadFS(cont)

and the LoadFS function:
def LoadFS(self,cont):
        """Load the container file `cont` over this instance's state.

        Returns True on success. On any failure the error is printed, the
        in-memory filesystem and structure are left untouched, and False is
        returned so the caller can avoid saving a blank filesystem over the
        container.
        """
        self.cont = cont
        try:
            with open(cont, "rb") as src:
                raw = src.read()
            # Split on the FIRST marker only: the header never contains it,
            # but the structure may (e.g. a folder literally named "[99099]").
            header, body = raw.split("[99099]", 1)
            # SECURITY: jsonpickle.decode can instantiate arbitrary objects;
            # only load container files you trust.
            filesystem = jsonpickle.decode(header)
            structure = jsonpickle.decode(body)
        except Exception as e:
            # Kept broad on purpose: the decode error type depends on the
            # JSON backend jsonpickle uses.
            print(e)
            return False
        # Assign both halves together so a failed decode never leaves a mix
        # of old and new state.
        self.filesystem = filesystem
        self.structure = structure
        return True

So basically we load the saved filesystem over a blank one! Note the ld.split code: that's how we parse our 2 dicts.

There are likely several bugs in this tutorial. This is meant only as a guide for implementing a container based File system and not best practice.

This is the final code:

##SOLDIERX.COM "NANOCONT" container based filesystem
##version 0.5
##for tutorial and educational purposes only.
##author: r3q13m

import jsonpickle
import os
import sys
import base64

FSinstance = []
class FS():
    """Single-file container filesystem (tutorial proof of concept).

    State:
      filesystem -- header dict; holds only the "SX-FS" version tag.
      structure  -- flat dict. A folder maps to "" or to the name of the
                    file last put into it; a file maps to
                    {file name: base64 data}.

    On disk the container is the jsonpickle-encoded header, the marker
    "[99099]", then the jsonpickle-encoded structure. Every new instance
    registers itself in the module-level FSinstance list.
    """

    _MARKER = "[99099]"

    def __init__(self):
        FSinstance.append(self)
        self.filesystem = {}
        self.structure = {}
        # `cont` is read from the module-level global assigned in __main__.
        self.cont = cont
        self.filesystem["SX-FS"] = "v.0.5"  # this header is great if you plan to implement crypto.

    def _save(self):
        """Write header, marker and structure to self.cont in a single pass."""
        header = jsonpickle.encode(self.filesystem)
        body = jsonpickle.encode(self.structure)
        # The context manager closes (and flushes) the handle before returning.
        with open(self.cont, "wb") as out:
            out.write(header + self._MARKER + body)

    def MkFolder(self, name, cont):
        """Create an empty folder called `name` and save the container to `cont`.

        Refused with a message when `name` is already used by a file or folder.
        """
        self.cont = cont
        self.name = name
        if name in self.structure:
            print("all files and folders must have unique names")
            return
        self.structure[name] = ""
        self._save()

    def PutFile(self, folder, name, cont):
        """Store the file at path `name` inside `folder` and save the container.

        Nothing is changed, and a message is printed, when `name` is already
        taken, `folder` is not an existing folder, or the source file cannot
        be read. A folder records only the file most recently put into it.
        """
        self.name = name
        self.cont = cont
        self.folder = folder
        if name in self.structure:
            print("all files and folders must have unique names")
            return  # without this the existing entry would still be overwritten
        # A dict value marks a stored file, so it cannot serve as a folder.
        if folder not in self.structure or isinstance(self.structure[folder], dict):
            print("no folder found!")
            return
        try:
            with open(name, "rb") as src:
                self.data = src.read()
        except EnvironmentError as e:
            print(e)
            return
        # Only touch the structure once every check has passed.
        self.structure[name] = {name: base64.b64encode(self.data)}
        self.structure[folder] = name
        try:
            self._save()
        except Exception as e:
            # Best effort, as before: report a failed save instead of
            # crashing the prompt.
            print(e)

    def GetFile(self, folder, name):
        """Extract the stored file `name` from the container to the path `name`.

        Prints "no file found!" unless `folder` exists and `name` refers to a
        stored file entry.
        """
        self.folder = folder
        self.name = name
        entry = self.structure.get(name) if folder in self.structure else None
        if not isinstance(entry, dict) or name not in entry:
            print("no file found!")
            return
        try:
            payload = base64.b64decode(entry[name])
            with open(name, "wb") as out:
                out.write(payload)
        except (TypeError, ValueError, EnvironmentError) as e:
            # Corrupt base64 data or an unwritable target path.
            print(e)

    def LoadFS(self, cont):
        """Load the container file `cont` over this instance's state.

        Returns True on success. On failure the error is printed, the
        in-memory state is left untouched, and False is returned.
        """
        self.cont = cont
        try:
            with open(cont, "rb") as src:
                raw = src.read()
            # Split on the FIRST marker only: the header never contains it,
            # but the structure may (e.g. a folder literally named "[99099]").
            header, body = raw.split(self._MARKER, 1)
            # SECURITY: jsonpickle.decode can instantiate arbitrary objects;
            # only load container files you trust.
            filesystem = jsonpickle.decode(header)
            structure = jsonpickle.decode(body)
        except Exception as e:
            # Kept broad on purpose: the decode error type depends on the
            # JSON backend jsonpickle uses.
            print(e)
            return False
        # Assign both halves together so a failed decode never leaves a mix
        # of old and new state.
        self.filesystem = filesystem
        self.structure = structure
        return True

    def Exit(self, cont):
        """Save the container to `cont`, then end the program via sys.exit()."""
        self.cont = cont
        self._save()
        sys.exit()

    def List(self, folder):
        """Print the root listing for "/", or the value stored for `folder`.

        Nothing is printed when `folder` is not "/" and is not a key of the
        structure.
        """
        if folder == "/":  # leave to neophyte to fix the error here as a challenge
            # Prints every key of the structure, whatever its value is.
            for entry in self.structure:
                print(entry)
        elif folder in self.structure:
            # The value stored under the folder's key is what it contains.
            print(self.structure[folder])

if __name__ == "__main__":
    # Without a recognised option there would be no `cont` and no FS
    # instance, so stop with a usage hint instead of failing later with
    # IndexError/NameError.
    option = sys.argv[1] if len(sys.argv) > 1 else None
    if option not in ("-n", "-l"):
        print("usage: %s -n (new container) | -l (load container)" % sys.argv[0])
        sys.exit(1)
    if option == "-n":
        cont = raw_input("Name for container file: ")
        FS()
    else:
        cont = raw_input("Container to load: ")
        FS()  # an FS instance must exist prior to LOAD command
        for f in FSinstance:
            f.LoadFS(cont)
    # Command prompt; "-e" saves the container and exits.
    while True:
        cmd = raw_input("NL>>> ")
        if cmd == "-e":
            for f in FSinstance:
                f.Exit(cont)
        elif cmd == "GET":
            folder = raw_input("Folder: ")
            name = raw_input("File: ")
            for f in FSinstance:
                f.GetFile(folder, name)
        elif cmd == "PUT":
            folder = raw_input("Folder: ")
            name = raw_input("File: ")
            for f in FSinstance:
                f.PutFile(folder, name, cont)
        elif cmd == "-mf":
            folder = raw_input("Folder: ")
            for f in FSinstance:
                f.MkFolder(folder, cont)
        elif cmd == "LS":
            folder = raw_input("Folder: ")
            for f in FSinstance:
                f.List(folder)